In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from tqdm import tqdm
import wandb

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [2]:
def G_modified(X, model, device):
    # 开始计时
    # start = time.time()
    
    input_dim, m = model.W.shape  # m: 隐藏层神经元数量, input_dim: 输入维度
    batch_size = X.shape[0]       # batch_size: 批处理大小
    
    # 初始化 Jacobian 矩阵 J，大小为 (batch_size, m * (input_dim + 1))
    J = torch.zeros(batch_size, m * (input_dim + 1), device=device)
    
    # 计算所有样本的 <w_i, x> 和 ReLU 激活
    relu_input = X @ model.W  # (batch_size, m)
    relu_output = torch.relu(relu_input)  # (batch_size, m)
    # 计算模型输出
    # 对 w_i 的部分并行计算 Jacobian
    for j in range(m):
        mask = relu_output[:, j] > 0  # 只选择 ReLU 激活大于0的元素
        J[:, j*input_dim:(j+1)*input_dim] = (model.a[j] * X * mask.view(-1, 1)) / m
    
    # 对 a_i 的部分并行计算 Jacobian
    J[:, m*input_dim:] = relu_output / m

    # 结束计时
    # end = time.time()
    # print("优化后Time: ", end - start)
    
    return J


In [3]:
X_train, Y_train, X_test, Y_test = torch.load('../data/dataset_2_40D.pt', weights_only=True)

# 将数据移动到适当的设备
X_train = X_train.to(device)
Y_train = Y_train.to(device)
X_test = X_test.to(device)
Y_test = Y_test.to(device)

l = 64
train_dataset = TensorDataset(X_train, Y_train)
train_loader = DataLoader(train_dataset, batch_size=l, shuffle=True)

  return self.fget.__get__(instance, owner)()


In [4]:
D = 40
m = 100

class PM_Euler(nn.Module):
    def __init__(self, input, hidden_layer, output):
        super(PM_Euler, self).__init__()
        self.relu = nn.ReLU()
        self.hidden_dim = hidden_layer
        self.W = nn.Parameter(torch.rand(input, hidden_layer, device=device), requires_grad=True)
        # HE初始化
        nn.init.kaiming_normal_(self.W, mode='fan_in', nonlinearity='relu')
        self.a = nn.Parameter(torch.rand(hidden_layer, output, device=device), requires_grad=True)
        nn.init.kaiming_normal_(self.a, mode='fan_in', nonlinearity='relu')

    def forward(self, x):
        z1 = self.relu(torch.mm(x, self.W))
        z2 = torch.mm(z1, self.a) / self.hidden_dim
        return z2

    def loss(self, y_pred, y_true):
        return (y_pred - y_true.reshape(y_pred.shape)) ** 2

        # 计算模型W和a的Norm
    def get_norm(self):
        return [torch.norm(self.W).item(), torch.norm(self.a).item()]

In [5]:
epochs = 10000
lr = 1

train_losses = []
test_losses = []

In [6]:
config = {
    'learning_rate': lr,
    'batch_size': l,
    'epochs': epochs,
    'hidden_layer': m,
    'input': D + 1,
    'output': 1,
    'optimizer': 'IEQ'
}

import datetime

date = datetime.datetime.now().strftime("%m%d%H%M")
wandb.init(project='Numerical Method', name=f"PM_IEQ_Example_2_{date}", config=config, notes="IEQ modified lr with lr / batch_size")

AttributeError: module 'wandb' has no attribute 'init'

In [7]:
model = PM_Euler(D + 1, m, 1).to(device)

for epoch in tqdm(range(epochs)):
    flag = True
    for X, Y in train_loader:
        if flag:
            U = (model.forward(X) - Y.reshape(-1, 1))
            flag = False
        J = G_modified(X, model, device)
        with torch.no_grad():
            theta_0 = torch.cat([model.W.flatten(), model.a.flatten()]).reshape(-1, 1)
            J_T = J.T
            # 计算量A，A=I + 2lr(J^T)J
            A = torch.eye(theta_0.numel(), device=device) + 2 * (lr / l) * torch.mm(J_T, J)
            L = torch.linalg.cholesky(A)
            A_inv = torch.cholesky_inverse(L)
            theta_1 = theta_0 - 2 * (lr / l) * torch.mm(torch.mm(A_inv, J_T), U)
            model.W.data = theta_1[:(D + 1) * m].reshape(D + 1, m)
            model.a.data = theta_1[(D + 1) * m:].reshape(m, 1)
            U = (torch.eye(U.numel(), device=device) - 2 * (lr / l) * torch.mm(J, torch.mm(A_inv, J_T))) @ U 
            # wandb.log({'U_norm': torch.norm(U).item()})
    with torch.no_grad():
        train_loss = model.loss(model(X_train), Y_train).mean()
        test_loss = model.loss(model(X_test), Y_test).mean()
        train_losses.append(train_loss)
        test_losses.append(test_loss)
        norm = model.get_norm()
        wandb.log({'epoch': epoch + 1,
                   'train_loss': train_loss, 
                   'test_loss': test_loss,
                   'norm_W': norm[0],
                   'norm_a': norm[1],
                   'accuracy': 1 - test_loss})
        # print(f'epoch {epoch + 1}, loss {train_loss:.8f}, test loss {test_loss:.8f}')

  7%|▋         | 680/10000 [20:24<4:39:39,  1.80s/it]


KeyboardInterrupt: 

In [8]:
wandb.finish()

0,1
accuracy,▁▁▂▁▁▂▂▂▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
norm_W,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇█████
norm_a,▁▁▁▁▂▂▂▃▄▄▅▅▆█▇▇▆▆▅▆▅▇▆██▇▇▆▇▅▅▆▆▆▆▆▆▆▅▆
test_loss,██▇██▇▇▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁
train_loss,██▇▇█▇▇▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁

0,1
accuracy,0.56327
epoch,680.0
norm_W,75.7319
norm_a,17.54812
test_loss,0.43673
train_loss,0.41878
