In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import matplotlib.pyplot as plt
import wandb

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [3]:
X_train, Y_train, X_test, Y_test = torch.load('./data/dataset_2_40D.pt')

# 将数据移动到适当的设备
X_train = X_train.to(device)
Y_train = Y_train.to(device)
X_test = X_test.to(device)
Y_test = Y_test.to(device)

X_train.shape, Y_train.shape, X_test.shape, Y_test.shape

(torch.Size([8000, 41]),
 torch.Size([8000]),
 torch.Size([2000, 41]),
 torch.Size([2000]))

In [4]:
# 使用 DataLoader 进行批处理
l = 64
train_dataset = TensorDataset(X_train, Y_train)
train_loader = DataLoader(train_dataset, batch_size=l, shuffle=True)

# 打印第一个批次的大小
for x, y in train_loader:
    print(x.shape, y.shape)
    break

torch.Size([64, 41]) torch.Size([64])


In [5]:
D = 40
m = 100

In [6]:
class Model(nn.Module):
    def __init__(self, input, hidden_layer, output):
        super(Model, self).__init__()
        self.relu = nn.ReLU()
        self.hidden_dim = hidden_layer
        self.W = nn.Parameter(torch.rand(input, hidden_layer, device=device), requires_grad=True)
        # HE初始化
        nn.init.kaiming_normal_(self.W, mode='fan_in', nonlinearity='relu')
        self.a = nn.Parameter(torch.rand(hidden_layer, output, device=device), requires_grad=True)
        nn.init.kaiming_normal_(self.a, mode='fan_in', nonlinearity='relu')
        
    def forward(self, x):
        # print(x.shape)
        z1 = self.relu(torch.mm(x, self.W))
        # print(z1.shape)
        z2 = torch.mm(z1, self.a) / self.hidden_dim
        return z2

    def loss(self, y_pred, y_true):
        return (y_pred - y_true.reshape(y_pred.shape)) ** 2

model = Model(D + 1, m, 1).to(device)

# 计算模型W和a的Norm
def get_norm(model):
    return torch.norm(model.W).item(), torch.norm(model.a).item()

get_norm(model)

(9.02367877960205, 14.648740768432617)

In [7]:
epochs = 10000
lr = 0.1
r = 0
J = 10
h = 0.01
epsilon = 1e-8
beta_1 = 0.9
beta_2 = 0.999
m = 0
v = 0

train_losses = []
test_losses = []

In [8]:
config = {
    'learning_rate': lr,
    'batch_size': l,
    'epochs': epochs,
    'hidden_layer': m,
    'input': D + 1,
    'output': 1,
    'optimizer': 'Adam',
    'Approx Method': 'SPM',
    'r': r,
    'J': J,
    'h': h,
    'epsilon': epsilon,
    
}

In [9]:
import datetime

date = datetime.datetime.now().strftime("%m%d%H%M")
wandb.init(project='Numerical Method', name=f"SPM_A_Euler_Example_2_{date}", config=config)

[34m[1mwandb[0m: Currently logged in as: [33mpheonizard[0m ([33mpheonizard-university-of-nottingham[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113537206417985, max=1.0…

In [10]:
for epoch in range(epochs):
    cnt = 0
    for X, Y in train_loader:
        loss = 0
        for j in range(J):
            original_params = [model.W.clone(), model.a.clone()]
            for param in model.parameters():
                param.data += h * torch.randn_like(param)
            loss += model.loss(model(X), Y).sum()
            model.W.data, model.a.data = original_params
        loss /= J 
        loss.backward()
        with torch.no_grad():
            #=========Nonlinear Term==========
            N_a_init = model.a.grad
            N_w_init = model.W.grad
            m_a = beta_1 * m + (1 - beta_1) * N_a_init
            m_w = beta_1 * m + (1 - beta_1) * N_w_init
            v_a = beta_2 * v + (1 - beta_2) * torch.norm(N_a_init) ** 2
            v_w = beta_2 * v + (1 - beta_2) * torch.norm(N_w_init) ** 2
            m_a_hat = m_a / (1 - beta_1 ** (epoch + 1))
            m_w_hat = m_w / (1 - beta_1 ** (epoch + 1))
            v_a_hat = v_a / (1 - beta_2 ** (epoch + 1))
            v_w_hat = v_w / (1 - beta_2 ** (epoch + 1))
            N_a = m_a_hat
            N_w = m_w_hat
            #=========Time Step Update========
            adaptive_lr = lr / (torch.sqrt(v_a_hat) + epsilon)
            #=========SAV Update========== 
            model.a -= adaptive_lr * N_a
            model.W -= adaptive_lr * N_w
            model.a.grad.zero_()
            model.W.grad.zero_()
            
    with torch.no_grad():
        train_loss = model.loss(model(X_train), Y_train).mean()
        test_loss = model.loss(model(X_test), Y_test).mean()
        train_losses.append(train_loss)
        test_losses.append(test_loss)
        norm = get_norm(model)
        wandb.log({'epoch': epoch + 1,
                   'train_loss': train_loss, 
                   'test_loss': test_loss,
                   'norm_W': norm[0],
                   'norm_a': norm[1],
                   'accuracy': 1 - test_loss,
                   'adaptive_lr': adaptive_lr.item()})
        print(f'epoch {epoch + 1}, loss {train_loss:.6f}, test loss {test_loss:.6f}')

epoch 1, loss 0.054842, test loss 0.059639
epoch 2, loss 0.043109, test loss 0.050135
epoch 3, loss 0.031767, test loss 0.038820
epoch 4, loss 0.023497, test loss 0.029703
epoch 5, loss 0.018250, test loss 0.024237
epoch 6, loss 0.015897, test loss 0.021631
epoch 7, loss 0.012471, test loss 0.018260
epoch 8, loss 0.010560, test loss 0.016153
epoch 9, loss 0.009290, test loss 0.014753
epoch 10, loss 0.008212, test loss 0.014030
epoch 11, loss 0.006983, test loss 0.012123
epoch 12, loss 0.006167, test loss 0.011124
epoch 13, loss 0.005788, test loss 0.010879
epoch 14, loss 0.005653, test loss 0.010647
epoch 15, loss 0.005103, test loss 0.009972
epoch 16, loss 0.004611, test loss 0.009690
epoch 17, loss 0.004609, test loss 0.009587
epoch 18, loss 0.004262, test loss 0.009496
epoch 19, loss 0.004429, test loss 0.009353
epoch 20, loss 0.003904, test loss 0.008848
epoch 21, loss 0.003540, test loss 0.008753
epoch 22, loss 0.003494, test loss 0.008632
epoch 23, loss 0.003827, test loss 0.0091

In [1]:
wandb.finish()

NameError: name 'wandb' is not defined