In [1]:
import datetime

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import wandb
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# Select the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [3]:
# Load the pre-split dataset (train/test inputs and targets stored as one tuple).
# `map_location=device` remaps the stored tensors onto the selected device while
# deserialising, so that if the file was saved with CUDA tensors it still loads
# when `device` has fallen back to the CPU.
# NOTE(review): torch.load unpickles the file; only load dataset files from a
# trusted source.
X_train, Y_train, X_test, Y_test = torch.load('./data/dataset_2_40D.pt', map_location=device)

# Move the data to the selected device (a no-op for tensors already placed there).
X_train = X_train.to(device)
Y_train = Y_train.to(device)
X_test = X_test.to(device)
Y_test = Y_test.to(device)

# Display the shapes of the four splits as a sanity check.
X_train.shape, Y_train.shape, X_test.shape, Y_test.shape

(torch.Size([8000, 41]),
 torch.Size([8000]),
 torch.Size([2000, 41]),
 torch.Size([2000]))

In [4]:
# Serve the training split in shuffled mini-batches through a DataLoader.
l = 64  # mini-batch size (the wandb config also reads this name)
train_dataset = TensorDataset(X_train, Y_train)
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=l)

# Print the shapes of the first batch.
sample_batch = next(iter(train_loader), None)
if sample_batch is not None:
    x, y = sample_batch
    print(x.shape, y.shape)

torch.Size([64, 41]) torch.Size([64])


In [5]:
# Problem sizes: the network is built with D + 1 input features and m hidden units.
D, m = 40, 100

In [6]:
class PM_Euler(nn.Module):
    """Two-layer ReLU network whose output is divided by the hidden width.

    The forward pass computes ``relu(x @ W) @ a / hidden_layer`` where ``W``
    has shape ``(input, hidden_layer)`` and ``a`` has shape
    ``(hidden_layer, output)``. Both matrices are allocated on the
    module-level ``device``.

    Args:
        input: number of input features (rows of ``W``).
        hidden_layer: number of hidden units; also the divisor of the output.
        output: number of output units (columns of ``a``).
    """

    def __init__(self, input, hidden_layer, output):
        super().__init__()
        self.relu = nn.ReLU()
        self.hidden_dim = hidden_layer

        # He (Kaiming) initialisation of both weight matrices; the uniform
        # values drawn by torch.rand are overwritten in place.
        # NOTE(review): for a 2-D tensor PyTorch derives fan_in from size(1).
        # With this (in, out) layout that is `hidden_layer` for W and `output`
        # for a, not the number of incoming features -- confirm this scale is
        # the intended one.
        first_layer = torch.rand(input, hidden_layer, device=device)
        self.W = nn.Parameter(first_layer, requires_grad=True)
        nn.init.kaiming_normal_(self.W, mode='fan_in', nonlinearity='relu')

        second_layer = torch.rand(hidden_layer, output, device=device)
        self.a = nn.Parameter(second_layer, requires_grad=True)
        nn.init.kaiming_normal_(self.a, mode='fan_in', nonlinearity='relu')

    def forward(self, x):
        """Return the network output for a 2-D batch ``x`` of shape ``(batch, input)``."""
        hidden = self.relu(x.mm(self.W))
        return hidden.mm(self.a) / self.hidden_dim

    def loss(self, y_pred, y_true):
        """Return the element-wise squared error (no reduction).

        ``y_true`` is reshaped to the shape of ``y_pred`` before subtracting.
        """
        residual = y_pred - y_true.reshape(y_pred.shape)
        return residual.pow(2)
# Build the network (D + 1 inputs, m hidden units, one output) and place it on `device`.
model = PM_Euler(input=D + 1, hidden_layer=m, output=1).to(device)


# Compute the norms of the model's W and a matrices.
def get_norm(model):
    """Return ``(norm of model.W, norm of model.a)`` as a tuple of Python floats."""
    return tuple(torch.norm(weights).item() for weights in (model.W, model.a))

# Display the norms of W and a for the freshly constructed model.
get_norm(model)

(9.233951568603516, 15.293822288513184)

In [7]:
# Optimisation hyper-parameters.
epochs, lr = 10000, 1
C = 1          # constant added to the loss under the square root
_lambda = 4    # enters the update through the factor 1 + lr * _lambda
ratio_n = 0.99 # weight of the parameter-norm term in the relaxation coefficient c

# Relaxation state, overwritten inside the training loop.
r_wave = r_hat = r = 0
a = b = c = 0
ellipsis_0 = 0

# Per-epoch loss histories (two independent lists).
train_losses, test_losses = [], []

In [8]:
# Run configuration handed to wandb.init. Every entry is read from the variable
# the notebook actually uses, so the logged values cannot drift from the ones in
# effect ('ratio_n' used to be a hard-coded copy of 0.99).
config = {
    'learning_rate': lr,
    'batch_size': l,
    'epochs': epochs,
    'hidden_layer': m,
    'input': D + 1,
    'output': 1,
    'optimizer': 'RelaxedSAV',
    'C': C,
    '_lambda': _lambda,
    'ratio_n': ratio_n
}

In [9]:
# Start a Weights & Biases run. The month/day/hour/minute stamp is appended to
# the run name; `datetime` is imported in the notebook's import cell.
date = datetime.datetime.now().strftime("%m%d%H%M")
wandb.init(project='Numerical Method', name=f"PM_RelaxedSAV_sum_Example_2_test_{date}", config=config)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mpheonizard[0m ([33mpheonizard-university-of-nottingham[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [1]:
# Relaxed-SAV training loop.
#
# Per mini-batch:
#   1. evaluate the summed squared-error loss and its gradients (N_a, N_w);
#   2. compute the intermediate auxiliary variable r_wave and take a parameter
#      step scaled by it;
#   3. re-evaluate the loss at the new parameters (r_hat = sqrt(loss + C)) and
#      set r = ellipsis_0 * r_wave + (1 - ellipsis_0) * r_hat, where ellipsis_0
#      is the non-negative root of a * xi^2 + b * xi + c = 0.
# Per epoch: log mean losses on the full splits, parameter norms and the SAV
# scalars to wandb.
for epoch in range(epochs):
    # r is re-initialised from the first mini-batch of every epoch.
    cnt = 0
    for X, Y in train_loader:
        y_pred = model(X)
        loss = model.loss(y_pred, Y).sum()
        loss.backward()
        with torch.no_grad():
            energy = loss.detach() + C
            sqrt_energy = torch.sqrt(energy)
            if cnt == 0:
                # torch.sqrt already returns a new tensor, so no torch.tensor(...)
                # wrapper (which warns when given a tensor) is needed.
                r = sqrt_energy.clone()
                cnt = 1
            #===============Update the parameters in SAV================
            N_a = model.a.grad.clone()
            N_w = model.W.grad.clone()
            # Snapshot the parameters before the step: the relaxation below needs
            # the squared size of the parameter increment.
            a_old = model.a.detach().clone()
            W_old = model.W.detach().clone()
            damping = 1 + lr * _lambda
            theta_a_2 = lr * N_a / (sqrt_energy * damping)
            theta_w_2 = lr * N_w / (sqrt_energy * damping)
            grad_sq = (torch.sum(N_a * N_a) + torch.sum(N_w * N_w)) / damping
            r_wave = r / (1 + lr * grad_sq / (2 * energy))
            model.a -= r_wave * theta_a_2
            model.W -= r_wave * theta_w_2
            model.a.grad.zero_()
            model.W.grad.zero_()
            # ===============Update r in SAV================
            tmp_loss = model.loss(model(X), Y).sum()
            r_hat = torch.sqrt(tmp_loss + C)
            # Squared norm of the parameter increment theta_new - theta_old.
            # (Previously the gradient was subtracted from the new parameters here.)
            step_sq = torch.sum((model.a - a_old) ** 2) + torch.sum((model.W - W_old) ** 2)
            a = (r_wave - r_hat) ** 2
            b = 2 * r_hat * (r_wave - r_hat)
            c = r_hat ** 2 - r_wave ** 2 - ratio_n * step_sq / lr
            # Detect NaNs before they are fed into the root formula.
            if torch.isnan(r_wave) or torch.isnan(r_hat) or torch.isnan(a) or torch.isnan(b) or torch.isnan(c):
                print(r, r_wave, r_hat, ellipsis_0, a, b, c)
                raise ValueError('nan')
            # Validate the discriminant before taking its square root.
            discriminant = b ** 2 - 4 * a * c
            if discriminant < 0:
                raise ValueError('b^2 - 4ac < 0')
            if a == 0:
                # r_wave == r_hat: every weight yields the same r, and the root
                # formula would evaluate 0 / 0 and turn r into NaN.
                ellipsis_0 = torch.zeros_like(r_hat)
            else:
                ellipsis_0 = torch.clamp((-b - torch.sqrt(discriminant)) / (2 * a), min=0)
            r = ellipsis_0 * r_wave + (1 - ellipsis_0) * r_hat
    with torch.no_grad():
        # Store plain floats so the history lists do not hold device tensors
        # and can be plotted directly.
        train_loss = model.loss(model(X_train), Y_train).mean().item()
        test_loss = model.loss(model(X_test), Y_test).mean().item()
        train_losses.append(train_loss)
        test_losses.append(test_loss)
        norm = get_norm(model)
        # NOTE(review): 'accuracy' is 1 minus the mean squared test error, not a
        # classification accuracy.
        wandb.log({'epoch': epoch + 1,
                   'train_loss': train_loss,
                   'test_loss': test_loss,
                   'norm_W': norm[0],
                   'norm_a': norm[1],
                   'accuracy': 1 - test_loss,
                   'r': r.item(),
                   'r_wave': r_wave.item(),
                   'r_hat': r_hat.item(),
                   'ellipsis': float(ellipsis_0)})
        print(f'epoch {epoch + 1}, loss {train_loss:.6f}, test loss {test_loss:.6f}')

NameError: name 'epochs' is not defined

In [None]:
# Mark the Weights & Biases run as finished.
wandb.finish()