In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from tqdm.notebook import tqdm as tqdm
from sklearn.preprocessing import StandardScaler
from pathlib import Path
from datetime import datetime
import math
import numpy as np
import pandas as pd

In [2]:
print(torch.__version__)

1.12.1+cu116


In [3]:
if torch.cuda.is_available():
    device = torch.device('cuda')
    torch.backends.cudnn.benchmark = True
else:
    device = torch.device('cpu')

print(device)

cuda


In [4]:
data_dir = str(Path(fr"./data/train.csv"))

In [5]:
# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 64
# define the input size of the model
input_size = 160
epoch_num = 300
dtype = torch.float

# Learning Rate
LR = 1e-3
EPS = 1e-7
LR_STEP = 100
LR_GAMMA = 1

In [6]:
class CustomDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        data = pd.read_csv(data_dir)
        self.inputs = data[['x1', 'x2']].to_numpy()
        self.outputs = data['y'].to_numpy()
        self.transform = transform

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        input_data = torch.tensor(self.inputs[idx], dtype= torch.float)
        output_data = torch.tensor(self.outputs[idx], dtype= torch.float)

        if self.transform:
            input_data = self.transform(input_data)

        return input_data, output_data

In [7]:
dataset = CustomDataset(data_dir)

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

In [8]:
# class DeepModel(nn.Module):
#     def __init__(self, layer_sizes):
#         super(DeepModel, self).__init__()

#         self.layers = nn.ModuleList()

#         for i in range(len(layer_sizes) - 1):
#             layer = nn.Linear(layer_sizes[i], layer_sizes[i + 1])
#             init.xavier_uniform_(layer.weight)  # 使用Xavier初始化
#             self.layers.append(layer)

#         self.activation = nn.LeakyReLU()

#     def forward(self, x):
#         for layer in self.layers[:-1]:
#             x = self.activation(layer(x))
        
#         x = self.layers[-1](x)  # 最后一层不使用激活函数
#         return x

In [9]:
class DeepModelWithBN(nn.Module):
    def __init__(
            self, 
            layer_sizes):
        super().__init__()

        self.layers = nn.ModuleList()

        for i in range(len(layer_sizes) - 1):
            layer = nn.Linear(layer_sizes[i], layer_sizes[i + 1]).to(dtype)
            # nn.init.xavier_uniform_(layer.weight)  # 使用Xavier初始化
            self.layers.append(layer)
            
            # if i != len(layer_sizes) - 2:  # 不在最后一个线性层后使用批量归一化
            #     bn = nn.BatchNorm1d(layer_sizes[i + 1]).to(dtype)
            #     self.layers.append(bn)

        # self.activation = nn.LeakyReLU()

    def forward(self, x):
        for i, layer in enumerate(self.layers[:-1]):
            x = layer(x)
            # if isinstance(layer, nn.Linear):  # 只在线性层后使用激活函数
            #     x = self.activation(x)
        
        x = self.layers[-1](x)  # 最后一层不使用激活函数
        return x

In [10]:
class EasyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(2, 128)
        self.layer2 = nn.Linear(128, 1)
        self.activation = nn.ReLU()

    def forward(self, x):
        x = nn.functional.relu(self.layer1(x))
        x = self.layer2(x)
        return x

In [11]:
# model = DeepModelWithBN([2, 16, 1])
model = EasyModel()
optimizer = torch.optim.Adam(model.parameters(), lr=LR, eps=EPS)
# optimizer = torch.optim.SGD(model.parameters(), lr=LR)
# lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=LR_STEP, gamma=LR_GAMMA)
criterion = nn.MSELoss()
# criterion = nn.L1Loss()
# criterion = nn.SmoothL1Loss()
# criterion = nn.PoissonNLLLoss()

In [12]:
model = model.to(device)


for epoch in range(epoch_num):

    # keep track of training and validation loss
    train_loss = 0.0
    val_loss = 0.0

    # Training loop
    model.train()
    for inputs, outputs in train_loader:
        optimizer.zero_grad()
        
        inputs = inputs.to(dtype).to(device)
        outputs = outputs.to(dtype).to(device)

        predictions = model(inputs)
        loss = criterion(predictions.squeeze(1), outputs)
        loss.backward()
        optimizer.step()

        # update training loss
        train_loss += loss.item() * inputs.size(0)

    # Validation loop
    model.eval()
    with torch.no_grad():
        for inputs, outputs in val_loader:

            inputs = inputs.to(dtype).to(device)
            outputs = outputs.to(dtype).to(device)

            predictions = model(inputs)
            loss = criterion(predictions.squeeze(1), outputs)
            val_loss += loss.item() * inputs.size(0)
    
    print(f"Epoch: {epoch+1}/{epoch_num}, Train Loss: {train_loss/len(train_loader)}, Validation Loss: {val_loss/len(val_loader)}")

Epoch: 1/300, Train Loss: 20.59938539123535, Validation Loss: 10.060813674926758
Epoch: 2/300, Train Loss: 9.053247436523437, Validation Loss: 8.570980491638183
Epoch: 3/300, Train Loss: 8.095714611053467, Validation Loss: 7.986491680145264
Epoch: 4/300, Train Loss: 7.680414859771728, Validation Loss: 7.684364547729492
Epoch: 5/300, Train Loss: 7.410926628112793, Validation Loss: 7.424878330230713
Epoch: 6/300, Train Loss: 7.209316089630127, Validation Loss: 7.231489810943604
Epoch: 7/300, Train Loss: 7.025253211975097, Validation Loss: 7.040695743560791
Epoch: 8/300, Train Loss: 6.866330814361572, Validation Loss: 6.957788200378418
Epoch: 9/300, Train Loss: 6.744995132446289, Validation Loss: 6.786459712982178
Epoch: 10/300, Train Loss: 6.604659015655518, Validation Loss: 6.684731388092041
Epoch: 11/300, Train Loss: 6.496509456634522, Validation Loss: 6.481972274780273
Epoch: 12/300, Train Loss: 6.325773052215576, Validation Loss: 6.405028972625733
Epoch: 13/300, Train Loss: 6.2328229

In [12]:
# 4.67 [2, 128, 128, 1] 600epochs

In [36]:
a = iter(train_loader)
b,c = next(a)
print(b)
print(c)

tensor([[-1.0000, -0.5760],
        [ 0.7370,  0.8590],
        [ 0.8790, -0.6360],
        [ 0.9800, -0.1920],
        [-0.0909, -0.7780],
        [-0.5150, -0.8990],
        [-0.8180, -0.9800],
        [-0.6360,  0.2930],
        [ 0.3940,  0.9390],
        [-0.5560, -0.6970],
        [ 0.6570, -0.2320],
        [-0.2120,  0.6970],
        [ 0.1520,  0.1110],
        [ 0.2120, -0.6770],
        [ 0.3330,  0.7980],
        [ 0.2730, -0.8180],
        [-0.2530, -0.0909],
        [-0.3940,  0.7580],
        [-0.0707,  0.8180],
        [ 0.3130,  0.0909],
        [ 0.4750, -0.6770],
        [-0.0707,  0.5350],
        [-0.0303, -0.5350],
        [-0.5150,  0.5560],
        [-0.4140,  0.5350],
        [ 0.1310,  0.3130],
        [-0.7980, -0.6360],
        [ 0.9600,  0.9600],
        [ 0.6570, -0.7980],
        [ 0.1520,  0.8380],
        [ 0.8990,  0.7370],
        [ 0.1720, -0.1310],
        [-0.6160,  0.5760],
        [-0.1720,  0.6160],
        [ 0.9800,  0.8180],
        [ 0.1720, -0