**NOTE**: The code in this notebook takes a long time to run. Therefore, we have saved the results to files after running it once. 
You can load the results from the file directly instead of running the code again by setting the below `LOAD_FROM_FILE` variable to `True`.

In [1]:
LOAD_FROM_FILE = True

In [None]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader

import numpy as np
import matplotlib.pyplot as plt
from utils.utils import skip_if, generate_dataset
from sklearn.model_selection import train_test_split

plt.style.use('./utils/_plot_style.mplstyle')

## Test PyTorch for regression task

In [3]:
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using mps device


In [4]:
# Generate dataset
np.random.seed(124)

n = 300
x, y = generate_dataset(num=n)
x_train, x_test, y_train, y_test = train_test_split(x, y,test_size=0.2,random_state=44)
num_nodes = 50
num_hidden_layers = [1,2]

# Define training methods
training_methods = [
    "SGD",
    "GD",
]

# Step methods
step_methods = [
    "RMSprop", 
    "ADAM"
]

In [5]:
x_train_tensor = torch.tensor(x_train, dtype=torch.float32).to(device).float()
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device).float()
x_test_tensor  = torch.tensor(x_test, dtype=torch.float32).to(device).float()
y_test_tensor  = torch.tensor(y_test, dtype=torch.long).to(device).float()

train_dataset = torch.utils.data.TensorDataset(x_train_tensor, y_train_tensor)
train_loader_SGD = DataLoader(train_dataset, batch_size=60, shuffle=True)
train_loader_GD = DataLoader(train_dataset, shuffle=False)

test_dataset = torch.utils.data.TensorDataset(x_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, shuffle=False)

In [6]:
class NeuralNetwork_regression(nn.Module):
    def __init__(self,num_nodes,num_hidden_layers):
        super().__init__()

        model_layers = [nn.Linear(1,num_nodes),nn.Sigmoid()]
        for i in range(num_hidden_layers-1): 
            model_layers.append(nn.Linear(num_nodes,num_nodes))
            model_layers.append(nn.Sigmoid())
        model_layers.append(nn.Linear(num_nodes,1))

        self.linear_stack = nn.Sequential(*model_layers)

    def forward(self, x):
        out = self.linear_stack(x)
        return out
    
    def testing_MSE(self): 
        with torch.no_grad():  # disable gradient calculation for evaluation 
            input, target = test_dataset.tensors
            output = self.linear_stack(input)
        return torch.mean((target - output)**2)/len(target)
            
    
    def train_model_regression(self,lr=0.01, num_epochs=3000, lambd=0,training_method = "SGD",step_method = "ADAM"):   # train NN model for regression
        criterion = nn.MSELoss(reduction='mean')

        if training_method == "SGD": 
            loader = train_loader_SGD
            n_batches = 5   # define number of batches, so the learning rate can be divided by it (corresponds then to our neural network code)
        else: 
            loader = train_loader_GD
            n_batches = 1

        if step_method == "ADAM":
            optimizer = optim.Adam(self.parameters(), lr=lr/n_batches, weight_decay=lambd) # lambd is regularization parameter for L2 regularization
        if step_method=="RMSprop": 
            optimizer = optim.RMSprop(self.parameters(), alpha = 0.9,lr=lr/n_batches, weight_decay=lambd) # lambd is regularization parameter for L2 regularization

        optimizer = optim.SGD(self.parameters(), lr=lr/n_batches, weight_decay=lambd)  
        for _ in range(num_epochs):
            self.train()  # set model to training mode

            for input, target in loader: 
                optimizer.zero_grad()            # reset gradients to zero
                outputs = self.linear_stack(input)          # forward pass: compute predictions
                loss = criterion(outputs,target)  # compute MSE
                loss.backward()                 # backpropagate to compute gradients
                optimizer.step()                # update weights using SGD step 

        return self.testing_MSE()

In [7]:
filepath_1 = "../data/best_learning_rate_SGD_final.npy"
filepath_2 = "../data/best_learning_rate_GD_final.npy"

best_learning_rates_SGD = np.load(filepath_1)
best_learning_rates_GD = np.load(filepath_2)

In [8]:
%%skip_if LOAD_FROM_FILE

n_learning_rates = 9
num_iterations = 3000
pytorch_mse_data = np.zeros((len(num_hidden_layers), len(training_methods),len(step_methods)))

# Analyze mse vs. learning rates for each combination of model, training method, and step method
for i in range(len(num_hidden_layers)):
    print(num_hidden_layers[i])
    for j, training_method_name in enumerate(training_methods):
        for l, step_method_name in enumerate(step_methods): 
            print(".", end="")
            np.random.seed(124)
            torch.manual_seed(124)

            if training_method_name == "SGD": 
                best_lr = best_learning_rates_SGD
            else: 
                best_lr = best_learning_rates_GD

            model = NeuralNetwork_regression(num_nodes, num_hidden_layers[i]).to(device)
            mse_data = model.train_model_regression(lr = best_lr[i,l],training_method = training_method_name,step_method = step_method_name)
            pytorch_mse_data[i][j][l] = mse_data


In [9]:
filepath = "../data/regression_mse_data_torch.npy"
# np.save(filepath, pytorch_mse_data)
if LOAD_FROM_FILE:
    pytorch_mse_data = np.load(filepath)

In [10]:
print("PyTorch MSE for SGD: ",pytorch_mse_data[:,0,:]) 
print("PyTorch MSE for GD: ",pytorch_mse_data[:,1,:]) 

PyTorch MSE for SGD:  [[2.22179897e-05 6.84068141e-07]
 [1.41547707e-06 1.43313116e-06]]
PyTorch MSE for GD:  [[2.02882325e-06 2.02882325e-06]
 [1.47618073e-06 1.47618073e-06]]
