# Backpropagation Neural Networks - Learning and Implementation strategies

In this notebook, we will study the use of `PyTorch` and `Tensorflow` frameworks for implementing and training Neural Networks. This is not intended to be exhaustive, but rather to provide examples for exploring the algorithms and their hyperparameters with these frameworks.


### Imports

In [1]:
import numpy as np

In [None]:
import tensorflow as tf
from tensorflow import keras

In [None]:
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import SGD, RMSprop, Adam

In [23]:
# import matplotlib.pyplot as plt
# import plotly.graph_objects as go
# from plotly.subplots import make_subplots
# from mlxtend.plotting import plot_decision_regions
from tqdm import tqdm

In [24]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

### Global settings

In [25]:
seed = 1
device = 'cpu'

# read from here

In [26]:
import time
class Net_group_Y(nn.Module):
    
    def __init__(self, input_size=2, output_size=1,
                 hidden_layer_sizes=[3,4,5], activation=nn.ReLU()):
        
        super(Net_group_Y, self).__init__()
        #
        # 1. 1. Network architecture
        
        self.add_module(f'fc{1}', nn.Linear(input_size, hidden_layer_sizes[0]))
        
        for i in range(1,len(hidden_layer_sizes)):
            self.add_module(f'fc{i+1}', nn.Linear(hidden_layer_sizes[i-1], hidden_layer_sizes[i]))
        
        self.add_module(f'fc{len(hidden_layer_sizes) +1 }', nn.Linear(hidden_layer_sizes[-1], output_size))

        # Weights initialisation
        # The apply method applies the function passed as the apply() argument
        # to each element in the object, that in this case is the neural network.
        self.apply(self._init_weights)
        # Store the parameters
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_layer_sizes = hidden_layer_sizes
        self.activation = activation
        
        self.n_forward_calls = 0
        
    #
    # 1. 2. Weights and bias initialization
    #
    def _init_weights(self, attribute):
        if isinstance(attribute, nn.Linear):
          torch.nn.init.xavier_uniform_(attribute.weight)
          torch.nn.init.zeros_(attribute.bias)
    #
    # 1. 3. Forward pass
    """    
    def forward(self, x):
        # For each layer, the output will be the ReLu activation applied to the output of the linear operation
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        # For the last layer, the sigmoid function will be the activation
        x = torch.sigmoid(self.fc3(x))
        return x"""
    
    def forward(self, x):
        # Forward pass through all layers
        for i in range(1, len(self.hidden_layer_sizes) + 2):
            #print(f'forward pass layer {i}')
            layer = getattr(self, f'fc{i}')
            x = layer(x)
            if i < len(self.hidden_layer_sizes):
                self.n_forward_calls += 1
                x = self.activation(x)
        # Apply sigmoid activation to the output layer
        self.n_forward_calls += 1
        x = torch.sigmoid(x)
        return x
    
    #
    # 1. 4. Training loop
    # For details, see Machine Learning with PyTorch and Scikit-Learn.
    #
    def train(self, num_epochs, loss_fn, optimizer, train_dl, train_size, batch_size, x_valid, y_valid):
        # Initialize weights
        self.apply(self._init_weights)
    
        # Loss and accuracy history objects initialization
        loss_hist_train = [0] * num_epochs
        accuracy_hist_train = [0] * num_epochs
        loss_hist_valid = [0] * num_epochs
        accuracy_hist_valid = [0] * num_epochs
        delta_times = [0] * num_epochs
        self.n_forward_calls = 0
        
        # Learning loop
        for epoch in tqdm(range(num_epochs)):
            start_time = time.time()
            # Batch learn
            for x_batch, y_batch in train_dl:
                #print('*'*20)
                # ---
                # 1.4.1. Get the predictions, the [:,0] reshapes from (batch_size,1) to (batch_size)
                pred = self(x_batch)[:,0]
                # 1.4.2. Compute the loss
                loss = loss_fn(pred, y_batch)
                # 1.4.3. Back propagate the gradients
                # The `backward()` method, already available in PyTroch, calculates the 
                # derivative of the Error in respect to the NN weights
                # applying the chain rule for hidden neurons.
                loss.backward()
                # 1.4.4. Update the weights based on the computed gradients
                optimizer.step()
                # ---
                
                # Reset to zero the gradients so they will not accumulate over the mini-batches
                optimizer.zero_grad()
                
                # Update performance metrics
                loss_hist_train[epoch] += loss.item()
                is_correct = ((pred>=0.5).float() == y_batch).float()
                accuracy_hist_train[epoch] += is_correct.mean()
            
            delta_times[epoch] = time.time() - start_time
            # Average the results
            loss_hist_train[epoch] /= train_size/batch_size
            accuracy_hist_train[epoch] /= train_size/batch_size
            
            # Predict the validation set
            pred = self(x_valid)[:, 0]
            loss_hist_valid[epoch] = loss_fn(pred, y_valid).item()
            is_correct = ((pred>=0.5).float() == y_valid).float()
            accuracy_hist_valid[epoch] += is_correct.mean()
            
        return loss_hist_train, loss_hist_valid, accuracy_hist_train, accuracy_hist_valid, self.n_forward_calls, delta_times

    # Not needed normaly, it is just for mlextend plots
    def predict(self, x):
        print(f'predict with input shape: {x.shape}')
        x = torch.tensor(x, dtype=torch.float32)
        pred = self.forward(x)[:, 0]
        print(f'finished predict with output shape: {pred.shape}')
        return (pred>=0.5).float()
        

In [27]:
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
        
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))

criterion = RMSELoss()

class nn_model():
    def __init__(self,
                X_train,
                X_val,
                y_train,
                y_val,
                input_size, 
                output_size,
                hidden_layer_sizes, 
                optimizer,
                num_epochs,
                train_dl,  
                train_size,
                batch_size,
                learning_rate,
                activation=nn.ReLU(),
                loss_fn = RMSELoss()
                ):
        
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.train_dl = train_dl
        self.train_size = train_size
        self.batch_size = batch_size
        
        self.arcthicture = Net_group_Y(input_size=input_size,
                                  output_size=output_size,
                                  hidden_layer_sizes=hidden_layer_sizes,
                                  activation=activation)
        
        self.arcthicture.to(device)
        self.optimizer = optimizer
        self.num_epochs = num_epochs
        self.learning_rate = learning_rate
        
        self.fitness, self.test_fitness, self.accuracy, self.accuracy_valid, self.n_forward_calls, self.delta_times = self.fit(
                X_train,
                X_val,
                y_train,
                y_val,
                num_epochs=num_epochs, 
                loss_fn=criterion, 
                optimizer_name=optimizer,
                batch_size=batch_size, 
                learning_rate=learning_rate)
        
        
       

    def fit(self,X_train,X_val,y_train,y_val,
                num_epochs, loss_fn, optimizer_name, batch_size, learning_rate):
        """
        Train the model with the given parameters.
        
        Parameters:
        - num_epochs: Number of epochs to train the model.
        - loss_fn: Loss function to use for training.
        - optimizer: Optimizer to use for training.
        - batch_size: Size of each batch during training.
        - x_valid: Validation input data.
        - y_valid: Validation target data.
        - learning_rate: Learning rate for the optimizer.
        
        Returns:
        - history: Training history containing loss and accuracy metrics.
        """

        # Define datasets for data loaders
        train_ds = TensorDataset(X_train, y_train)
        val_ds = TensorDataset(X_val, y_val)

        train_size = len(train_ds)
        if optimizer_name == 'GD':
            batch_size = train_size
            train_dl = DataLoader(train_ds, batch_size, shuffle=True)
            #val_dl = DataLoader(val_ds, batch_size, shuffle=True)
        
        elif optimizer_name == 'SGD':
            batch_size = 1
            train_dl = DataLoader(train_ds, batch_size, shuffle=True)
            #val_dl = DataLoader(val_ds, batch_size, shuffle=True)

        else:
            batch_size = batch_size
            train_dl = DataLoader(train_ds, batch_size, shuffle=True)
            #val_dl = DataLoader(val_ds, batch_size, shuffle=True)
        
        optimizer_choiche = {
        'GD': torch.optim.SGD(self.arcthicture.parameters(), lr=learning_rate),
        'SGD': torch.optim.SGD(self.arcthicture.parameters(), lr=learning_rate),
        'MiniSGD': torch.optim.SGD(self.arcthicture.parameters(), lr=learning_rate),
        'ASGD': torch.optim.ASGD(self.arcthicture.parameters(), lr=learning_rate),
        
        'RMSprop': torch.optim.RMSprop(self.arcthicture.parameters(), lr=learning_rate),
        'Adam': torch.optim.Adam(self.arcthicture.parameters(), lr=learning_rate)
        }
        optimizer_instance = optimizer_choiche[optimizer_name]
        
        return self.arcthicture.train(
            num_epochs=num_epochs, 
            loss_fn=loss_fn, 
            optimizer=optimizer_instance, 
            train_dl=train_dl, 
            train_size=train_size, 
            batch_size=batch_size,
            x_valid=val_ds[0],
            y_valid=val_ds[1])
        



### Experimenting the function

In [28]:
# Data simulation
N_data = 200
train_size = 150
X = 2 * torch.rand(N_data, 2, device=device, dtype=torch.float32) - 1
y = torch.tensor([0 if elem[0]*elem[1] < 0 else 1 for elem in X], device=device, dtype=torch.float32)

# Split training and test partitions
X_train = X[:train_size, :]
y_train = y[:train_size]
X_val = X[train_size:, :]
y_val = y[train_size:]

# Define datasets for data loaders
train_ds = TensorDataset(X_train, y_train)
val_ds = TensorDataset(X_val, y_val)

# Create the data loaders
batch_size_GD = train_size
train_dl_GD = DataLoader(train_ds, batch_size_GD, shuffle=True)
val_dl_GD = DataLoader(val_ds, batch_size_GD, shuffle=True)

batch_size_SGD = 1
train_dl_SGD = DataLoader(train_ds, batch_size_SGD, shuffle=True)
val_dl_SGD = DataLoader(val_ds, batch_size_SGD, shuffle=True)

batch_size_MiniSGD = 32
train_dl_MniSGD = DataLoader(train_ds, batch_size_MiniSGD, shuffle=True)
val_dl_MiniSGD = DataLoader(val_ds, batch_size_MiniSGD, shuffle=True)

print('Partition sizes')
print([X_train.shape, y_train.shape], [X_train.dtype, y_train.dtype], [X_train.device, y_train.device])
print([X_val.shape, y_val.shape], [X_val.dtype, y_val.dtype], [X_val.device, y_val.device])

print('\nBatch sizes')
print(batch_size_GD)
print(batch_size_SGD)
print(batch_size_MiniSGD)

Partition sizes
[torch.Size([150, 2]), torch.Size([150])] [torch.float32, torch.float32] [device(type='cpu'), device(type='cpu')]
[torch.Size([50, 2]), torch.Size([50])] [torch.float32, torch.float32] [device(type='cpu'), device(type='cpu')]

Batch sizes
150
1
32


In [30]:
nn_model(X_train = X_train,
         X_val = X_val,
         y_train = y_train,
        y_val = y_val,
        input_size = 3, 
        output_size = 1,
        hidden_layer_sizes = [2,3,4], 
        optimizer = 'GD',
        num_epochs = 10,
        train_dl = train_dl_GD,  
        train_size = train_size,
        batch_size = batch_size_GD,
        learning_rate = 0.01
        )

  0%|          | 0/10 [00:00<?, ?it/s]


RuntimeError: mat1 and mat2 shapes cannot be multiplied (150x2 and 3x2)

# Calculating the time of the other models

In [None]:
import os
import pandas as pd
os.chdir(os.path.join(os.getcwd(), os.pardir))
%cd notebooks/

c:\Users\irism\OneDrive - NOVAIMS\Msc-NEL\Neural_Evo_Learn\notebooks


In [None]:
model_times = {}
for model in ['SLIM-GSGP', 'GSGP', 'GP']:
    total_time = 0
    for i_outer in range(10):
        df = pd.read_csv(f"./log/{model}/{model}_sustavianfeed_outer_{i_outer}.csv")
        total_time += df.iloc[:, 6].sum()
    model_times[model] = total_time

model_times_df = pd.DataFrame.from_dict(model_times, orient='index', columns=['Total Time (s)'])
    

In [None]:
model_times_df

Unnamed: 0,Total Time (s)
SLIM-GSGP,24.840032
GSGP,44.637842
GP,144.93446
