In [1]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from functions_to_optimize import f_rastrigin
from CMA_obj import CMA_opt
from PEPG_obj import PEPG_opt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from SPSA_obj import SPSA_opt
from ADAM_opt import AdamOptimizer
from PSO_obj import PSO_opt

In [2]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
import pandas as pd

#   Online training of Neural Networks

## 1) Defining a simple neural network
- Define the feed forward NN class
- Load the datasets 

In [111]:
#this is a simple single layer feed forward NN with ReLU activation and adjustable hidden layer size
class Neural_Net(nn.Module):
    def __init__(self, input_size, hidden_size, n_classes):
        "init method that defines the NN architecture and inherits from nn.Module"
        super(Neural_Net, self).__init__()
        
        self.NN_stack = nn.Sequential(
            nn.Flatten(),
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, n_classes),
            #nn.Softmax(dim=1)
        )
        
        #self.num_params = sum(p.numel for p in self.parameters())
    
    def forward(self, X):
        "forward pass"
        logits = self.NN_stack(X)
        return logits
    
    
    def reset_weights(self):
        "method to reset the weights of the NN"
        for layer in self.NN_stack:
            if isinstance(layer,nn.Linear):
                layer.reset_parameters()
        for param in self.parameters():
            param.requires_grad = True
                
    def get_params(self):
        "Method to get parameters from the neural network"
        params_list = []
        
        for param in self.parameters():
            params_list.append(param.view(-1))
        
        full_params = torch.cat(params_list)
        return full_params
    
    def set_params(self,params_to_send):
        "Method to set parameters params in the neural network for online training. params_to_send is a column vector "
        idx_prev = 0
        for param in self.parameters():
           n_params = param.data.numel()
           new_param =  torch.reshape(torch.from_numpy(params_to_send[idx_prev: idx_prev + n_params ]),shape=param.data.shape)
           param.data.copy_(new_param)
           idx_prev += n_params
    
    def forward_pass_params(self,params_to_send,X):
        "This method is a forward pass that also takes in the parameters of the neural network as a variable, to use in online learning"
        self.set_params(params_to_send)
        logits = self.NN_stack(X)
        return logits
        

#class for a linear classifier

class Lin_classifier(nn.Module):
    def __init__(self, input_size, n_classes):
        super(Lin_classifier,self).__init__()
        
        self.NN_stack = nn.Sequential(
            nn.Flatten(),
            nn.Linear(input_size, n_classes),
        )
        
    def forward(self,X):
        logits = self.NN_stack(X)
        return logits
    


# custom dataset class for datasets that are not included in torchvision like wine or IRIS

class Custom_dataset(torch.utils.data.Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]



In [37]:
#define training loop od pytorch NN as a function

def train_pytorch_NN(model, n_epochs, train_loader, test_loader, loss, optimizer):
    
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)

    print(f"Using {device} device")
    print(model)
    
    #array to store the accuracy of the model
    accuracy_list = []
    for epoch in range(n_epochs):
        model.train()
        for i, (images,labels) in enumerate(train_loader):
            #move data to gpu for faster processing
            images = images.to(device)
            labels = labels.to(device)
            #forward pass
            Y_pred = model.forward(images)
            loss_value = loss(Y_pred,labels)
            #backward pass
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()
            
            
            #print accuracy every 100 steps for the test set
            if (i+1) % 100 == 0:
                model.eval()
                correct = 0 
                total = 0
                for images, labels in test_loader:
                    images = images.to(device)
                    labels = labels.to(device)
                    Y_pred = model.forward(images)
                    _, predicted = torch.max(Y_pred.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
                accuracy = ( 100*correct/total)
                accuracy_list.append(accuracy)
                print(f'Epoch [{epoch+1}/{n_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss_value.item()}, Test Accuracy: {accuracy}%')
    return accuracy_list


def train_online_pop_NN(model, n_epochs, train_loader, test_loader, loss, optimizer):
    
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)

    print(f"Using {device} device")
    print(model)
    
    best_reward = np.ones([n_epochs,1])
    rewards = []
    for epoch in range(n_epochs):
        model.eval()
        for i, (features,labels) in enumerate(train_loader):
            
            coordinates = optimizer.ask()
            
            for k in range(coordinates.shape[-1]):
                
                Y_pred = model.forward_pass_params(coordinates[:,k],features)
                loss_value = loss(Y_pred,labels)
                rewards.append(loss_value)

            rewards = np.ndarray(rewards)[:,np.newaxis]
            optimizer.tell(rewards)
            best_params = coordinates[:,np.argmin(rewards)]
            
                #print accuracy every 100 steps for the test set
            if (i+1) % 100 == 0:
                model.eval()
                correct = 0 
                total = 0
                for features, labels in test_loader:
                    features = features.to(device)
                    labels = labels.to(device)
                    Y_pred = model.forward_pass_params(best_params,features)
                    _, predicted = torch.max(Y_pred.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
                accuracy = ( 100*correct/total)
                accuracy_list.append(accuracy)
                print(f'Epoch [{epoch+1}/{n_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss_value.item()}, Test Accuracy: {accuracy}%')
    return accuracy_list
    
def quantize_model(model,quant_levels):
    "function to quantize weights of model on a layer by layer basis, quant_levels is the number of quantization steps"
    with torch.no_grad():
            
        for param in model.parameters():
            min_param = param.min()
            max_param = param.min()
            step = (max_param - min_param ) / (quant_levels)
            
            n_steps = ((param - min_param) / step).round()
            
            quantized_value = min_param + step * n_steps
            
            param.copy_(quantized_value)
    return


### Loading datasets
X is the input, Y the output

In [5]:
# MNIST dataset
MNIST_train = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
MNIST_test = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor(), download=True)

train_loader_MNIST = torch.utils.data.DataLoader(dataset=MNIST_train, batch_size=100, shuffle=True)
test_loader_MNIST = torch.utils.data.DataLoader(dataset=MNIST_test, batch_size=100, shuffle=False)

X_train_MNIST, Y_train_MNIST = next(iter(train_loader_MNIST))
X_test_MNIST, Y_test_MNIST = next(iter(test_loader_MNIST))

In [6]:
# Wine dataset
wine_df = pd.read_csv("data\\WINE\\winequality-red.csv")

wine_raw = wine_df.values.astype(np.float32)

# Convert to PyTorch tensors
X = torch.from_numpy(wine_raw[:, :-1])
Y = torch.from_numpy(wine_raw[:, -1]).unsqueeze(1)

# Create a single dataset
full_dataset = Custom_dataset(X, Y)

# Split into train and test sets, first set the size of the split
train_size = int(0.75 * len(full_dataset))
test_size = len(full_dataset) - train_size
# split into train and test sets using pytorch randomsplit

Wine_train, Wine_test = torch.utils.data.random_split(full_dataset, [train_size, test_size])


Wine_train_loader = torch.utils.data.DataLoader(dataset=Wine_train, batch_size=100, shuffle=True)
Wine_test_loader = torch.utils.data.DataLoader(dataset=Wine_test, batch_size=100, shuffle=False)

In [23]:
#Iris dataset
iris_df = pd.read_csv("data\\IRIS\\iris.csv")
# convert the last column 


iris_raw = iris_df.values

for i in range(len(iris_raw)):
    if iris_raw[i,-1] == 'Iris-setosa':
        iris_raw[i,-1] = 0
    elif iris_raw[i,-1] == 'Iris-versicolor':
        iris_raw[i,-1] = 1
    else:
        iris_raw[i,-1] = 2
        
iris_raw = iris_raw.astype(np.float32)
#iris raw needs to be shuffled randomly because the data is ordered by class
np.random.shuffle(iris_raw)

# Convert to PyTorch tensors
X = torch.from_numpy(iris_raw[:, :-1])
Y = torch.from_numpy(iris_raw[:, -1]).unsqueeze(1)

# Create a single dataset
full_dataset = Custom_dataset(X, Y)

# Split into train and test sets, first set the size of the split
train_size = int(0.75 * len(full_dataset))
test_size = len(full_dataset) - train_size
# split into train and test sets using pytorch randomsplit
Iris_train, Iris_test = torch.utils.data.random_split(full_dataset, [train_size, test_size])

Iris_train_loader = torch.utils.data.DataLoader(dataset=Iris_train, batch_size=train_size, shuffle=True)
Iris_test_loader = torch.utils.data.DataLoader(dataset=Iris_test, batch_size=test_size, shuffle=False)

In [33]:
#We Now create an instance of the NN class and move if to the GPU if available
n_neurons = 100
NN_MNIST = Neural_Net(input_size=28*28, hidden_size=n_neurons, n_classes=10)

loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(NN_MNIST.parameters(), lr=0.001)

#training the full NN
n_epochs = 10
test_acc = train_pytorch_NN(NN_MNIST, n_epochs, train_loader_MNIST, test_loader_MNIST, loss, optimizer)

Using cuda device
Neural_Net(
  (NN_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=100, bias=True)
    (2): ReLU()
    (3): Linear(in_features=100, out_features=10, bias=True)
  )
)
Epoch [1/10], Step [100/600], Loss: 0.4451817274093628, Test Accuracy: 88.18%
Epoch [1/10], Step [200/600], Loss: 0.37893033027648926, Test Accuracy: 90.72%
Epoch [1/10], Step [300/600], Loss: 0.38505715131759644, Test Accuracy: 91.69%
Epoch [1/10], Step [400/600], Loss: 0.29797354340553284, Test Accuracy: 92.65%
Epoch [1/10], Step [500/600], Loss: 0.20445844531059265, Test Accuracy: 93.07%
Epoch [1/10], Step [600/600], Loss: 0.1554512083530426, Test Accuracy: 93.39%
Epoch [2/10], Step [100/600], Loss: 0.14834657311439514, Test Accuracy: 93.89%
Epoch [2/10], Step [200/600], Loss: 0.24757377803325653, Test Accuracy: 94.49%
Epoch [2/10], Step [300/600], Loss: 0.19392041862010956, Test Accuracy: 94.44%
Epoch [2/10], Step [400/600], Loss: 0.1630591601

In [49]:
# Training loop for MNIST but without training the input layer (extreme learning machine)

#first reset the model we trained before
NN_MNIST.reset_weights()

#set the input layer to not require gradients
NN_MNIST.NN_stack[1].weight.requires_grad = False
NN_MNIST.NN_stack[1].bias.requires_grad = False

loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(NN_MNIST.parameters(), lr=0.01)

n_epochs = 10
test_acc_ELM = train_pytorch_NN(NN_MNIST, n_epochs, train_loader_MNIST, test_loader_MNIST, loss, optimizer)



Using cuda device
Neural_Net(
  (NN_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=100, bias=True)
    (2): ReLU()
    (3): Linear(in_features=100, out_features=10, bias=True)
  )
)
Epoch [1/10], Step [100/600], Loss: 1.1636791229248047, Test Accuracy: 76.66%
Epoch [1/10], Step [200/600], Loss: 0.8452395796775818, Test Accuracy: 80.28%
Epoch [1/10], Step [300/600], Loss: 0.8284144401550293, Test Accuracy: 82.33%
Epoch [1/10], Step [400/600], Loss: 0.7853445410728455, Test Accuracy: 83.3%
Epoch [1/10], Step [500/600], Loss: 0.6560500264167786, Test Accuracy: 83.89%
Epoch [1/10], Step [600/600], Loss: 0.8256441354751587, Test Accuracy: 84.47%
Epoch [2/10], Step [100/600], Loss: 0.5336945652961731, Test Accuracy: 84.95%
Epoch [2/10], Step [200/600], Loss: 0.5550755858421326, Test Accuracy: 85.08%
Epoch [2/10], Step [300/600], Loss: 0.5654203295707703, Test Accuracy: 85.45%
Epoch [2/10], Step [400/600], Loss: 0.5484793782234192, 

In [None]:
#Train the linear model for reference
Linear_model = Lin_classifier(input_size=28*28,n_classes=10)

loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(Linear_model.parameters(), lr=0.001)

n_epochs = 10
test_acc_lin = train_pytorch_NN(Linear_model, n_epochs, train_loader_MNIST, test_loader_MNIST, loss, optimizer)

In [86]:
# plot the accuracy of the ELM and FF models using plotly

fig = go.Figure()
fig.add_trace(go.Scatter(x=np.arange(len(test_acc)), y=test_acc, mode='lines', name='Full NN'))
fig.add_trace(go.Scatter(x=np.arange(len(test_acc_ELM)), y=test_acc_ELM, mode='lines', name='ELM'))
fig.add_trace(go.Scatter(x=np.arange(len(test_acc_lin)), y=test_acc_lin, mode='lines', name='Linear'))

#change theme to white and set the sizer of the plot
fig.update_layout(template='plotly_white', width=400, height=400,margin=dict(l=20, r=20, t=20, b=20))
#set xlim
fig.update_xaxes(range=[0, 60])


In [68]:
#We scan the NN size and plot the accuracy of the ELM and FF models to compare with MNIST

n_neurons_vec = [10,20,50,100,200,500,1000,2000]
n_epochs = 10
loss = nn.CrossEntropyLoss()

test_acc_mat = np.zeros([len(n_neurons_vec),60])
test_acc_ELM_mat = np.zeros([len(n_neurons_vec),60])

for i in range(len(n_neurons_vec)):
    print('Training FFNN with ', n_neurons_vec[i], ' neurons...')
    n_neurons = n_neurons_vec[i]
    
    NN_MNIST = Neural_Net(input_size=28*28, hidden_size=n_neurons, n_classes=10)
    optimizer = torch.optim.Adam(NN_MNIST.parameters(), lr=0.001)

    NN_MNIST.NN_stack[1].weight.requires_grad = True
    NN_MNIST.NN_stack[1].bias.requires_grad = True

    #training the full NN
    
    test_acc_mat[i,:] = train_pytorch_NN(NN_MNIST, n_epochs, train_loader_MNIST, test_loader_MNIST, loss, optimizer)
    
    NN_MNIST.reset_weights()
    NN_MNIST.NN_stack[1].weight.requires_grad = False
    NN_MNIST.NN_stack[1].bias.requires_grad = False
    optimizer = torch.optim.Adam(NN_MNIST.parameters(), lr=0.01)
    
    print('Training ELM with ', n_neurons_vec[i], ' neurons...')
    #set the input layer to not require gradients

    
    test_acc_ELM_mat[i,:] = train_pytorch_NN(NN_MNIST, n_epochs, train_loader_MNIST, test_loader_MNIST, loss, optimizer)
    
    del NN_MNIST  # Assuming 'model' is your neural network instance
    torch.cuda.empty_cache()  # Advisable after deleting the model

    
    

Training FFNN with  10  neurons...
Using cuda device
Neural_Net(
  (NN_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=10, bias=True)
    (2): ReLU()
    (3): Linear(in_features=10, out_features=10, bias=True)
  )
)
Epoch [1/10], Step [100/600], Loss: 1.149375557899475, Test Accuracy: 74.51%
Epoch [1/10], Step [200/600], Loss: 0.58460932970047, Test Accuracy: 83.41%
Epoch [1/10], Step [300/600], Loss: 0.547219455242157, Test Accuracy: 85.53%
Epoch [1/10], Step [400/600], Loss: 0.5631541013717651, Test Accuracy: 86.84%
Epoch [1/10], Step [500/600], Loss: 0.6401227712631226, Test Accuracy: 87.89%
Epoch [1/10], Step [600/600], Loss: 0.42665836215019226, Test Accuracy: 88.57%
Epoch [2/10], Step [100/600], Loss: 0.3109077513217926, Test Accuracy: 89.05%
Epoch [2/10], Step [200/600], Loss: 0.35700684785842896, Test Accuracy: 89.39%
Epoch [2/10], Step [300/600], Loss: 0.3251744210720062, Test Accuracy: 89.57%
Epoch [2/10], Step [400/

In [104]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=n_neurons_vec, y=np.max(test_acc_mat,1), mode='markers+lines', name='Full NN', line = dict(color='royalblue')))
fig.add_trace(go.Scatter(x=n_neurons_vec, y=np.max(test_acc_ELM_mat,1), mode='markers+lines', name='ELM',line = dict(color='firebrick')))
#add linear model as dotted line 
fig.add_trace(go.Scatter(x=n_neurons_vec, y=np.max(test_acc_lin)*np.ones(len(n_neurons_vec)), mode='lines', name='Linear', line=dict(dash='dot')))
#change theme to white and set the sizer of the plot
fig.update_layout(template='plotly_white', width=400, height=300,margin=dict(l=20, r=20, t=20, b=20))
#set xlim
#fig.update_xaxes(range=[0, 1000])
#log scale on y axis
fig.update_yaxes(title_text="Accuracy [%]")
fig.update_xaxes(title_text="Neurons",type = 'log',exponentformat="power")

#fig.update_yaxes(range=[0, 1000])

In [96]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=n_neurons_vec, y=np.max(test_acc_mat,1)/np.max(test_acc_ELM_mat,1), mode='lines', name='Performance ratio'))
#change theme to white and set the sizer of the plot
fig.update_layout(template='plotly_white', width=400, height=400,margin=dict(l=20, r=20, t=20, b=20))
fig.update_xaxes(title_text="Neurons",type = 'log',exponentformat="power")



In [112]:
#impact of quantization on network performance 

n_bit_vector = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12]

#We Now create an instance of the NN class and move if to the GPU if available
n_neurons = 100
NN_MNIST = Neural_Net(input_size=28*28, hidden_size=n_neurons, n_classes=10)

loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(NN_MNIST.parameters(), lr=0.001)

#training the full NN
n_epochs = 1
test_acc = train_pytorch_NN(NN_MNIST, n_epochs, train_loader_MNIST, test_loader_MNIST, loss, optimizer)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

trained_params = NN_MNIST.get_params()

accuracy_mat = np.zeros([100,len(n_bit_vector)])

for i in range(len(n_bit_vector)):
     accuracy_list = []
     
     n_levels = 2**n_bit_vector[i]
     
     quantize_model(NN_MNIST,n_levels)
     
     correct = 0 
     total = 0
     for images, labels in test_loader_MNIST:
          images = images.to(device)
          labels = labels.to(device)
          Y_pred = NN_MNIST.forward(images)
          _, predicted = torch.max(Y_pred.data, 1)
          total += labels.size(0)
          correct += (predicted == labels).sum().item()
          accuracy = ( 100*correct/total)
          accuracy_list.append(accuracy)
     accuracy_mat[:,i] = accuracy_list

     NN_MNIST.set_params(trained_params)
     


Using cuda device
Neural_Net(
  (NN_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=100, bias=True)
    (2): ReLU()
    (3): Linear(in_features=100, out_features=10, bias=True)
  )
)
Epoch [1/1], Step [100/600], Loss: 0.4787842631340027, Test Accuracy: 88.98%
Epoch [1/1], Step [200/600], Loss: 0.2602790594100952, Test Accuracy: 90.93%
Epoch [1/1], Step [300/600], Loss: 0.43224671483039856, Test Accuracy: 91.59%
Epoch [1/1], Step [400/600], Loss: 0.34260377287864685, Test Accuracy: 92.59%
Epoch [1/1], Step [500/600], Loss: 0.37550801038742065, Test Accuracy: 92.71%
Epoch [1/1], Step [600/600], Loss: 0.2897663414478302, Test Accuracy: 93.5%


IndexError: too many indices for tensor of dimension 1

In [None]:
# Training loop PEPG for MNIST: 

# TODO : need to rework PEPG and CMA and PSO to take a param dictionary as input parameters so I can create an optimizer loop for them 

NN_MNIST.reset_weights()
NN_MNIST.NN_stack[0].requires_grad = True

# learning parameters

epochs = 10
N_dim = NN_MNIST.num_params
pop_size = 100

init_pos = NN_MNIST.get_params().values

PEPG_optimizer = PEPG_opt(N_dim, pop_size, learning_rate=0.05, starting_mu=init_pos ,starting_sigma=0.5)


train_online_pop_NN(NN_MNIST, n_epochs, train_loader_MNIST, test_loader_MNIST, loss, optimizer)
