In [1]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from functions_to_optimize import f_rastrigin
from CMA_obj import CMA_opt
from PEPG_obj import PEPG_opt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from SPSA_obj import SPSA_opt
from ADAM_opt import AdamOptimizer
from PSO_obj import PSO_opt
from scipy.interpolate import interp1d


In [2]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
import pandas as pd

#   Online training of Neural Networks

## 1) Defining a simple neural network
- Define the feed forward NN class
- Define the linear classifier class for comparison
- Define a custom dataset class to help interfacing with pytorch

In [3]:
#this is a simple single layer feed forward NN with ReLU activation and adjustable hidden layer size
class Neural_Net(nn.Module):
    def __init__(self, input_size, hidden_size, n_classes):
        "init method that defines the NN architecture and inherits from nn.Module"
        super(Neural_Net, self).__init__()
        
        self.NN_stack = nn.Sequential(
            nn.Flatten(),
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, n_classes),
            #nn.Softmax(dim=1)
        )
        
        self.num_params = sum(p.data.numel() for p in self.parameters())
    
    def forward(self, X):
        "forward pass"
        logits = self.NN_stack(X)
        return logits
    
    
    def reset_weights(self):
        "method to reset the weights of the NN"
        for layer in self.NN_stack:
            if isinstance(layer,nn.Linear):
                layer.reset_parameters()
        for param in self.parameters():
            param.requires_grad = True
                
    def get_params(self):
        "Method to get parameters from the neural network"
        params_list = []
        
        for param in self.parameters():
            params_list.append(param.view(-1))
        
        full_params = torch.cat(params_list)
        return full_params
    
    def set_params(self,params_to_send):
        "Method to set parameters params in the neural network for online training. params_to_send is a column vector "
        idx_prev = 0
        for param in self.parameters():
           n_params = param.data.numel()
           new_param =  torch.reshape(torch.from_numpy(params_to_send[idx_prev: idx_prev + n_params ]),shape=param.data.shape)
           param.data.copy_(new_param)
           idx_prev += n_params
    
    def forward_pass_params(self,params_to_send,X):
        "This method is a forward pass that also takes in the parameters of the neural network as a variable, to use in online learning"
        self.set_params(params_to_send)
        logits = self.NN_stack(X)
        return logits
        

#class for a linear classifier

class Lin_classifier(nn.Module):
    def __init__(self, input_size, n_classes):
        super(Lin_classifier,self).__init__()
        
        self.NN_stack = nn.Sequential(
            nn.Flatten(),
            nn.Linear(input_size, n_classes),
        )
        
    def forward(self,X):
        logits = self.NN_stack(X)
        return logits
    


# custom dataset class for datasets that are not included in torchvision like wine or IRIS

class Custom_dataset(torch.utils.data.Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]



- Define some helper functions to train the model in a loop
- Define a function that quantizes a given model to study the impact of parameter resolution on performance 

In [4]:
#define training loop od pytorch NN as a function

def train_pytorch_NN(model, n_epochs, train_loader, test_loader, loss, optimizer):
    
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)

    print(f"Using {device} device")
    print(model)
    
    #array to store the accuracy of the model
    accuracy_list = []
    for epoch in range(n_epochs):
        model.train()
        for i, (images,labels) in enumerate(train_loader):
            #move data to gpu for faster processing
            images = images.to(device)
            labels = labels.to(device)
            #forward pass
            Y_pred = model.forward(images)
            loss_value = loss(Y_pred,labels)
            #backward pass
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()
            
            
            #print accuracy every 100 steps for the test set
            if (i+1) % 100 == 0:
                model.eval()
                correct = 0 
                total = 0
                for images, labels in test_loader:
                    images = images.to(device)
                    labels = labels.to(device)
                    Y_pred = model.forward(images)
                    _, predicted = torch.max(Y_pred.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
                accuracy = ( 100*correct/total)
                accuracy_list.append(accuracy)
                print(f'Epoch [{epoch+1}/{n_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss_value.item()}, Test Accuracy: {accuracy}%')
    return accuracy_list


def train_online_pop_NN(model, n_epochs, train_loader, test_loader, loss, optimizer):
    
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)

    print(f"Using {device} device")
    print(model)
    
    best_reward = np.ones([n_epochs,1])
    rewards = []
    for epoch in range(n_epochs):
        model.eval()
        for i, (features,labels) in enumerate(train_loader):
            
            coordinates = optimizer.ask()
            
            for k in range(coordinates.shape[0]):
                if device == 'cuda':
                    features = features.to(device)
                    labels = labels.to(device)
                    Y_pred = model.forward_pass_params(coordinates[k,:],features)
                if device == 'cpu':
                    Y_pred = model.forward_pass_params(coordinates[k,:],features)    
                loss_value = loss(Y_pred,labels)
                rewards.append(loss_value)

            rewards = np.ndarray(rewards)[:,np.newaxis]
            optimizer.tell(rewards)
            best_params = coordinates[:,np.argmin(rewards)]
            
                #print accuracy every 100 steps for the test set
            if (i+1) % 100 == 0:
                model.eval()
                correct = 0 
                total = 0
                for features, labels in test_loader:
                    features = features.to(device)
                    labels = labels.to(device)
                    Y_pred = model.forward_pass_params(best_params,features)
                    _, predicted = torch.max(Y_pred.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
                accuracy = ( 100*correct/total)
                accuracy_list.append(accuracy)
                print(f'Epoch [{epoch+1}/{n_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss_value.item()}, Test Accuracy: {accuracy}%')
    return accuracy_list
    
def quantize_model(model,quant_levels):
    "function to quantize weights of model on a layer by layer basis, quant_levels is the number of quantization steps"
    with torch.no_grad():
            
        for param in model.parameters():
            min_param = param.min()
            max_param = param.max()
            step = (max_param - min_param ) / (quant_levels)
            
            n_steps = ((param - min_param) / step).round()
            
            quantized_value = min_param + step * n_steps
            
            param.copy_(quantized_value)
    return


### Loading datasets
X is the input, Y the output

In [5]:
# MNIST dataset
MNIST_train = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
MNIST_test = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor(), download=True)

train_loader_MNIST = torch.utils.data.DataLoader(dataset=MNIST_train, batch_size=100, shuffle=True)
test_loader_MNIST = torch.utils.data.DataLoader(dataset=MNIST_test, batch_size=100, shuffle=False)

X_train_MNIST, Y_train_MNIST = next(iter(train_loader_MNIST))
X_test_MNIST, Y_test_MNIST = next(iter(test_loader_MNIST))

In [6]:
# Wine dataset
wine_df = pd.read_csv("data\\WINE\\winequality-red.csv")

wine_raw = wine_df.values.astype(np.float32)

# Convert to PyTorch tensors
X = torch.from_numpy(wine_raw[:, :-1])
Y = torch.from_numpy(wine_raw[:, -1]).unsqueeze(1)

# Create a single dataset
full_dataset = Custom_dataset(X, Y)

# Split into train and test sets, first set the size of the split
train_size = int(0.75 * len(full_dataset))
test_size = len(full_dataset) - train_size
# split into train and test sets using pytorch randomsplit

Wine_train, Wine_test = torch.utils.data.random_split(full_dataset, [train_size, test_size])


Wine_train_loader = torch.utils.data.DataLoader(dataset=Wine_train, batch_size=100, shuffle=True)
Wine_test_loader = torch.utils.data.DataLoader(dataset=Wine_test, batch_size=100, shuffle=False)

In [23]:
#Iris dataset
iris_df = pd.read_csv("data\\IRIS\\iris.csv")
# convert the last column 


iris_raw = iris_df.values

for i in range(len(iris_raw)):
    if iris_raw[i,-1] == 'Iris-setosa':
        iris_raw[i,-1] = 0
    elif iris_raw[i,-1] == 'Iris-versicolor':
        iris_raw[i,-1] = 1
    else:
        iris_raw[i,-1] = 2
        
iris_raw = iris_raw.astype(np.float32)
#iris raw needs to be shuffled randomly because the data is ordered by class
np.random.shuffle(iris_raw)

# Convert to PyTorch tensors
X = torch.from_numpy(iris_raw[:, :-1])
Y = torch.from_numpy(iris_raw[:, -1]).unsqueeze(1)

# Create a single dataset
full_dataset = Custom_dataset(X, Y)

# Split into train and test sets, first set the size of the split
train_size = int(0.75 * len(full_dataset))
test_size = len(full_dataset) - train_size
# split into train and test sets using pytorch randomsplit
Iris_train, Iris_test = torch.utils.data.random_split(full_dataset, [train_size, test_size])

Iris_train_loader = torch.utils.data.DataLoader(dataset=Iris_train, batch_size=train_size, shuffle=True)
Iris_test_loader = torch.utils.data.DataLoader(dataset=Iris_test, batch_size=test_size, shuffle=False)

- Train a feed forward neural network of 100 neurons

In [7]:
#We Now create an instance of the NN class and move if to the GPU if available
n_neurons = 100
NN_MNIST = Neural_Net(input_size=28*28, hidden_size=n_neurons, n_classes=10)

loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(NN_MNIST.parameters(), lr=0.001)

#training the full NN
n_epochs = 10
test_acc = train_pytorch_NN(NN_MNIST, n_epochs, train_loader_MNIST, test_loader_MNIST, loss, optimizer)

Using cuda device
Neural_Net(
  (NN_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=100, bias=True)
    (2): ReLU()
    (3): Linear(in_features=100, out_features=10, bias=True)
  )
)
Epoch [1/10], Step [100/600], Loss: 0.4191577136516571, Test Accuracy: 88.8%
Epoch [1/10], Step [200/600], Loss: 0.2891732454299927, Test Accuracy: 90.58%
Epoch [1/10], Step [300/600], Loss: 0.2360798418521881, Test Accuracy: 91.81%
Epoch [1/10], Step [400/600], Loss: 0.25218361616134644, Test Accuracy: 91.74%
Epoch [1/10], Step [500/600], Loss: 0.26800140738487244, Test Accuracy: 92.99%
Epoch [1/10], Step [600/600], Loss: 0.14968359470367432, Test Accuracy: 93.45%
Epoch [2/10], Step [100/600], Loss: 0.11221376061439514, Test Accuracy: 93.85%
Epoch [2/10], Step [200/600], Loss: 0.13010767102241516, Test Accuracy: 93.96%
Epoch [2/10], Step [300/600], Loss: 0.21192686259746552, Test Accuracy: 94.36%
Epoch [2/10], Step [400/600], Loss: 0.188226178288

- Train an extreme learning machine of the same size, we can just set the input weights and biases to not uses gradients and we reset them 

In [8]:
# Training loop for MNIST but without training the input layer (extreme learning machine)

#first reset the model we trained before
NN_MNIST.reset_weights()

#set the input layer to not require gradients
NN_MNIST.NN_stack[1].weight.requires_grad = False
NN_MNIST.NN_stack[1].bias.requires_grad = False

loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(NN_MNIST.parameters(), lr=0.01)

n_epochs = 10
test_acc_ELM = train_pytorch_NN(NN_MNIST, n_epochs, train_loader_MNIST, test_loader_MNIST, loss, optimizer)



Using cuda device
Neural_Net(
  (NN_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=100, bias=True)
    (2): ReLU()
    (3): Linear(in_features=100, out_features=10, bias=True)
  )
)
Epoch [1/10], Step [100/600], Loss: 1.271210789680481, Test Accuracy: 76.16%
Epoch [1/10], Step [200/600], Loss: 1.0079594850540161, Test Accuracy: 79.18%
Epoch [1/10], Step [300/600], Loss: 0.9305825233459473, Test Accuracy: 80.97%
Epoch [1/10], Step [400/600], Loss: 0.6689517498016357, Test Accuracy: 82.45%
Epoch [1/10], Step [500/600], Loss: 0.535764217376709, Test Accuracy: 83.31%
Epoch [1/10], Step [600/600], Loss: 0.694669246673584, Test Accuracy: 84.08%
Epoch [2/10], Step [100/600], Loss: 0.7473541498184204, Test Accuracy: 84.07%
Epoch [2/10], Step [200/600], Loss: 0.6482231616973877, Test Accuracy: 84.73%
Epoch [2/10], Step [300/600], Loss: 0.6407319903373718, Test Accuracy: 84.9%
Epoch [2/10], Step [400/600], Loss: 0.5210379958152771, Tes

- For reference we train a linear model

In [9]:
#Train the linear model for reference
Linear_model = Lin_classifier(input_size=28*28,n_classes=10)

loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(Linear_model.parameters(), lr=0.001)

n_epochs = 10
test_acc_lin = train_pytorch_NN(Linear_model, n_epochs, train_loader_MNIST, test_loader_MNIST, loss, optimizer)

Using cuda device
Lin_classifier(
  (NN_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=10, bias=True)
  )
)
Epoch [1/10], Step [100/600], Loss: 0.8847620487213135, Test Accuracy: 83.91%
Epoch [1/10], Step [200/600], Loss: 0.536886990070343, Test Accuracy: 87.79%
Epoch [1/10], Step [300/600], Loss: 0.5970674753189087, Test Accuracy: 88.8%
Epoch [1/10], Step [400/600], Loss: 0.4797896444797516, Test Accuracy: 89.85%
Epoch [1/10], Step [500/600], Loss: 0.36830952763557434, Test Accuracy: 90.29%
Epoch [1/10], Step [600/600], Loss: 0.49706733226776123, Test Accuracy: 90.58%
Epoch [2/10], Step [100/600], Loss: 0.47784319519996643, Test Accuracy: 90.75%
Epoch [2/10], Step [200/600], Loss: 0.2628822326660156, Test Accuracy: 91.16%
Epoch [2/10], Step [300/600], Loss: 0.29350030422210693, Test Accuracy: 91.19%
Epoch [2/10], Step [400/600], Loss: 0.3803710639476776, Test Accuracy: 91.34%
Epoch [2/10], Step [500/600], Loss: 0.43666458129

Plot previous results

In [10]:
# plot the accuracy of the ELM and FF models using plotly

fig = go.Figure()
fig.add_trace(go.Scatter(x=np.arange(len(test_acc)), y=test_acc, mode='lines', name='Full NN'))
fig.add_trace(go.Scatter(x=np.arange(len(test_acc_ELM)), y=test_acc_ELM, mode='lines', name='ELM'))
fig.add_trace(go.Scatter(x=np.arange(len(test_acc_lin)), y=test_acc_lin, mode='lines', name='Linear'))

#change theme to white and set the sizer of the plot
fig.update_layout(template='plotly_white', width=400, height=400,margin=dict(l=20, r=20, t=20, b=20))
#set xlim
fig.update_xaxes(range=[0, 60])


- Here we scan the size of the hidden layer for both the FFNN and ELM to compare how performance scales

In [13]:
#We scan the NN size and plot the accuracy of the ELM and FF models to compare with MNIST

n_neurons_vec = [10,20,50,100,200,500,1000,2000]
n_epochs = 15
loss = nn.CrossEntropyLoss()

test_acc_mat = np.zeros([len(n_neurons_vec),6*n_epochs])
test_acc_ELM_mat = np.zeros([len(n_neurons_vec),6*n_epochs])

for i in range(len(n_neurons_vec)):
    print('Training FFNN with ', n_neurons_vec[i], ' neurons...')
    n_neurons = n_neurons_vec[i]
    
    NN_MNIST = Neural_Net(input_size=28*28, hidden_size=n_neurons, n_classes=10)
    optimizer = torch.optim.Adam(NN_MNIST.parameters(), lr=0.001)

    NN_MNIST.NN_stack[1].weight.requires_grad = True
    NN_MNIST.NN_stack[1].bias.requires_grad = True

    #training the full NN
    
    test_acc_mat[i,:] = train_pytorch_NN(NN_MNIST, n_epochs, train_loader_MNIST, test_loader_MNIST, loss, optimizer)
    
    NN_MNIST.reset_weights()
    NN_MNIST.NN_stack[1].weight.requires_grad = False
    NN_MNIST.NN_stack[1].bias.requires_grad = False
    optimizer = torch.optim.Adam(NN_MNIST.parameters(), lr=0.01)
    
    print('Training ELM with ', n_neurons_vec[i], ' neurons...')
    #set the input layer to not require gradients

    
    test_acc_ELM_mat[i,:] = train_pytorch_NN(NN_MNIST, n_epochs, train_loader_MNIST, test_loader_MNIST, loss, optimizer)
    
    del NN_MNIST  # Assuming 'model' is your neural network instance
    torch.cuda.empty_cache()  # Advisable after deleting the model

    
    

Training FFNN with  10  neurons...
Using cuda device
Neural_Net(
  (NN_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=10, bias=True)
    (2): ReLU()
    (3): Linear(in_features=10, out_features=10, bias=True)
  )
)
Epoch [1/15], Step [100/600], Loss: 1.275258183479309, Test Accuracy: 76.24%
Epoch [1/15], Step [200/600], Loss: 0.6915454864501953, Test Accuracy: 84.11%
Epoch [1/15], Step [300/600], Loss: 0.7792010307312012, Test Accuracy: 86.98%
Epoch [1/15], Step [400/600], Loss: 0.45443809032440186, Test Accuracy: 88.45%
Epoch [1/15], Step [500/600], Loss: 0.35001206398010254, Test Accuracy: 89.06%
Epoch [1/15], Step [600/600], Loss: 0.3474467992782593, Test Accuracy: 90.14%
Epoch [2/15], Step [100/600], Loss: 0.3502334952354431, Test Accuracy: 90.51%
Epoch [2/15], Step [200/600], Loss: 0.32560884952545166, Test Accuracy: 90.59%
Epoch [2/15], Step [300/600], Loss: 0.4012940227985382, Test Accuracy: 90.94%
Epoch [2/15], Step [

- Plot comparison results

In [14]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=n_neurons_vec, y=np.max(test_acc_mat,1), mode='markers+lines', name='Full NN', line = dict(color='royalblue')))
fig.add_trace(go.Scatter(x=n_neurons_vec, y=np.max(test_acc_ELM_mat,1), mode='markers+lines', name='ELM',line = dict(color='firebrick')))
#add linear model as dotted line 
fig.add_trace(go.Scatter(x=n_neurons_vec, y=np.max(test_acc_lin)*np.ones(len(n_neurons_vec)), mode='lines', name='Linear', line=dict(dash='dot')))
#change theme to white and set the sizer of the plot
fig.update_layout(template='plotly_white', width=400, height=300,margin=dict(l=20, r=20, t=20, b=20))
#log scale on y axis
fig.update_yaxes(title_text="Accuracy [%]")
fig.update_xaxes(title_text="Neurons",type = 'log',exponentformat="power")



We see that for the same number of neurons training the input weights is very beneficial we need 30x the amount of neurons when using the ELM for the same level of performance

In [23]:
# Assuming test_acc_mat and test_acc_ELM_mat contain the max accuracy for each neuron count
interp_neurons_NN = interp1d(np.max(test_acc_mat,1), n_neurons_vec, kind='linear', bounds_error=False)
interp_neurons_ELM = interp1d(np.max(test_acc_ELM_mat,1), n_neurons_vec, kind='linear', bounds_error=False)

# Define a common range of accuracies for comparison (ensure it's within the range both models can achieve)
common_accuracy_range = np.linspace(max(test_acc_mat.min(), test_acc_ELM_mat.min()), min(test_acc_mat.max(), test_acc_ELM_mat.max()), 100)

# Estimate the number of neurons required for each model to achieve these accuracies
neurons_NN = interp_neurons_NN(common_accuracy_range)
neurons_ELM = interp_neurons_ELM(common_accuracy_range)

# Calculate the ratio of neuron counts (NN / ELM) for the same accuracies
neuron_ratio = neurons_ELM / neurons_NN

# Plotting
fig = go.Figure()
fig.add_trace(go.Scatter(x=common_accuracy_range, y=neuron_ratio, mode='markers+lines', name='Performance ratio'))
#change theme to white and set the sizer of the plot
fig.update_layout(template='plotly_white', width=400, height=400,margin=dict(l=20, r=20, t=20, b=20))
fig.update_xaxes(title_text="Accuracy [%]",range=[92,97])
fig.update_yaxes(title_text="# Neurons ELM / # Neurons NN")

In [27]:
#saving the data to txt files for processing

#from numpy import asarray
#from numpy import savetxt

# save to csv file
savetxt('data\\Results\\NN_training\\test_acc_FFNN.csv', test_acc_mat, delimiter=',')
savetxt('data\\Results\\NN_training\\test_acc_ELM.csv', test_acc_ELM_mat, delimiter=',')
savetxt('data\\Results\\NN_training\\test_acc_compare.csv', [test_acc,test_acc_ELM,test_acc_lin], delimiter=',')

savetxt('data\\Results\\NN_training\\common_acc_range.csv', common_accuracy_range, delimiter=',')
savetxt('data\\Results\\NN_training\\neuron_ratio.csv', neuron_ratio, delimiter=',')




#### Here we study the impact of model quantization on performance

In [28]:
#impact of quantization on network performance 

n_bit_vector = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12]

#We Now create an instance of the NN class and move if to the GPU if available
n_neurons = 100
NN_MNIST = Neural_Net(input_size=28*28, hidden_size=n_neurons, n_classes=10)

loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(NN_MNIST.parameters(), lr=0.001)

#training the full NN
n_epochs = 10
test_acc = train_pytorch_NN(NN_MNIST, n_epochs, train_loader_MNIST, test_loader_MNIST, loss, optimizer)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

trained_params = NN_MNIST.get_params()
if trained_params.requires_grad:
    # Detach the tensor from the computation graph
    trained_params = trained_params.detach()
if trained_params.is_cuda:
    # Move the tensor to the CPU
    trained_params = trained_params.cpu()
trained_params = trained_params.numpy()

accuracy_mat_quant = np.zeros([100,len(n_bit_vector)])

for i in range(len(n_bit_vector)):
     accuracy_list = []
     
     n_levels = 2**n_bit_vector[i]
     
     quantize_model(NN_MNIST,n_levels)
     
     correct = 0 
     total = 0
     for images, labels in test_loader_MNIST:
          images = images.to(device)
          labels = labels.to(device)
          Y_pred = NN_MNIST.forward(images)
          _, predicted = torch.max(Y_pred.data, 1)
          total += labels.size(0)
          correct += (predicted == labels).sum().item()
          accuracy = ( 100*correct/total)
          accuracy_list.append(accuracy)
     accuracy_mat_quant[:,i] = accuracy_list

     NN_MNIST.set_params(trained_params)
     


Using cuda device
Neural_Net(
  (NN_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=100, bias=True)
    (2): ReLU()
    (3): Linear(in_features=100, out_features=10, bias=True)
  )
)
Epoch [1/10], Step [100/600], Loss: 0.3675820231437683, Test Accuracy: 89.06%
Epoch [1/10], Step [200/600], Loss: 0.26932501792907715, Test Accuracy: 90.72%
Epoch [1/10], Step [300/600], Loss: 0.15140490233898163, Test Accuracy: 91.92%
Epoch [1/10], Step [400/600], Loss: 0.16536912322044373, Test Accuracy: 92.81%
Epoch [1/10], Step [500/600], Loss: 0.26765143871307373, Test Accuracy: 93.13%
Epoch [1/10], Step [600/600], Loss: 0.2550232410430908, Test Accuracy: 93.22%
Epoch [2/10], Step [100/600], Loss: 0.2108728289604187, Test Accuracy: 93.84%
Epoch [2/10], Step [200/600], Loss: 0.3862888216972351, Test Accuracy: 94.41%
Epoch [2/10], Step [300/600], Loss: 0.20241504907608032, Test Accuracy: 94.81%
Epoch [2/10], Step [400/600], Loss: 0.178509712219

In [29]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=n_bit_vector, y=np.mean(accuracy_mat_quant,0), mode='markers+lines', name='Quantized performance'))
#change theme to white and set the sizer of the plot
fig.update_layout(template='plotly_white', width=400, height=300,margin=dict(l=20, r=20, t=20, b=20))
#increaser the number of ticks on the x axis
fig.update_xaxes(tickvals=n_bit_vector)
fig.update_xaxes(title_text="Number of bits")
fig.update_yaxes(title_text="Accuracy [%]")


In [33]:
savetxt('data\\Results\\NN_training\\quantization.csv', [n_bit_vector,np.mean(accuracy_mat_quant,0)], delimiter=',')


In [9]:
# Training loop PEPG for MNIST: 

# TODO : need to rework PEPG and CMA and PSO to take a param dictionary as input parameters so I can create an optimizer loop for them 

#NN_MNIST.reset_weights()
#NN_MNIST.NN_stack[0].requires_grad = True
n_neurons = 100
n_epochs =1
NN_MNIST = Neural_Net(input_size=28*28, hidden_size=n_neurons, n_classes=10)

loss = nn.CrossEntropyLoss()
# learning parameters

epochs = 10
N_dim = NN_MNIST.num_params
pop_size = 100

init_pos = NN_MNIST.get_params()

if init_pos.requires_grad:
    # Detach the tensor from the computation graph
    init_pos = init_pos.detach()
if init_pos.is_cuda:
    # Move the tensor to the CPU
    init_pos = init_pos.cpu()
init_pos = init_pos.numpy()

PEPG_optimizer = PEPG_opt(N_dim, pop_size, learning_rate=0.05, starting_mu=init_pos ,starting_sigma=0.5)


train_online_pop_NN(NN_MNIST, n_epochs, train_loader_MNIST, test_loader_MNIST, loss, PEPG_optimizer)


Using cuda device
Neural_Net(
  (NN_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=100, bias=True)
    (2): ReLU()
    (3): Linear(in_features=100, out_features=10, bias=True)
  )
)


ValueError: maximum supported dimension for an ndarray is 32, found 100

In [172]:
init_pos

array([ 0.01789798, -0.01329115, -0.00524061, ...,  0.05293956,
       -0.00237943, -0.03485161], dtype=float32)