In [1]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from CMA_obj import CMA_opt
from PEPG_obj import PEPG_opt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from SPSA_obj import SPSA_opt
from Finite_diff_grad import FD_opt
from ADAM_opt import AdamOptimizer
from PSO_obj import PSO_opt
from scipy.interpolate import interp1d
from numpy import asarray
from numpy import savetxt
from NN_utils import *
import torch
import torch.nn as nn
from torchvision import datasets, transforms
import pandas as pd

# Online Training of Neural Networks on MNIST

- The NN class helper functions and the training-loop functions are defined in `NN_utils`.

### Loading datasets
`X` is the input (the images) and `Y` is the output (the digit labels).

In [2]:
# MNIST dataset: build the train split first, then the test split.
# Both use the same root folder and each gets its own ToTensor() transform.
MNIST_train, MNIST_test = (
    datasets.MNIST(root='./data', train=is_train, transform=transforms.ToTensor(), download=True)
    for is_train in (True, False)
)

# Mini-batch loaders: only the training loader shuffles its samples.
train_loader_MNIST = torch.utils.data.DataLoader(MNIST_train, batch_size=100, shuffle=True)
test_loader_MNIST = torch.utils.data.DataLoader(MNIST_test, batch_size=100, shuffle=False)

# Pull a single (X, Y) batch from each loader: X is the input, Y the output.
first_train_batch = next(iter(train_loader_MNIST))
X_train_MNIST, Y_train_MNIST = first_train_batch
first_test_batch = next(iter(test_loader_MNIST))
X_test_MNIST, Y_test_MNIST = first_test_batch

In [4]:
# Backpropagation baseline: train the small conv-net with Adam on a cross-entropy loss.
Mini_NN = Tiny_convnet()
N_dim = Mini_NN.count_parameters()
print('Number of parameters in the network: ', N_dim)

# Training setup for the full NN.
n_epochs = 5
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=Mini_NN.parameters(), lr=0.001)

# train_pytorch_NN (from NN_utils) returns the recorded test accuracies.
test_acc = train_pytorch_NN(
    Mini_NN,
    n_epochs,
    train_loader_MNIST,
    test_loader_MNIST,
    loss,
    optimizer,
)

Number of parameters in the network:  11274
Using cuda device
Tiny_convnet(
  (conv1): Conv2d(1, 8, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(8, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=784, out_features=10, bias=True)
)
Epoch [1/5], Step [100/600], Loss: 0.43303343653678894, Test Accuracy: 89.35%
Epoch [1/5], Step [200/600], Loss: 0.2889914810657501, Test Accuracy: 93.68%
Epoch [1/5], Step [300/600], Loss: 0.24148117005825043, Test Accuracy: 95.39%
Epoch [1/5], Step [400/600], Loss: 0.07464292645454407, Test Accuracy: 96.25%
Epoch [1/5], Step [500/600], Loss: 0.2551101744174957, Test Accuracy: 97.01%
Epoch [1/5], Step [600/600], Loss: 0.1096537783741951, Test Accuracy: 97.16%
Epoch [2/5], Step [100/600], Loss: 0.04244609922170639, Test Accuracy: 97.

In [5]:
# Test accuracy recorded while training the backprop baseline ("Full NN").
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.arange(len(test_acc)), y=test_acc, mode='lines', name='Full NN'))
fig.update_layout(template='plotly_white', width=400, height=400, margin=dict(l=20, r=20, t=20, b=20))
# Axis titles added so this figure matches the other accuracy plots in the notebook.
# NOTE(review): the x values are simply the index of each recorded accuracy; the
# training log suggests several recordings per epoch, so "Epochs" (the label used
# by the sibling plots) may need to become "Evaluation step" - confirm in NN_utils.
fig.update_xaxes(title_text="Epochs")
fig.update_yaxes(title_text="Accuracy [%]")


In [5]:
# Save the backprop baseline (BP) test accuracy as a CSV file.
# Forward slashes are used because they are valid path separators on both Windows
# and POSIX; a backslash-separated path would create a single oddly named file in
# the working directory on Linux/macOS instead of writing into the results folder.
# The target directory must already exist.
savetxt('data/Results/NN_training/online_training/BP_test_acc.csv', test_acc, delimiter=',')

In [5]:
# Training loop PEPG for MNIST:
# a fresh conv-net whose weights are updated by the PEPG optimizer instead of backprop.
n_epochs = 10
pop_size = 200
NN_MNIST = Tiny_convnet()
N_dim = NN_MNIST.count_parameters()

# PEPG updates the weights, so autograd does not need to track gradients for them.
with torch.no_grad():
    for param in NN_MNIST.parameters():
        param.requires_grad_(False)
loss = nn.CrossEntropyLoss()

# Initial network parameters as a NumPy array: detached from the graph and moved to the CPU.
init_pos = NN_MNIST.get_params().detach().cpu().numpy()

# The optimizer starts its search at the network's initial parameters.
PEPG_optimizer = PEPG_opt(
    N_dim,
    pop_size,
    learning_rate=0.01,
    starting_mu=init_pos,
    starting_sigma=0.1,
)

# Learning parameters that are set as attributes after construction.
pepg_settings = {
    'sigma_decay': 0.9999,
    'sigma_alpha': 0.2,
    'sigma_limit': 0.02,
    'elite_ratio': 0.1,
    'weight_decay': 0.005,
}
for setting_name, setting_value in pepg_settings.items():
    setattr(PEPG_optimizer, setting_name, setting_value)

test_acc_PEPG, best_reward_PEPG = train_online_pop_NN(
    NN_MNIST, n_epochs, train_loader_MNIST, test_loader_MNIST, loss, PEPG_optimizer
)

Using cuda device
Tiny_convnet(
  (conv1): Conv2d(1, 8, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(8, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=784, out_features=10, bias=True)
)
{i+1}Epoch [1/10], Step [10/600], Loss: 2.8782799243927, Test Accuracy: 10.1%
{i+1}Epoch [1/10], Step [20/600], Loss: 2.505277156829834, Test Accuracy: 10.51%
{i+1}Epoch [1/10], Step [30/600], Loss: 3.732456922531128, Test Accuracy: 11.92%
{i+1}Epoch [1/10], Step [40/600], Loss: 2.9436402320861816, Test Accuracy: 8.05%
{i+1}Epoch [1/10], Step [50/600], Loss: 2.659672498703003, Test Accuracy: 13.01%
{i+1}Epoch [1/10], Step [60/600], Loss: 2.4585225582122803, Test Accuracy: 11.24%
{i+1}Epoch [1/10], Step [70/600], Loss: 2.5489184856414795, Test Accuracy: 11.73%
{i+1}Epoch [1/10]

In [6]:
# Test accuracy recorded during the PEPG run.
fig = go.Figure()
# The x axis is logarithmic, and a log axis cannot display x = 0, so the recorded
# values are numbered from 1; otherwise the first accuracy would be dropped.
n_points = len(test_acc_PEPG)
fig.add_trace(go.Scatter(x=np.arange(1, n_points + 1), y=test_acc_PEPG, mode='lines', name='PEPG'))
# White theme and a compact figure size.
fig.update_layout(template='plotly_white', width=400, height=300, margin=dict(l=20, r=20, t=20, b=20))
fig.update_xaxes(title_text="Epochs", type='log')
fig.update_yaxes(title_text="Accuracy [%]")

In [7]:
# Save the PEPG test accuracy as a CSV file.
# Forward slashes are used because they are valid path separators on both Windows
# and POSIX; a backslash-separated path would create a single oddly named file in
# the working directory on Linux/macOS instead of writing into the results folder.
# The target directory must already exist.
savetxt('data/Results/NN_training/online_training/PEPG_test_acc.csv', test_acc_PEPG, delimiter=',')

### We use CMA-ES to train the network
- This doesn't work at all: even this simple architecture has too many parameters, so CMA is painfully slow.

In [22]:
# Using CMA-ES for training the NN
n_epochs = 10
pop_size = 200
NN_MNIST = Tiny_convnet()
N_dim = NN_MNIST.count_parameters()

# CMA-ES updates the weights, so autograd does not need to track gradients for them.
with torch.no_grad():
    for param in NN_MNIST.parameters():
        param.requires_grad_(False)
loss = nn.CrossEntropyLoss()

# Initial network parameters as a NumPy array: detached from the graph and moved to the CPU.
init_pos = NN_MNIST.get_params().detach().cpu().numpy()

# Half of the population is selected; the search starts at the initial parameters.
CMA_optimizer = CMA_opt(
    N_dim,
    pop_size,
    select_pop=pop_size // 2,
    sigma_init=0.02,
    mean_init=init_pos,
)
setattr(CMA_optimizer, 'eigen_update_frequency', 10)

test_acc_CMA, best_reward_CMA = train_online_pop_NN(
    NN_MNIST, n_epochs, train_loader_MNIST, test_loader_MNIST, loss, CMA_optimizer
)


Using cuda device
Tiny_convnet(
  (conv1): Conv2d(1, 8, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(8, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=784, out_features=10, bias=True)
)
{i+1}Epoch [1/10], Step [50/600], Loss: 1.9426745176315308, Test Accuracy: 51.71%
{i+1}Epoch [1/10], Step [100/600], Loss: 1.2132484912872314, Test Accuracy: 73.45%
{i+1}Epoch [1/10], Step [150/600], Loss: 0.8586930632591248, Test Accuracy: 82.86%
{i+1}Epoch [1/10], Step [200/600], Loss: 0.8019188642501831, Test Accuracy: 83.11%
{i+1}Epoch [1/10], Step [250/600], Loss: 0.7056857943534851, Test Accuracy: 85.49%
{i+1}Epoch [1/10], Step [300/600], Loss: 0.5936709046363831, Test Accuracy: 87.73%
{i+1}Epoch [1/10], Step [350/600], Loss: 0.6220501065254211, Test Accuracy: 87.4%
{i+1

In [4]:
# Test accuracy recorded during the CMA-ES run.
fig = go.Figure()
# The x axis is logarithmic, and a log axis cannot display x = 0, so the recorded
# values are numbered from 1; otherwise the first accuracy would be dropped.
n_points = len(test_acc_CMA)
# The trace is named after the optimizer that produced the data (CMA-ES, not PEPG).
fig.add_trace(go.Scatter(x=np.arange(1, n_points + 1), y=test_acc_CMA, mode='lines', name='CMA-ES'))
# White theme and a compact figure size.
fig.update_layout(template='plotly_white', width=400, height=300, margin=dict(l=20, r=20, t=20, b=20))
fig.update_xaxes(title_text="Epochs", type='log')
fig.update_yaxes(title_text="Accuracy [%]")

In [None]:
#np.savetxt('data\\Results\\NN_training\\online_training\\CMA_200_0p1_eig100.csv', [test_acc_CMA,best_reward_CMA], delimiter=',')


In [6]:
# Use SPSA to optimize the neural network.
# NOTE(review): the saved output of this cell ends in a ValueError (shape (11274,)
# vs. broadcast shape (11274, 11274)); the failing statement is not visible in this
# notebook - check SPSA_opt, AdamOptimizer and train_online_SPSA_NN.
n_epochs = 20
NN_MNIST = Tiny_convnet()
N_dim = NN_MNIST.count_parameters()

# SPSA updates the weights, so autograd does not need to track gradients for them.
with torch.no_grad():
    for param in NN_MNIST.parameters():
        param.requires_grad_(False)
loss = nn.CrossEntropyLoss()

# Initial network parameters as a NumPy array: detached from the graph and moved to the CPU.
init_pos = NN_MNIST.get_params().detach().cpu().numpy()

# Both optimizer objects are constructed from the same initial parameter array.
SPSA_optimizer = SPSA_opt(init_pos, alpha=1e-3, epsilon=1e-5)
Adam = AdamOptimizer(init_pos, lr=1e-3, beta1=0.9, beta2=0.9, epsilon=1e-8)

test_acc_SPSA, best_reward_SPSA = train_online_SPSA_NN(
    NN_MNIST, n_epochs, train_loader_MNIST, test_loader_MNIST, loss, SPSA_optimizer, Adam
)

Using cuda device
Tiny_convnet(
  (conv1): Conv2d(1, 8, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(8, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=784, out_features=10, bias=True)
)


ValueError: non-broadcastable output operand with shape (11274,) doesn't match the broadcast shape (11274,11274)

In [12]:
# Test accuracy recorded during the SPSA run.
fig = go.Figure()
# The x axis is logarithmic, and a log axis cannot display x = 0, so the recorded
# values are numbered from 1; otherwise the first accuracy would be dropped.
n_points = len(test_acc_SPSA)
# The trace is named after the optimizer that produced the data (SPSA, not PEPG).
fig.add_trace(go.Scatter(x=np.arange(1, n_points + 1), y=test_acc_SPSA, mode='lines', name='SPSA'))
# White theme and a compact figure size.
fig.update_layout(template='plotly_white', width=400, height=300, margin=dict(l=20, r=20, t=20, b=20))
fig.update_xaxes(title_text="Epochs", type='log')
fig.update_yaxes(title_text="Accuracy [%]")

In [3]:
# Use finite differences (FD) to optimize the neural network.
# NOTE(review): in the saved output the test accuracy stays near 10% and the run
# ends with a KeyboardInterrupt.
n_epochs = 20
grad_dim = 100  # passed to FD_opt as n_perturb
NN_MNIST = Tiny_convnet()
N_dim = NN_MNIST.count_parameters()

# The FD optimizer updates the weights, so autograd does not need to track gradients for them.
with torch.no_grad():
    for param in NN_MNIST.parameters():
        param.requires_grad_(False)
loss = nn.CrossEntropyLoss()

# Initial network parameters as a NumPy array: detached from the graph and moved to the CPU.
init_pos = NN_MNIST.get_params().detach().cpu().numpy()

# Both optimizer objects are constructed from the same initial parameter array.
FD_optimizer = FD_opt(init_pos, n_perturb=grad_dim, alpha=1e-3, epsilon=1e-7)
Adam = AdamOptimizer(init_pos, lr=1e-4, beta1=0.9, beta2=0.9, epsilon=1e-8)

test_acc_FD, best_reward_FD = train_online_FD_NN(
    NN_MNIST, N_dim, n_epochs, train_loader_MNIST, test_loader_MNIST, loss, FD_optimizer, Adam
)

Using cuda device
Tiny_convnet(
  (conv1): Conv2d(1, 8, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(8, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=784, out_features=10, bias=True)
)
{i+1}Epoch [1/20], Step [100/600], Loss: 2.3067967891693115, Test Accuracy: 10.12%
{i+1}Epoch [1/20], Step [200/600], Loss: 2.3046770095825195, Test Accuracy: 10.16%
{i+1}Epoch [1/20], Step [300/600], Loss: 2.3043203353881836, Test Accuracy: 10.12%
{i+1}Epoch [1/20], Step [400/600], Loss: 2.303610324859619, Test Accuracy: 10.13%
{i+1}Epoch [1/20], Step [500/600], Loss: 2.303591251373291, Test Accuracy: 10.13%
{i+1}Epoch [1/20], Step [600/600], Loss: 2.303727388381958, Test Accuracy: 10.1%
{i+1}Epoch [2/20], Step [100/600], Loss: 2.303433656692505, Test Accuracy: 10.09%
{i+1}Ep

KeyboardInterrupt: 

In [8]:
# Save the SPSA test accuracy as a CSV file.
# Forward slashes are used because they are valid path separators on both Windows
# and POSIX; a backslash-separated path would create a single oddly named file in
# the working directory on Linux/macOS instead of writing into the results folder.
# The target directory must already exist.
np.savetxt('data/Results/NN_training/online_training/SPSA_test_acc.csv', test_acc_SPSA, delimiter=',')

In [10]:
# Using PSO for training the NN
n_epochs = 10
NN_MNIST = Tiny_convnet()
N_dim = NN_MNIST.count_parameters()
pop_size = 200

# PSO updates the weights, so autograd does not need to track gradients for them.
with torch.no_grad():
    for param in NN_MNIST.parameters():
        param.requires_grad = False
loss = nn.CrossEntropyLoss()

# Bounds used both for the random start positions and inside the PSO parameters.
upper_bound = 0.3
lower_bound = -0.3

# PSO parameter dictionary; Vmax is 15% of the width of the bounded interval.
params = {'c_1': 2.5,
          'c_2': 0.85,
          'w': 0.8,
          'Vmax': 0.15*(upper_bound-lower_bound),
          'upper_bound': upper_bound,
          'lower_bound': lower_bound,
          'pop_size': pop_size,
          }

# Start positions are drawn uniformly from [lower_bound, upper_bound) with shape
# (N_dim, pop_size). The network's own initial weights are not used by PSO: the
# earlier extraction of them into init_pos was overwritten here before ever being
# read, so that dead code has been removed.
init_pos = (upper_bound - lower_bound) * np.random.rand(N_dim, pop_size) + lower_bound
# Start velocities are drawn uniformly from [0, 0.1) with the same shape.
V_init = 0.1 * np.random.rand(N_dim, pop_size)
PSO_optimizer = PSO_opt(X_init=init_pos, V_init=V_init, params=params)

test_acc_PSO, best_reward_PSO = train_online_pop_NN(NN_MNIST, n_epochs, train_loader_MNIST, test_loader_MNIST, loss, PSO_optimizer)

Using cuda device
Tiny_convnet(
  (conv1): Conv2d(1, 8, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(8, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=784, out_features=10, bias=True)
)
{i+1}Epoch [1/10], Step [50/600], Loss: 2.3209269046783447, Test Accuracy: 15.87%
{i+1}Epoch [1/10], Step [100/600], Loss: 2.1361119747161865, Test Accuracy: 20.8%
{i+1}Epoch [1/10], Step [150/600], Loss: 2.1310675144195557, Test Accuracy: 26.35%
{i+1}Epoch [1/10], Step [200/600], Loss: 1.9755831956863403, Test Accuracy: 32.98%
{i+1}Epoch [1/10], Step [250/600], Loss: 1.853054404258728, Test Accuracy: 28.93%
{i+1}Epoch [1/10], Step [300/600], Loss: 2.098614454269409, Test Accuracy: 22.52%
{i+1}Epoch [1/10], Step [350/600], Loss: 1.920212745666504, Test Accuracy: 29.79%
{i+1}Ep

KeyboardInterrupt: 

In [None]:
# Test accuracy recorded during the PSO run.
fig = go.Figure()
# The x axis is logarithmic, and a log axis cannot display x = 0, so the recorded
# values are numbered from 1; otherwise the first accuracy would be dropped.
n_points = len(test_acc_PSO)
# The trace is named after the optimizer that produced the data (PSO, not PEPG).
fig.add_trace(go.Scatter(x=np.arange(1, n_points + 1), y=test_acc_PSO, mode='lines', name='PSO'))
# White theme and a compact figure size.
fig.update_layout(template='plotly_white', width=400, height=300, margin=dict(l=20, r=20, t=20, b=20))
fig.update_xaxes(title_text="Epochs", type='log')
fig.update_yaxes(title_text="Accuracy [%]")

In [5]:
# Plot the weight distribution of the most recently created NN_MNIST model.
model = NN_MNIST

# One histogram figure per parameter whose name contains 'weight'.
figs = []
for name, param in model.named_parameters():
    # Skip everything that is not a weight parameter (e.g. biases).
    if 'weight' not in name:
        continue

    # Flattened copy of the weights as a 1-D NumPy array on the CPU.
    weights = param.detach().cpu().numpy().flatten()

    # Build the histogram figure, then add titles for readability.
    fig = go.Figure(data=[go.Histogram(x=weights, name=name)])
    layout_options = dict(
        title=f'Histogram of Weights for Layer: {name}',
        xaxis_title='Weight values',
        yaxis_title='Frequency',
        bargap=0.2,
    )
    fig.update_layout(**layout_options)

    figs.append(fig)

# Show all histograms, in the order the parameters were visited.
for fig in figs:
    fig.show()