In [33]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from CMA_obj import CMA_opt
from PEPG_obj import PEPG_opt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from SPSA_obj import SPSA_opt
from Finite_diff_grad import FD_opt
from ADAM_opt import AdamOptimizer
from PSO_obj import PSO_opt
from scipy.interpolate import interp1d
from numpy import asarray
from numpy import savetxt
from NN_utils_IRIS import *
import torch
import torch.nn as nn
from torchvision import datasets, transforms
import pandas as pd

# Online Training of Neural Networks: Iris and Wine

- The NN class, the helper functions, and the training-loop functions are defined in `NN_utils_IRIS`.

### Loading datasets
`X` holds the input features and `Y` holds the target labels.

In [2]:
# Iris dataset
# Fixed seed so that the row shuffle, the train/test split and the loader order
# are reproducible from one kernel restart to the next.
SEED = 0

# Forward slashes resolve on Windows, Linux and macOS alike; the previous
# "data\\IRIS\\iris.csv" literal only resolved on Windows.
iris_df = pd.read_csv("data/IRIS/iris.csv")

# Encode the species name (last column) as an integer class id.
# Anything that is not one of the two names below falls through to class 2,
# which is what the original if/elif/else chain did.
species = iris_df.iloc[:, -1].to_numpy()
class_ids = np.select(
    [species == 'Iris-setosa', species == 'Iris-versicolor'],
    [0, 1],
    default=2,
)

# Drop the first column because it is just an index, keep the feature columns,
# and append the numeric class id as the last column.
iris_features = iris_df.iloc[:, 1:-1].to_numpy(dtype=np.float32)
iris_raw = np.column_stack([iris_features, class_ids]).astype(np.float32)

# iris_raw needs to be shuffled randomly because the data is ordered by class
np.random.default_rng(SEED).shuffle(iris_raw)

# Convert to PyTorch tensors: X = features, Y = class id as a column vector
X = torch.from_numpy(iris_raw[:, :-1])
Y = torch.from_numpy(iris_raw[:, -1]).unsqueeze(1)

# Create a single dataset
# NOTE(review): the captured cell output shows Custom_dataset calling
# torch.tensor(...) on these arguments, which appears to be warning output
# because X and Y are already tensors — confirm in NN_utils_IRIS.
full_dataset = Custom_dataset(X, Y)

# Split into train and test sets (75 % / 25 %)
train_size = int(0.75 * len(full_dataset))
test_size = len(full_dataset) - train_size

# A dedicated generator keeps the split reproducible without touching the
# global torch RNG state.
split_generator = torch.Generator().manual_seed(SEED)
Iris_train, Iris_test = torch.utils.data.random_split(
    full_dataset, [train_size, test_size], generator=split_generator
)

# Each loader serves its whole subset as one batch (batch_size == subset size).
Iris_train_loader = torch.utils.data.DataLoader(
    dataset=Iris_train, batch_size=train_size, shuffle=True, generator=split_generator
)
Iris_test_loader = torch.utils.data.DataLoader(dataset=Iris_test, batch_size=test_size, shuffle=False)

  self.features = torch.tensor(features, dtype=torch.float)
  self.labels = torch.tensor(labels, dtype=torch.long).squeeze()  # Convert and squeeze labels


In [5]:
# Number of batches the test loader yields per pass over the test set.
len(Iris_test_loader)

1

In [4]:
# Baseline: train the full network with backpropagation (Adam).
n_epochs = 200
n_neurons = 10

# Create an instance of the NN class (the cell output reports the device in use).
NN_IRIS = Neural_Net(input_size=4, hidden_size=n_neurons, n_classes=3)

# Cross-entropy loss over the 3 classes, optimised with torch's Adam.
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=NN_IRIS.parameters(), lr=1e-3)

# The returned value is plotted as the test-accuracy curve in a later cell.
test_acc = train_pytorch_NN(NN_IRIS, n_epochs, Iris_train_loader, Iris_test_loader, loss, optimizer)

Using cuda device
Neural_Net(
  (NN_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=4, out_features=10, bias=True)
    (2): ReLU()
    (3): Linear(in_features=10, out_features=3, bias=True)
  )
)
Epoch [1/200], Step [1/1], Loss: 1.068480372428894, Test Accuracy: 21.05263157894737%
Epoch [2/200], Step [1/1], Loss: 1.0603891611099243, Test Accuracy: 21.05263157894737%
Epoch [3/200], Step [1/1], Loss: 1.0526516437530518, Test Accuracy: 21.05263157894737%
Epoch [4/200], Step [1/1], Loss: 1.0452842712402344, Test Accuracy: 21.05263157894737%
Epoch [5/200], Step [1/1], Loss: 1.0383018255233765, Test Accuracy: 21.05263157894737%
Epoch [6/200], Step [1/1], Loss: 1.0317190885543823, Test Accuracy: 21.05263157894737%
Epoch [7/200], Step [1/1], Loss: 1.0255411863327026, Test Accuracy: 21.05263157894737%
Epoch [8/200], Step [1/1], Loss: 1.0197765827178955, Test Accuracy: 21.05263157894737%
Epoch [9/200], Step [1/1], Loss: 1.0144295692443848, Test Accuracy:

In [3]:
# Sanity check: print the shape and dtype of the label tensor of every training batch.
for _batch_features, batch_labels in Iris_train_loader:
    # Shape should print something like torch.Size([batch_size]).
    print(batch_labels.shape, batch_labels.dtype, sep="\n")

torch.Size([112])
torch.int64


In [5]:
# Test-accuracy curve of the network trained with backpropagation.
# Epochs are numbered from 1 so the axis matches the "Epoch [1/...]" training log.
epochs = np.arange(1, len(test_acc) + 1)

fig = go.Figure()
fig.add_trace(go.Scatter(x=epochs, y=test_acc, mode='lines', name='Full NN'))
# White theme and a compact figure size
fig.update_layout(template='plotly_white', width=400, height=400, margin=dict(l=20, r=20, t=20, b=20))
# Axis titles, consistent with the other accuracy plots in this notebook
fig.update_xaxes(title_text="Epochs")
fig.update_yaxes(title_text="Accuracy [%]")


In [12]:
# Train the Iris network with PEPG instead of backpropagation.
n_epochs = 100
pop_size = 100

NN_IRIS = Neural_Net(input_size=4, hidden_size=10, n_classes=3)
N_dim = NN_IRIS.count_parameters()

# PEPG updates the weights itself, so autograd does not need to track them.
for param in NN_IRIS.parameters():
    param.requires_grad_(False)
loss = nn.CrossEntropyLoss()

# Current network parameters as a NumPy array on the CPU. detach() and cpu()
# leave the values of an already-detached / already-on-CPU tensor unchanged.
init_pos = NN_IRIS.get_params().detach().cpu().numpy()

# The search starts from the network's current parameters.
PEPG_optimizer = PEPG_opt(N_dim, pop_size, learning_rate=0.01, starting_mu=init_pos, starting_sigma=0.1)

# Learning parameters, assigned as attributes on the optimizer after construction.
pepg_settings = {
    'sigma_decay': 0.9999,
    'sigma_alpha': 0.2,
    'sigma_limit': 0.02,
    'elite_ratio': 0.1,
    'weight_decay': 0.005,
}
for setting_name, setting_value in pepg_settings.items():
    setattr(PEPG_optimizer, setting_name, setting_value)

test_acc_PEPG, best_reward_PEPG = train_online_pop_NN(
    NN_IRIS, n_epochs, Iris_train_loader, Iris_test_loader, loss, PEPG_optimizer
)

Using cuda device
Neural_Net(
  (NN_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=4, out_features=10, bias=True)
    (2): ReLU()
    (3): Linear(in_features=10, out_features=3, bias=True)
  )
)
{i+1}Epoch [1/100], Step [1/1], Loss: 0.9452974200248718, Test Accuracy: 60.526315789473685%
{i+1}Epoch [2/100], Step [1/1], Loss: 0.8118532299995422, Test Accuracy: 78.94736842105263%
{i+1}Epoch [3/100], Step [1/1], Loss: 0.9389926791191101, Test Accuracy: 63.1578947368421%
{i+1}Epoch [4/100], Step [1/1], Loss: 1.0415397882461548, Test Accuracy: 21.05263157894737%
{i+1}Epoch [5/100], Step [1/1], Loss: 1.02104914188385, Test Accuracy: 21.05263157894737%
{i+1}Epoch [6/100], Step [1/1], Loss: 1.029732584953308, Test Accuracy: 31.57894736842105%
{i+1}Epoch [7/100], Step [1/1], Loss: 0.8549807667732239, Test Accuracy: 78.94736842105263%
{i+1}Epoch [8/100], Step [1/1], Loss: 0.9713361859321594, Test Accuracy: 60.526315789473685%
{i+1}Epoch [9/100], Step [1/

In [13]:
# Test-accuracy curve of the PEPG-trained network.
# Epochs are numbered from 1 (as in the training log): the x-axis is logarithmic,
# so a point at x = 0 cannot be placed on it.
epochs = np.arange(1, len(test_acc_PEPG) + 1)

fig = go.Figure()
fig.add_trace(go.Scatter(x=epochs, y=test_acc_PEPG, mode='lines', name='PEPG'))
# change theme to white and set the size of the plot
fig.update_layout(template='plotly_white', width=400, height=300, margin=dict(l=20, r=20, t=20, b=20))
fig.update_xaxes(title_text="Epochs", type='log')
fig.update_yaxes(title_text="Accuracy [%]")

In [None]:
# Save the PEPG test-accuracy curve as CSV.
# Forward slashes resolve on every platform; the previous backslash-separated
# literal only resolved to the intended directory on Windows.
savetxt('data/Results/NN_training/online_training/PEPG_test_acc.csv', test_acc_PEPG, delimiter=',')

### We use CMA to train the FFNN
- This doesn't work at all: even this simple architecture has too many parameters, so CMA is painfully slow.

In [17]:
# Train the Iris network with CMA-ES.
n_epochs = 100
pop_size = 10

NN_IRIS = Neural_Net(input_size=4, hidden_size=10, n_classes=3)
N_dim = NN_IRIS.count_parameters()

# CMA-ES updates the weights itself, so autograd does not need to track them.
for param in NN_IRIS.parameters():
    param.requires_grad_(False)
loss = nn.CrossEntropyLoss()

# Current network parameters as a NumPy array on the CPU. detach() and cpu()
# leave the values of an already-detached / already-on-CPU tensor unchanged.
init_pos = NN_IRIS.get_params().detach().cpu().numpy()

# Half of the population is passed as select_pop; the mean starts at the
# network's current parameters.
CMA_optimizer = CMA_opt(N_dim, pop_size, select_pop=pop_size // 2, sigma_init=0.1, mean_init=init_pos)
CMA_optimizer.eigen_update_frequency = 10

test_acc_CMA, best_reward_CMA = train_online_pop_NN(
    NN_IRIS, n_epochs, Iris_train_loader, Iris_test_loader, loss, CMA_optimizer
)


Using cuda device
Neural_Net(
  (NN_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=4, out_features=10, bias=True)
    (2): ReLU()
    (3): Linear(in_features=10, out_features=3, bias=True)
  )
)
{i+1}Epoch [1/100], Step [1/1], Loss: 1.1388428211212158, Test Accuracy: 34.21052631578947%
{i+1}Epoch [2/100], Step [1/1], Loss: 1.1070222854614258, Test Accuracy: 34.21052631578947%
{i+1}Epoch [3/100], Step [1/1], Loss: 1.0813188552856445, Test Accuracy: 34.21052631578947%
{i+1}Epoch [4/100], Step [1/1], Loss: 0.9951866269111633, Test Accuracy: 34.21052631578947%
{i+1}Epoch [5/100], Step [1/1], Loss: 1.008622646331787, Test Accuracy: 31.57894736842105%
{i+1}Epoch [6/100], Step [1/1], Loss: 0.8917995095252991, Test Accuracy: 63.1578947368421%
{i+1}Epoch [7/100], Step [1/1], Loss: 0.9529407620429993, Test Accuracy: 60.526315789473685%
{i+1}Epoch [8/100], Step [1/1], Loss: 0.9257021546363831, Test Accuracy: 57.89473684210526%
{i+1}Epoch [9/100], Step [1

In [18]:
# Test-accuracy curve of the CMA-ES-trained network.
# Epochs are numbered from 1 (as in the training log): the x-axis is logarithmic,
# so a point at x = 0 cannot be placed on it.
epochs = np.arange(1, len(test_acc_CMA) + 1)

fig = go.Figure()
# The trace is labelled after the optimizer that produced it (it was 'PEPG', a copy-paste leftover).
fig.add_trace(go.Scatter(x=epochs, y=test_acc_CMA, mode='lines', name='CMA-ES'))
# change theme to white and set the size of the plot
fig.update_layout(template='plotly_white', width=400, height=300, margin=dict(l=20, r=20, t=20, b=20))
fig.update_xaxes(title_text="Epochs", type='log')
fig.update_yaxes(title_text="Accuracy [%]")

In [93]:
# Train the Iris network with SPSA; an AdamOptimizer instance is passed alongside it.
n_epochs = 1000

NN_IRIS = Neural_Net(input_size=4, hidden_size=10, n_classes=3)
N_dim = NN_IRIS.count_parameters()

# SPSA updates the weights itself, so autograd does not need to track them.
for param in NN_IRIS.parameters():
    param.requires_grad_(False)
loss = nn.CrossEntropyLoss()

# Current network parameters as a NumPy array on the CPU. detach() and cpu()
# leave the values of an already-detached / already-on-CPU tensor unchanged.
init_pos = NN_IRIS.get_params().detach().cpu().numpy()

# Both objects are initialised from the same starting parameters.
SPSA_optimizer = SPSA_opt(init_pos, alpha=1e-3, epsilon=1e-5)
Adam = AdamOptimizer(init_pos, lr=1e-2, beta1=0.9, beta2=0.99, epsilon=1e-8)

test_acc_SPSA, best_reward_SPSA = train_online_SPSA_NN(
    NN_IRIS, n_epochs, Iris_train_loader, Iris_test_loader, loss, SPSA_optimizer, Adam
)

Using cuda device
Neural_Net(
  (NN_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=4, out_features=10, bias=True)
    (2): ReLU()
    (3): Linear(in_features=10, out_features=3, bias=True)
  )
)
{i+1}Epoch [1/1000], Step [1/1], Loss: 1.5929864645004272, Test Accuracy: 34.21052631578947%
{i+1}Epoch [2/1000], Step [1/1], Loss: 1.570778250694275, Test Accuracy: 34.21052631578947%
{i+1}Epoch [3/1000], Step [1/1], Loss: 1.5515416860580444, Test Accuracy: 34.21052631578947%
{i+1}Epoch [4/1000], Step [1/1], Loss: 1.5360982418060303, Test Accuracy: 34.21052631578947%
{i+1}Epoch [5/1000], Step [1/1], Loss: 1.514479637145996, Test Accuracy: 34.21052631578947%
{i+1}Epoch [6/1000], Step [1/1], Loss: 1.4864768981933594, Test Accuracy: 34.21052631578947%
{i+1}Epoch [7/1000], Step [1/1], Loss: 1.4634218215942383, Test Accuracy: 34.21052631578947%
{i+1}Epoch [8/1000], Step [1/1], Loss: 1.4251213073730469, Test Accuracy: 34.21052631578947%
{i+1}Epoch [9/1000],

In [94]:
# Test-accuracy curve of the SPSA-trained network.
# Epochs are numbered from 1 (as in the training log): the x-axis is logarithmic,
# so a point at x = 0 cannot be placed on it.
epochs = np.arange(1, len(test_acc_SPSA) + 1)

fig = go.Figure()
# The trace is labelled after the optimizer that produced it (it was 'PEPG', a copy-paste leftover).
fig.add_trace(go.Scatter(x=epochs, y=test_acc_SPSA, mode='lines', name='SPSA'))
# change theme to white and set the size of the plot
fig.update_layout(template='plotly_white', width=400, height=300, margin=dict(l=20, r=20, t=20, b=20))
fig.update_xaxes(title_text="Epochs", type='log')
fig.update_yaxes(title_text="Accuracy [%]")

In [102]:
# Train the Iris network with the finite-difference (FD) optimizer;
# an AdamOptimizer instance is passed alongside it.
n_epochs = 500
grad_dim = 50  # forwarded to FD_opt as n_perturb

NN_IRIS = Neural_Net(input_size=4, hidden_size=10, n_classes=3)
N_dim = NN_IRIS.count_parameters()

# FD updates the weights itself, so autograd does not need to track them.
for param in NN_IRIS.parameters():
    param.requires_grad_(False)
loss = nn.CrossEntropyLoss()

# Current network parameters as a NumPy array on the CPU. detach() and cpu()
# leave the values of an already-detached / already-on-CPU tensor unchanged.
init_pos = NN_IRIS.get_params().detach().cpu().numpy()

FD_optimizer = FD_opt(init_pos, n_perturb=grad_dim, alpha=1e-3, epsilon=1e-7)
# NOTE(review): beta1=0.99 / beta2=0.7 differ from the SPSA cell (0.9 / 0.99) —
# confirm these values are intentional.
Adam = AdamOptimizer(init_pos, lr=1e-2, beta1=0.99, beta2=0.7, epsilon=1e-8)

test_acc_FD, best_reward_FD = train_online_FD_NN(
    NN_IRIS, N_dim, n_epochs, Iris_train_loader, Iris_test_loader, loss, FD_optimizer, Adam
)

Using cuda device
Neural_Net(
  (NN_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=4, out_features=10, bias=True)
    (2): ReLU()
    (3): Linear(in_features=10, out_features=3, bias=True)
  )
)
{i+1}Epoch [1/500], Step [1/1], Loss: 1.1106672286987305, Test Accuracy: 44.73684210526316%
{i+1}Epoch [2/500], Step [1/1], Loss: 1.0980488061904907, Test Accuracy: 44.73684210526316%
{i+1}Epoch [3/500], Step [1/1], Loss: 1.0857322216033936, Test Accuracy: 44.73684210526316%
{i+1}Epoch [4/500], Step [1/1], Loss: 1.072855830192566, Test Accuracy: 44.73684210526316%
{i+1}Epoch [5/500], Step [1/1], Loss: 1.0610471963882446, Test Accuracy: 44.73684210526316%
{i+1}Epoch [6/500], Step [1/1], Loss: 1.0514963865280151, Test Accuracy: 44.73684210526316%
{i+1}Epoch [7/500], Step [1/1], Loss: 1.0470468997955322, Test Accuracy: 44.73684210526316%
{i+1}Epoch [8/500], Step [1/1], Loss: 1.0497366189956665, Test Accuracy: 44.73684210526316%
{i+1}Epoch [9/500], Step [1

In [100]:
# Test-accuracy curve of the finite-difference-trained network.
# Epochs are numbered from 1 (as in the training log): the x-axis is logarithmic,
# so a point at x = 0 cannot be placed on it.
epochs = np.arange(1, len(test_acc_FD) + 1)

fig = go.Figure()
# The trace is labelled after the optimizer that produced it (it was 'PEPG', a copy-paste leftover).
fig.add_trace(go.Scatter(x=epochs, y=test_acc_FD, mode='lines', name='FD'))
# change theme to white and set the size of the plot
fig.update_layout(template='plotly_white', width=400, height=300, margin=dict(l=20, r=20, t=20, b=20))
fig.update_xaxes(title_text="Epochs", type='log')
fig.update_yaxes(title_text="Accuracy [%]")

In [None]:
# Using PSO for training the NN
# NOTE(review): this cell relies on Tiny_convnet, train_loader_MNIST and
# test_loader_MNIST, none of which are defined in the visible part of this
# notebook — confirm they are provided (e.g. by NN_utils_IRIS) before running.
n_epochs = 1
NN_MNIST = Tiny_convnet()
N_dim = NN_MNIST.count_parameters()
pop_size = 100

# PSO updates the weights itself, so autograd does not need to track them.
with torch.no_grad():
    for param in NN_MNIST.parameters():
        param.requires_grad = False
loss = nn.CrossEntropyLoss()

# Search-space bounds for every particle coordinate
upper_bound = 0.5
lower_bound = -0.5

# params dictionary handed to PSO_opt
params = {'c_1': 2.5,
          'c_2': 0.85,
          'w': 0.7,
          'Vmax': 0.15*(upper_bound-lower_bound),
          'upper_bound': upper_bound,
          'lower_bound': lower_bound,
          'pop_size': pop_size,
          }

# The swarm starts from uniformly random positions inside the bounds, one column
# per particle. The previous version first extracted the network's own parameters
# into init_pos and then overwrote them here; that dead extraction was removed.
init_pos = (upper_bound - lower_bound) * np.random.rand(N_dim, pop_size) + lower_bound
# Initial velocities are drawn uniformly from [0, 0.1).
V_init = 0.1 * np.random.rand(N_dim, pop_size)
PSO_optimizer = PSO_opt(X_init=init_pos, V_init=V_init, params=params)

test_acc_PSO, best_reward_PSO = train_online_pop_NN(NN_MNIST, n_epochs, train_loader_MNIST, test_loader_MNIST, loss, PSO_optimizer)

In [None]:
# Test-accuracy curve of the PSO-trained network.
# Epochs are numbered from 1: the x-axis is logarithmic, so a point at x = 0
# cannot be placed on it.
epochs = np.arange(1, len(test_acc_PSO) + 1)

fig = go.Figure()
# The trace is labelled after the optimizer that produced it (it was 'PEPG', a copy-paste leftover).
fig.add_trace(go.Scatter(x=epochs, y=test_acc_PSO, mode='lines', name='PSO'))
# change theme to white and set the size of the plot
fig.update_layout(template='plotly_white', width=400, height=300, margin=dict(l=20, r=20, t=20, b=20))
fig.update_xaxes(title_text="Epochs", type='log')
fig.update_yaxes(title_text="Accuracy [%]")

In [None]:
# Plot one histogram of weight values per weight tensor of the model.
model = NN_MNIST

# Figures are collected first and displayed together at the end.
figs = []

for name, param in model.named_parameters():
    # Only parameters whose name contains 'weight' are plotted; everything else is skipped.
    if 'weight' not in name:
        continue

    # 1-D NumPy copy of the tensor's values, taken on the CPU
    weights = param.detach().cpu().numpy().flatten()

    # One histogram trace per figure, with titles for readability
    fig = go.Figure(data=[go.Histogram(x=weights, name=name)])
    fig.update_layout(
        title=f'Histogram of Weights for Layer: {name}',
        xaxis_title='Weight values',
        yaxis_title='Frequency',
        bargap=0.2,
    )
    figs.append(fig)

# Show all histograms
for fig in figs:
    fig.show()

In [None]:

# Wine dataset
# Fixed seed so that the train/test split and the loader order are reproducible.
SEED = 0

# Forward slashes resolve on Windows, Linux and macOS alike; the previous
# backslash-separated literal only resolved on Windows.
# NOTE(review): the UCI distribution of winequality-red.csv is ';'-separated —
# if that is the file in use, read_csv needs sep=';'. Confirm against the local file.
wine_df = pd.read_csv("data/WINE/winequality-red.csv")

wine_raw = wine_df.values.astype(np.float32)

# Convert to PyTorch tensors: every column but the last is an input feature,
# the last column is the target.
X = torch.from_numpy(wine_raw[:, :-1])
Y = torch.from_numpy(wine_raw[:, -1]).unsqueeze(1)

# Create a single dataset
full_dataset = Custom_dataset(X, Y)

# Split into train and test sets (75 % / 25 %)
train_size = int(0.75 * len(full_dataset))
test_size = len(full_dataset) - train_size

# A dedicated generator keeps the split reproducible without touching the
# global torch RNG state.
split_generator = torch.Generator().manual_seed(SEED)
Wine_train, Wine_test = torch.utils.data.random_split(
    full_dataset, [train_size, test_size], generator=split_generator
)

# Mini-batches of 100 samples; only the training loader is shuffled.
Wine_train_loader = torch.utils.data.DataLoader(
    dataset=Wine_train, batch_size=100, shuffle=True, generator=split_generator
)
Wine_test_loader = torch.utils.data.DataLoader(dataset=Wine_test, batch_size=100, shuffle=False)
