In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
import seaborn as sns
from numpy import asarray
from numpy import savetxt
from plotly.subplots import make_subplots
from scipy.interpolate import interp1d

from ADAM_opt import AdamOptimizer
from CMA_obj import CMA_opt
from PEPG_obj import PEPG_opt
from PSO_obj import PSO_opt
from SPSA_obj import SPSA_opt
# Star import kept ahead of the explicit imports below so that those names
# still take precedence over anything NN_utils exports, as in the original order.
from NN_utils import *

import pandas as pd
import torch
import torch.nn as nn
from torchvision import datasets, transforms

#   Online Training of Neural Networks

- The NN class, its helper functions, and the training-loop functions are defined in `NN_utils`.

### Loading datasets
X is the input, Y the output

In [2]:
# MNIST dataset: build the training pipeline first, then the test pipeline.
MNIST_train = datasets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
train_loader_MNIST = torch.utils.data.DataLoader(
    dataset=MNIST_train,
    batch_size=100,
    shuffle=True,
)

MNIST_test = datasets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)
test_loader_MNIST = torch.utils.data.DataLoader(
    dataset=MNIST_test,
    batch_size=100,
    shuffle=False,
)

# Pull one batch from each loader (X: inputs, Y: outputs).
X_train_MNIST, Y_train_MNIST = next(iter(train_loader_MNIST))
X_test_MNIST, Y_test_MNIST = next(iter(test_loader_MNIST))

In [5]:
# Baseline: train the small convnet with a gradient-based optimiser (Adam).
Mini_NN = Tiny_convnet()
N_dim = Mini_NN.count_parameters()
print('Number of parameters in the network: ', N_dim)

# Objective and optimiser that are handed to the training loop.
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=Mini_NN.parameters(), lr=1e-3)

# Train the full NN and keep the test-accuracy values returned by the loop.
n_epochs = 20
test_acc = train_pytorch_NN(
    Mini_NN,
    n_epochs,
    train_loader_MNIST,
    test_loader_MNIST,
    loss,
    optimizer,
)

Number of parameters in the network:  11274
Using cuda device
Tiny_convnet(
  (conv1): Conv2d(1, 8, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(8, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=784, out_features=10, bias=True)
)
Epoch [1/10], Step [100/600], Loss: 0.3023834824562073, Test Accuracy: 90.0%
Epoch [1/10], Step [200/600], Loss: 0.18753696978092194, Test Accuracy: 94.36%
Epoch [1/10], Step [300/600], Loss: 0.07241831719875336, Test Accuracy: 95.83%
Epoch [1/10], Step [400/600], Loss: 0.19058899581432343, Test Accuracy: 96.65%
Epoch [1/10], Step [500/600], Loss: 0.18027405440807343, Test Accuracy: 97.01%
Epoch [1/10], Step [600/600], Loss: 0.10318891704082489, Test Accuracy: 97.39%
Epoch [2/10], Step [100/600], Loss: 0.06281963735818863, Test Accur

In [6]:
# Test-accuracy curve of the gradient-trained network, one point per recorded value.
fig = go.Figure(
    data=[
        go.Scatter(
            x=np.arange(len(test_acc)),
            y=test_acc,
            mode='lines',
            name='Full NN',
        )
    ]
)
# White theme and a compact square canvas; this call is the cell's last
# expression, so the returned figure is what the notebook displays.
fig.update_layout(
    template='plotly_white',
    width=400,
    height=400,
    margin={'l': 20, 'r': 20, 't': 20, 'b': 20},
)


In [3]:
# Training loop PEPG for MNIST:
# a fresh Tiny_convnet whose weights are updated by the PEPG optimizer.
n_epochs = 10
pop_size = 100
NN_MNIST = Tiny_convnet()
N_dim = NN_MNIST.count_parameters()

# PEPG updates the weights, so autograd does not need to track any parameter.
NN_MNIST.requires_grad_(False)
loss = nn.CrossEntropyLoss()

# Starting point handed to PEPG: the network's current parameters as a NumPy
# array. detach() and cpu() leave the values unchanged when the tensor is
# already detached / already on the CPU, so no conditionals are needed.
init_pos = NN_MNIST.get_params().detach().cpu().numpy()

# learning parameters
PEPG_optimizer = PEPG_opt(
    N_dim,
    pop_size,
    learning_rate=0.01,
    starting_mu=init_pos,
    starting_sigma=0.1,
)
PEPG_optimizer.sigma_alpha = 0.2
PEPG_optimizer.sigma_decay = 0.9999
PEPG_optimizer.sigma_limit = 0.02
PEPG_optimizer.elite_ratio = 0.1
PEPG_optimizer.weight_decay = 0.005

test_acc_PEPG, best_reward_PEPG = train_online_pop_NN(
    NN_MNIST,
    n_epochs,
    train_loader_MNIST,
    test_loader_MNIST,
    loss,
    PEPG_optimizer,
)

Using cuda device
Tiny_convnet(
  (conv1): Conv2d(1, 8, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(8, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=784, out_features=10, bias=True)
)
Epoch [1/10], Step [100/600], Loss: 2.304324150085449, Test Accuracy: 14.99%
Epoch [1/10], Step [200/600], Loss: 2.2977232933044434, Test Accuracy: 15.38%
Epoch [1/10], Step [300/600], Loss: 2.003983736038208, Test Accuracy: 44.33%
Epoch [1/10], Step [400/600], Loss: 1.2355366945266724, Test Accuracy: 59.58%
Epoch [1/10], Step [500/600], Loss: 1.3815442323684692, Test Accuracy: 70.12%
Epoch [1/10], Step [600/600], Loss: 1.1501518487930298, Test Accuracy: 75.31%
Epoch [2/10], Step [100/600], Loss: 0.8681250214576721, Test Accuracy: 80.55%
Epoch [2/10], Step [200/600], Loss: 0.6

In [4]:
# Test-accuracy curve of the PEPG-trained network.
fig = go.Figure()
# Number the recorded values from 1 rather than 0: the x-axis below is
# logarithmic, and 0 cannot be placed on a log axis, so a 0-based index
# would silently hide the first point of the curve.
fig.add_trace(go.Scatter(x=np.arange(1, len(test_acc_PEPG) + 1), y=test_acc_PEPG, mode='lines', name='PEPG'))
# change theme to white and set the size of the plot
fig.update_layout(template='plotly_white', width=400, height=300, margin=dict(l=20, r=20, t=20, b=20))
fig.update_xaxes(title_text="Epochs", type='log')
# Last expression of the cell: the returned figure is what the notebook displays.
fig.update_yaxes(title_text="Accuracy [%]")

In [7]:
# Save the PEPG test-accuracy values as CSV.
# The path is built with pathlib so the separators are correct on every OS
# (hard-coded backslashes only form a directory path on Windows), and the
# results directory is created first so savetxt does not fail when it is missing.
pepg_results_dir = Path('data') / 'Results' / 'NN_training' / 'online_training'
pepg_results_dir.mkdir(parents=True, exist_ok=True)
savetxt(pepg_results_dir / 'PEPG_test_acc.csv', test_acc_PEPG, delimiter=',')

### We use CMA to train the FFNN
- This does not work well: even this simple architecture has too many parameters, so CMA is painfully slow.

In [3]:
# Using CMA-ES for training the NN
n_epochs = 1
pop_size = 10
NN_MNIST = Tiny_convnet()
N_dim = NN_MNIST.count_parameters()

# CMA-ES updates the weights, so autograd does not need to track any parameter.
NN_MNIST.requires_grad_(False)
loss = nn.CrossEntropyLoss()

# Initial mean handed to CMA-ES: the network's current parameters as a NumPy
# array. detach() and cpu() leave the values unchanged when the tensor is
# already detached / already on the CPU, so no conditionals are needed.
init_pos = NN_MNIST.get_params().detach().cpu().numpy()

# learning parameters: half of the population is passed as select_pop
CMA_optimizer = CMA_opt(
    N_dim,
    pop_size,
    select_pop=pop_size // 2,
    sigma_init=0.1,
    mean_init=init_pos,
)
# NOTE(review): presumably controls how often the eigendecomposition is
# refreshed — confirm against CMA_obj.
CMA_optimizer.eigen_update_frequency = 100

test_acc_CMA, best_reward_CMA = train_online_pop_NN(
    NN_MNIST,
    n_epochs,
    train_loader_MNIST,
    test_loader_MNIST,
    loss,
    CMA_optimizer,
)


Using cuda device
Tiny_convnet(
  (conv1): Conv2d(1, 8, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(8, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=784, out_features=10, bias=True)
)
0r1r2r3r4r5r6r7r8r9r10r11r12r13r14r15r16r17r18r19r20r21r22r23r24r25r26r27r28r29r30r31r32r33r34r35r36r37r38r39r40r41r42r43r44r45r46r47r48r49r50r51r52r53r54r55r56r57r58r59r60r61r62r63r64r65r66r67r68r69r70r71r72r73r74r75r76r77r78r79r80r81r82r83r84r85r86r87r88r89r90r91r92r93r94r95r96r97r98r99rEpoch [1/1], Step [100/600], Loss: 2.380072593688965, Test Accuracy: 9.73%
100r101r102r103r104r105r106r107r108r109r110r111r112r113r114r115r116r117r118r119r120r121r122r123r124r125r126r127r128r129r130r131r132r133r134r135r136r137r138r139r140r141r142r143r144r145r146r147r148r149r150r151r152r153r15

KeyboardInterrupt: 