In [1]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from CMA_obj import CMA_opt
from PEPG_obj import PEPG_opt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from SPSA_obj import SPSA_opt
from Finite_diff_grad import FD_opt
from ADAM_opt import AdamOptimizer
from PSO_obj import PSO_opt
from scipy.interpolate import interp1d
from numpy import asarray
from numpy import savetxt
from NN_utils import *
import torch
import torch.nn as nn
from torchvision import datasets, transforms
import pandas as pd
from torchsummary import summary

# Seed NumPy's and PyTorch's global random generators from a single constant.
# torch.manual_seed stays last so the cell still echoes the Generator object.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)


<torch._C.Generator at 0x18685450b50>

# Online training test on Fashion-MNIST

In [2]:
# Load the Fashion-MNIST train/test splits (fetched into ./data when missing)
to_tensor = transforms.ToTensor()
batch_size_MNIST = 100

MNIST_train = datasets.FashionMNIST(root='./data', train=True, transform=to_tensor, download=True)
MNIST_test = datasets.FashionMNIST(root='./data', train=False, transform=to_tensor, download=True)

# Mini-batch loaders: only the training loader shuffles its samples.
train_loader_MNIST = torch.utils.data.DataLoader(
    dataset=MNIST_train, batch_size=batch_size_MNIST, shuffle=True
)
test_loader_MNIST = torch.utils.data.DataLoader(
    dataset=MNIST_test, batch_size=batch_size_MNIST, shuffle=False
)

# Keep the first batch of each loader as (inputs, labels) tensors.
first_train_batch = next(iter(train_loader_MNIST))
X_train_MNIST, Y_train_MNIST = first_train_batch
first_test_batch = next(iter(test_loader_MNIST))
X_test_MNIST, Y_test_MNIST = first_test_batch

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data\FashionMNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:06<00:00, 4014433.12it/s]


Extracting ./data\FashionMNIST\raw\train-images-idx3-ubyte.gz to ./data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 1611828.58it/s]

Extracting ./data\FashionMNIST\raw\train-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz



100%|██████████| 4422102/4422102 [00:01<00:00, 3954408.74it/s]


Extracting ./data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to ./data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<?, ?it/s]

Extracting ./data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw






In [3]:
# Train Tiny_convnet with Adam (lr=0.001) on a cross-entropy loss.
n_epochs = 5
loss = nn.CrossEntropyLoss()

Mini_NN = Tiny_convnet()
N_dim = Mini_NN.count_parameters()
print('Number of parameters in the network: ', N_dim)

optimizer = torch.optim.Adam(Mini_NN.parameters(), lr=0.001)

# Full training run over both loaders.
# NOTE(review): the return value is presumably the test-accuracy history - confirm in NN_utils.
test_acc = train_pytorch_NN(
    Mini_NN,
    n_epochs,
    train_loader_MNIST,
    test_loader_MNIST,
    loss,
    optimizer,
)

Number of parameters in the network:  11274
Using cuda device
Tiny_convnet(
  (conv1): Conv2d(1, 8, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(8, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=784, out_features=10, bias=True)
)
Epoch [1/5], Step [100/600], Loss: 0.617326021194458, Test Accuracy: 75.06%
Epoch [1/5], Step [200/600], Loss: 0.4337739944458008, Test Accuracy: 78.76%
Epoch [1/5], Step [300/600], Loss: 0.46650195121765137, Test Accuracy: 80.83%
Epoch [1/5], Step [400/600], Loss: 0.6987248063087463, Test Accuracy: 81.68%
Epoch [1/5], Step [500/600], Loss: 0.5264167785644531, Test Accuracy: 82.98%
Epoch [1/5], Step [600/600], Loss: 0.4977506995201111, Test Accuracy: 82.78%
Epoch [2/5], Step [100/600], Loss: 0.43587490916252136, Test Accuracy: 84.35%

In [4]:
#PEPG pop size scan
# Trains a fresh Tiny_convnet with PEPG once per population size in pop_vec
# and stores each run's test-accuracy history as one row of test_acc_mat_PEPG.

n_epochs = 20

pop_vec = [5, 10, 20, 50, 100, 200, 300, 500]

# CrossEntropyLoss carries no learnable state, so a single instance is shared
# by every run instead of being rebuilt inside the loop.
loss = nn.CrossEntropyLoss()

# Allocated after the first run, once the history length is known, so the
# matrix width follows what train_online_pop_NN returns rather than a
# hard-coded 12*n_epochs.
test_acc_mat_PEPG = None

for i, k in enumerate(pop_vec):
    print(k)
    NN_MNIST = Tiny_convnet()
    N_dim = NN_MNIST.count_parameters()

    # The weights are updated by PEPG, not by backpropagation, so autograd
    # tracking is switched off for every parameter.
    for param in NN_MNIST.parameters():
        param.requires_grad_(False)

    # Starting point of the search: the network's current parameters as a
    # CPU NumPy array (detach/cpu are no-ops when already satisfied).
    init_pos = NN_MNIST.get_params().detach().cpu().numpy()

    PEPG_optimizer = PEPG_opt(N_dim, pop_size=k, learning_rate=0.01, starting_mu=init_pos, starting_sigma=0.1)

    # PEPG hyper-parameters, set as attributes on the optimizer object.
    PEPG_optimizer.sigma_decay = 0.9999
    PEPG_optimizer.sigma_alpha = 0.2
    PEPG_optimizer.sigma_limit = 0.02
    PEPG_optimizer.elite_ratio = 0.1
    PEPG_optimizer.weight_decay = 0.005

    # best_reward_PEPG is returned as well but is not stored across runs.
    test_acc_PEPG, best_reward_PEPG = train_online_pop_NN(NN_MNIST, n_epochs, train_loader_MNIST, test_loader_MNIST, loss, PEPG_optimizer)

    if test_acc_mat_PEPG is None:
        test_acc_mat_PEPG = np.zeros((len(pop_vec), len(test_acc_PEPG)))
    test_acc_mat_PEPG[i, :] = test_acc_PEPG

5
Using cuda device
Tiny_convnet(
  (conv1): Conv2d(1, 8, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(8, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=784, out_features=10, bias=True)
)
{i+1}Epoch [1/20], Step [50/600], Loss: 2.459984064102173, Test Accuracy: 10.36%
{i+1}Epoch [1/20], Step [100/600], Loss: 2.7072689533233643, Test Accuracy: 18.44%
{i+1}Epoch [1/20], Step [150/600], Loss: 2.306612253189087, Test Accuracy: 7.74%
{i+1}Epoch [1/20], Step [200/600], Loss: 2.384544610977173, Test Accuracy: 16.96%
{i+1}Epoch [1/20], Step [250/600], Loss: 2.4376559257507324, Test Accuracy: 13.31%
{i+1}Epoch [1/20], Step [300/600], Loss: 2.3221144676208496, Test Accuracy: 12.27%
{i+1}Epoch [1/20], Step [350/600], Loss: 2.345755100250244, Test Accuracy: 13.96%
{i+1}E

In [None]:
#plot population size scan of PEPG

fig = go.Figure()

# Highest test accuracy recorded during each run (one row per population size).
best_acc_pop = np.max(test_acc_mat_PEPG, axis=1)

# Markers make the individual sampled population sizes visible on the curve.
fig.add_trace(go.Scatter(x=pop_vec, y=best_acc_pop, mode='lines+markers', name='PEPG'))

# update_layout returns the figure, so as the last expression it renders the plot.
fig.update_layout(
    title='PEPG: best test accuracy vs. population size',
    xaxis_title='Population size',
    yaxis_title='Best test accuracy',
)


In [5]:
#plot population size ratio of PEPG

fig = go.Figure()

# Highest test accuracy recorded during each run (one row per population size).
best_acc_pop = np.max(test_acc_mat_PEPG, axis=1)

# Population size as a percentage of the network's parameter count. N_dim is
# the value from count_parameters() rather than a hard-coded 11274, so the
# axis stays correct if the architecture changes.
pop_ratio_percent = 100 * np.array(pop_vec) / N_dim

fig.add_trace(go.Scatter(x=pop_ratio_percent, y=best_acc_pop, mode='lines+markers', name='PEPG'))

# update_layout returns the figure, so as the last expression it renders the plot.
fig.update_layout(
    title='PEPG: best test accuracy vs. relative population size',
    xaxis_title='Population size / number of parameters (%)',
    yaxis_title='Best test accuracy',
)