# Testing the experiment classes


In [10]:
# from common.experiment import KubemlExperiment, History, TrainOptions, TrainRequest
import pandas as pd
import glob
import numpy as np
import pickle
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torchvision import transforms
import torch.utils.data as tdata

In [4]:
from torch import nn

class LeNet(nn.Module):
    """ Definition of the LeNet network as per the 1998 paper

    Credits to https://github.com/ChawDoe/LeNet5-MNIST-PyTorch for the
    convenience of the network definition and the train loop found there
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(256, 120)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(120, 84)
        self.relu4 = nn.ReLU()
        self.fc3 = nn.Linear(84, 10)
        self.relu5 = nn.ReLU()

    def forward(self, x):
        y = self.conv1(x)
        y = self.relu1(y)
        y = self.pool1(y)
        y = self.conv2(y)
        y = self.relu2(y)
        y = self.pool2(y)
        y = y.view(y.shape[0], -1)
        y = self.fc1(y)
        y = self.relu3(y)
        y = self.fc2(y)
        y = self.relu4(y)
        y = self.fc3(y)
        y = self.relu5(y)
        return y

In [None]:
n = LeNet()
summary(n, input_size=(32, 1, 28, 28))

# Load the dataset

In [28]:
x_train, labels = np.load('./datasets/cifar10/cifar10_x_train.npy'), np.load('./datasets/cifar10/cifar10_y_train.npy')

In [7]:
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
train_transf = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, 4),
    transforms.ToTensor(),
    normalize
])

val_transf = transforms.Compose([
    transforms.ToTensor(),
    normalize
])

In [11]:
from torch.nn.functional import nll_loss, cross_entropy

def train(model: nn.Module, device,
          train_loader: tdata.DataLoader,
          optimizer: torch.optim.Optimizer, epoch) -> float:
    """Loop used to train the network"""

    # create optimizer
    # optimizer = optim.SGD(model.parameters(), lr=0.1, weight_decay=1e-4, momentum=0.9)

    # load_state(optimizer)
    
    criterion = nn.CrossEntropyLoss()
    
    model.train()
    loss, tot = 0, 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)


        loss = cross_entropy(output, target)
        tot += loss.item()
        

        loss.backward()
        optimizer.step()
        

        if batch_idx % 30 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                   100. * batch_idx / len(train_loader), loss.item()))
            

    # save the optimizer state
    # save_state(optimizer)

    return tot/len(train_loader)


def validate(model, device, val_loader: tdata.DataLoader) -> (float, float):
    """Loop used to validate the network"""

    criterion =nn.CrossEntropyLoss()
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            _, predicted = torch.max(output.data, 1)
            test_loss += cross_entropy(output, target).item()  # sum up batch loss
            correct += predicted.eq(target).sum().item()

    test_loss /= len(val_loader)

    accuracy = 100. * correct / len(val_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(val_loader.dataset),
        100. * correct / len(val_loader.dataset)))
    return accuracy, test_loss


# Functions to load and save state

In [20]:
import os
def load_state(optimizer):
    if os.path.isfile('state.pkl'):
        with open('state.pkl', 'rb') as f:
            state = pickle.load(f)
            update_state(optimizer, state)

    else:
        print('no state found')


def update_state(optimizer, state):
    state = {
      'param_groups': optimizer.state_dict()['param_groups'],
      'state': state
    }
    optimizer.load_state_dict(state)

def save_state(optimizer):
    print('saving optimizer state')
    with open('state.pkl', 'wb') as f:
        pickle.dump(optimizer.state_dict()['state'], f)

In [21]:
from torchvision.models.resnet import resnet18

torch.manual_seed(42) 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")

# Create the model
# model = create_model(init=True).to(device)
model = resnet18().to(device)

In [33]:
class MnistData(tdata.Dataset):
    def __init__(self, feat, labels):
        self.feat = feat
        self.labels = labels.flatten()
        self.normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
        self.transf = transforms.Compose([
            transforms.ToTensor(),
            self.normalize
            ])
        
    def __len__(self):
        return len(self.feat)
    
    def __getitem__(self, idx):
        return self.transf(self.feat[idx]), self.labels[idx].astype(np.int64)
    
train_data = MnistData(x_train, labels)

In [35]:
from torch import optim

train_loader = torch.utils.data.DataLoader(train_data, batch_size=128)
# val_loader = torch.utils.data.DataLoader(val_data, batch_size=128)

for epoch in range(4):
    # create the optimizer in each iteration and load state
    optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
    load_state(optimizer)

    print('Epoch', epoch)
    optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
    train(model, device, train_loader, optimizer, epoch)
    save_state(optimizer)
#     validate(model, device, val_loader)

Epoch 0
saving optimizer state
Epoch 1
saving optimizer state
Epoch 2
saving optimizer state
Epoch 3
saving optimizer state


In [39]:
optimizer.__dict__

{'defaults': {'lr': 0.1,
  'momentum': 0.9,
  'dampening': 0,
  'weight_decay': 0.0001,
  'nesterov': False},
 'state': defaultdict(dict,
             {Parameter containing:
              tensor([[[[-1.1173e-02,  2.3008e-02, -1.0198e-01,  ..., -2.7063e-01,
                         -9.6612e-02,  5.2868e-02],
                        [-7.2319e-02, -1.0036e-02, -1.1267e-01,  ..., -2.8526e-01,
                         -1.1592e-01,  2.3221e-02],
                        [-1.6277e-02, -2.2969e-03, -1.2320e-01,  ..., -2.7130e-01,
                         -2.7788e-02,  6.9224e-02],
                        ...,
                        [ 2.1365e-02, -8.3750e-02, -1.5994e-01,  ..., -1.0372e-01,
                          5.5082e-02,  1.5998e-01],
                        [-5.2936e-02, -1.2413e-01, -1.2807e-01,  ..., -1.0839e-01,
                          2.8457e-02,  1.5326e-01],
                        [-8.4318e-02, -4.6436e-02, -6.7843e-03,  ...,  4.0995e-02,
                          1.9105e-01,  