Experimentation using PyTorch to build a fashion outfit classifier
This will (likely) be converted to a script at the end to prevent any Jupyter overhead, but we'll see

In [1]:
# import standard PyTorch modules
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter # TensorBoard support

# import torchvision module to handle image manipulation
import torchvision
import torchvision.transforms as transforms

# calculate train time, writing train data to files etc.
import time
import pandas as pd
import json
from IPython.display import clear_output

from collections  import OrderedDict
from collections import namedtuple
from itertools import product


torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True)     # On by default, leave it here for clarity

<torch.autograd.grad_mode.set_grad_enabled at 0x20053cd9278>

In [2]:
# Use standard FashionMNIST dataset
train_set = torchvision.datasets.FashionMNIST(
    root = '../data',
    train = True,
    download = True,
    transform = transforms.Compose([
        transforms.ToTensor()                                 
    ])
)

In [3]:
train_set

Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: ../data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
           )

So the structure given in the tutorial is given to be very rigid - since we're dealing with fixed-resolution images and various convolves + poolings I'll need to keep the structure math correct. I'll play around with it and see if there's way I can improve it and why it might be working better (or most likely - worse). 

In [11]:
# Use standard FashionMNIST dataset
test_set = torchvision.datasets.FashionMNIST(
    root = '../data',
    train = False,
    download = True,
    transform = transforms.Compose([
        transforms.ToTensor()                                 
    ])
)

In [12]:
test_set

Dataset FashionMNIST
    Number of datapoints: 10000
    Root location: ../data
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
           )

In [4]:
# Build the neural network, expand on top of nn.Module
# Base network given by the tutorial
class Network(nn.Module):
  def __init__(self):
    super().__init__()

    # define layers
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
    self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

    self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
    self.fc2 = nn.Linear(in_features=120, out_features=60)
    self.out = nn.Linear(in_features=60, out_features=10)

  # define forward function
  def forward(self, t):
    # conv 1
    t = self.conv1(t) # transform the given tensor on the 1st convolutional layer
    t = F.relu(t) # compute the activation function on the tensor
    t = F.max_pool2d(t, kernel_size=2, stride=2) # pool the layer (it's now 1/2 the size it was (28-4 in each direction))

    # conv 2
    t = self.conv2(t) # repeat above
    t = F.relu(t)
    t = F.max_pool2d(t, kernel_size=2, stride=2)

    # fc1
    t = t.reshape(-1, 12*4*4) # Flatten out the final pooling layer 
    t = self.fc1(t)
    t = F.relu(t)

    # fc2
    t = self.fc2(t)
    t = F.relu(t)

    # output
    t = self.out(t)
    # don't need softmax here since we'll use cross-entropy as activation.

    return t

In [60]:
# Do some hyperparameter tuning
# put all hyper params into a OrderedDict, easily expandable
params = OrderedDict(
    lr = [.01, .005, .001, .0005],
    batch_size = [100, 250, 500, 1000],
    shuffle = [True, False],
    weight_decay = [0, 0.1]
)
epochs = 50

# Read in the hyper-parameters and return a Run namedtuple containing all the 
# combinations of hyper-parameters
class RunBuilder():
  @staticmethod
  def get_runs(params):

    Run = namedtuple('Run', params.keys())

    runs = []
    for v in product(*params.values()):
      runs.append(Run(*v))
    
    return runs

In [98]:
# Helper class, help track loss, accuracy, epoch time, run time, 
# hyper-parameters etc. Also record to TensorBoard and write into csv, json
class RunManager():
    def __init__(self):

        # tracking every epoch count, loss, accuracy, time
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_start_time = None

        #Testing parameters
        self.epoch_loss_test = 0
        self.epoch_num_correct_test = 0
        self.test_score_time = 0

        # tracking every run count, run data, hyper-params used, time
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None

        # record model, loader and TensorBoard 
        self.network = None
        self.loader = None
        self.tb = None

    # record the count, hyper-param, model, loader of each run
    # record sample images and network graph to TensorBoard  
    def begin_run(self, run, network, loader):

        self.run_start_time = time.time()

        self.run_params = run
        self.run_count += 1

        self.network = network
        self.loader = loader
        self.tb = SummaryWriter(comment=f'-{run}')

        images, labels = next(iter(self.loader))
        grid = torchvision.utils.make_grid(images)

        self.tb.add_image('images', grid)
        self.tb.add_graph(self.network, images)

    # when run ends, close TensorBoard, zero epoch count
    def end_run(self):
        self.save_run()
        self.tb.close()
        self.epoch_count = 0


    def save_run(self):
        path = f"./saved_models/model-{self.run_count}.pth"
        print(f"saving model to {path}")
        torch.save(self.network.state_dict(), path)

    # zero epoch count, loss, accuracy, 
    def begin_epoch(self):
        self.epoch_start_time = time.time()

        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0

        #Testing parameters
        self.epoch_loss_test = 0
        self.epoch_num_correct_test = 0
        self.test_score_time = 0    

    # 
    def end_epoch(self):
        # calculate epoch duration and run duration(accumulate)
        epoch_duration = time.time() - self.epoch_start_time
        run_duration = time.time() - self.run_start_time

        # record epoch loss and accuracy
        loss = self.epoch_loss / len(self.loader.dataset)
        accuracy = self.epoch_num_correct / len(self.loader.dataset)    

        # Record results on Testing parameters
        test_loss = self.epoch_loss_test / 10000
        test_accuracy = self.epoch_num_correct_test / 10000


        # Record epoch loss and accuracy to TensorBoard 
        self.tb.add_scalar('Loss', loss, self.epoch_count)
        self.tb.add_scalar('Accuracy', accuracy, self.epoch_count)
        self.tb.add_scalar('Test Loss', test_loss, self.epoch_count)
        self.tb.add_scalar('Test Accuracy', test_accuracy, self.epoch_count)

        # Record params to TensorBoard
        for name, param in self.network.named_parameters():
          self.tb.add_histogram(name, param, self.epoch_count)
          self.tb.add_histogram(f'{name}.grad', param.grad, self.epoch_count)

        # Write into 'results' (OrderedDict) for all run related data
        results = OrderedDict()
        results["run"] = self.run_count
        results["epoch"] = self.epoch_count
        results["loss"] = loss
        results["accuracy"] = accuracy
        results["test_loss"] = test_loss
        results["test_accuracy"] = test_accuracy
        results["test_score_time"] = self.test_score_time / 10000 # time per image to score, in seconds I think
        results["epoch duration"] = epoch_duration
        results["run duration"] = run_duration

        # Record hyper-params into 'results'
        for k,v in self.run_params._asdict().items(): results[k] = v
        self.run_data.append(results)
        df = pd.DataFrame.from_dict(self.run_data, orient = 'columns')

        # display epoch information and show progress
        clear_output(wait=True)
        display(df)

    # accumulate loss of batch into entire epoch loss
    def track_loss(self, loss):
        # multiply batch size so variety of batch sizes can be compared
        self.epoch_loss += loss.item() * self.loader.batch_size

    # accumulate number of corrects of batch into entire epoch num_correct
    def track_num_correct(self, preds, labels):
        self.epoch_num_correct += self._get_num_correct(preds, labels)

    # accumulate loss of batch into entire epoch loss
    def track_loss_test(self, loss):
        # multiply batch size so variety of batch sizes can be compared
        self.epoch_loss_test += loss.item() * 10000 # test loaded is a batch of 10000, so known factor. extra param?

    # accumulate number of corrects of batch into entire epoch num_correct
    def track_num_correct_test(self, preds, labels):
        self.epoch_num_correct_test += self._get_num_correct(preds, labels)

    def track_score_time_test(self, start_time, end_time):
        self.test_score_time = end_time - start_time

    @torch.no_grad()
    def _get_num_correct(self, preds, labels):
        return preds.argmax(dim=1).eq(labels).sum().item()

    # save end results of all runs into csv, json for further analysis
    def save(self, fileName):

        pd.DataFrame.from_dict(
            self.run_data, 
            orient = 'columns',
        ).to_csv(f'{fileName}.csv')

        with open(f'{fileName}.json', 'w', encoding='utf-8') as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)

In [62]:
# This code is all from the tutorial, but I can probably improve on it by getting the runs to happen in parallel - test that
m = RunManager()

print(f"Total number of runs: {len(RunBuilder.get_runs(params))}")
print(f"Total number of epochs: {epochs*len(RunBuilder.get_runs(params))}")
print(f"Time required @ 15s / epoch: {15*epochs*len(RunBuilder.get_runs(params))} seconds, {(1/240)*epochs*len(RunBuilder.get_runs(params))} hours")

# get all runs from params using RunBuilder class
for run in RunBuilder.get_runs(params):

    # if params changes, following line of code should reflect the changes too
    network = Network()
    loader = torch.utils.data.DataLoader(train_set, batch_size = run.batch_size)
    optimizer = optim.Adam(network.parameters(), lr=run.lr, weight_decay = run.weight_decay)
    
    test_loader = torch.utils.data.DataLoader(test_set, batch_size = len(test_set))
    
    m.begin_run(run, network, loader)
    for epoch in range(epochs):
      
      m.begin_epoch()
    
      for batch in loader:
        
        images = batch[0]
        labels = batch[1]
        preds = network(images)
        loss = F.cross_entropy(preds, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        m.track_loss(loss)
        m.track_num_correct(preds, labels)
      
      for batch in test_loader:
        test_images = batch[0]
        test_labels = batch[1]
        score_start_time = time.time()
        test_preds = network(test_images)
        score_end_time = time.time()
        test_loss = F.cross_entropy(test_preds, test_labels)
        
        m.track_loss_test(test_loss)
        m.track_num_correct_test(test_preds, test_labels)
        m.track_score_time_test(score_start_time, score_end_time)
        
    
      m.end_epoch()
    m.end_run()

# when all runs are done, save results to files
m.save('results')

Unnamed: 0,run,epoch,loss,accuracy,test_loss,test_accuracy,test_score_time,epoch duration,run duration,lr,batch_size,shuffle,weight_decay
0,1,1,0.571084,0.781250,0.474609,0.8240,0.000035,14.985999,15.112995,0.0100,100,True,0.0
1,1,2,0.384029,0.858733,0.422084,0.8468,0.000034,15.129178,30.306175,0.0100,100,True,0.0
2,1,3,0.357485,0.868433,0.410488,0.8483,0.000034,14.587510,44.956686,0.0100,100,True,0.0
3,1,4,0.345991,0.873317,0.413018,0.8531,0.000034,15.190000,60.214687,0.0100,100,True,0.0
4,1,5,0.328996,0.878567,0.421892,0.8592,0.000035,14.561999,74.835686,0.0100,100,True,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3195,64,46,2.302600,0.096200,2.302518,0.1000,0.000261,29.868891,974.734261,0.0005,1000,False,0.1
3196,64,47,2.302600,0.096200,2.302518,0.1000,0.000252,29.875001,1004.670263,0.0005,1000,False,0.1
3197,64,48,2.302600,0.096200,2.302518,0.1000,0.000257,29.505000,1034.232261,0.0005,1000,False,0.1
3198,64,49,2.302600,0.096200,2.302518,0.1000,0.000262,29.637001,1063.929262,0.0005,1000,False,0.1


I'm unsure if using this service is necessary since this is a windows machine and not colab, but giving it a go anyways

In [63]:
# Start a subprocess that starts tensorboard, but don't wait for it to exit before exiting
import subprocess

LOG_DIR = './runs'
cmd = f"tensorboard --logdir {LOG_DIR} --host 0.0.0.0 --port 6006 &"
tb_proc = subprocess.Popen(cmd, shell = True)

In [21]:
# tb_proc.kill() # End the Tensorboard process

Now that we've run everything (and can look at it if desired), pick the parameters that result in the lowest test loss and/or highest test accuracy and rerun it for however many epochs it takes to 'max out'. If the loss is still decreasing > 0.01 for a step, double the current max number

In [64]:
# load in the results
result_df = pd.read_json('results.json')
result_df.head(10)

Unnamed: 0,run,epoch,loss,accuracy,test_loss,test_accuracy,test_score_time,epoch duration,run duration,lr,batch_size,shuffle,weight_decay
0,1,1,0.571084,0.78125,0.474609,0.824,3.5e-05,14.985999,15.112995,0.01,100,True,0.0
1,1,2,0.384029,0.858733,0.422084,0.8468,3.4e-05,15.129178,30.306175,0.01,100,True,0.0
2,1,3,0.357485,0.868433,0.410488,0.8483,3.4e-05,14.58751,44.956686,0.01,100,True,0.0
3,1,4,0.345991,0.873317,0.413018,0.8531,3.4e-05,15.19,60.214687,0.01,100,True,0.0
4,1,5,0.328996,0.878567,0.421892,0.8592,3.5e-05,14.561999,74.835686,0.01,100,True,0.0
5,1,6,0.32275,0.879883,0.387615,0.8627,3.9e-05,14.572,89.470686,0.01,100,True,0.0
6,1,7,0.315733,0.882533,0.406133,0.8576,3.5e-05,14.516999,104.052686,0.01,100,True,0.0
7,1,8,0.308738,0.88525,0.368054,0.8733,3.6e-05,14.639998,118.762685,0.01,100,True,0.0
8,1,9,0.302874,0.887483,0.380699,0.8696,3.4e-05,14.466998,133.292686,0.01,100,True,0.0
9,1,10,0.300301,0.888233,0.377599,0.8667,3.4e-05,14.837,148.195685,0.01,100,True,0.0


In [65]:
max_epoch = max(result_df['epoch']) # same for everything so this is safe
trim_df = result_df[result_df['epoch']==max_epoch].reset_index(drop=True)

max_factors = ['accuracy','test_accuracy']
min_factors = ['loss', 'test_loss', 'test_score_time']
best_factors = {}
for factor in max_factors:
    max_val = trim_df[factor].max()
    max_run = trim_df.iloc[trim_df[factor].idxmax()]['run']
    best_factors[factor] = {'run':max_run, 'val':max_val}
    
for factor in min_factors:
    min_val = trim_df[factor].min()
    min_run = trim_df.iloc[trim_df[factor].idxmin()]['run']
    best_factors[factor] = {'run':min_run, 'val':min_val}
best_factors

{'accuracy': {'run': 33, 'val': 0.9449666666666661},
 'test_accuracy': {'run': 41, 'val': 0.8955000000000001},
 'loss': {'run': 33, 'val': 0.14635570105165202},
 'test_loss': {'run': 41, 'val': 0.30875414609909},
 'test_score_time': {'run': 56, 'val': 3.389716148376465e-05}}

Rather than decide what run is best sight-unseen, pick one by hand based on a number of factors:  
- Best training accuracy / loss
- Best test accuracy / loss (generalization)
- Was the loss still decreasing?
- similarity of parameters
This will be used to create a new run that will take the best parameters and save it off to use in the application

In [78]:
# Given a single model run, extract a) parameters of the run and b) the best factors
def best_features(df):
    # Assume some column names here
    params = ['lr', 'batch_size', 'shuffle', 'weight_decay']
    min_factors = ['loss', 'test_loss']
    max_factors = ['accuracy','test_accuracy']
    
    result_dict = OrderedDict()
    
    for param in params:
        result_dict[param] = df[param].min()
    
    for f in min_factors:
        result_dict[f] = df[f].min()
        if df[f].argmin() != df['epoch'].max():
            print(f"{f} regressed during the run!")
            print(f"{df[f].argmin()} - {df['epoch'].max()}")
            
    for f in max_factors:
        result_dict[f] = df[f].max()
        if df[f].argmax() != df['epoch'].max():
            print(f"{f} regressed during the run!")
            print(f"{df[f].argmax()} - {df['epoch'].max()}")
    return result_dict

In [81]:
best_train = 33 # Based on the above
best_train_features = best_features(result_df[result_df['run']==best_train])
best_train_features

loss regressed during the run!
49 - 50
test_loss regressed during the run!
18 - 50
accuracy regressed during the run!
49 - 50
test_accuracy regressed during the run!
15 - 50


OrderedDict([('lr', 0.001),
             ('batch_size', 100),
             ('shuffle', True),
             ('weight_decay', 0.0),
             ('loss', 0.14635570105165202),
             ('test_loss', 0.319693833589553),
             ('accuracy', 0.9449666666666661),
             ('test_accuracy', 0.8869)])

In [83]:
result_df[result_df['run']==best_train]

Unnamed: 0,run,epoch,loss,accuracy,test_loss,test_accuracy,test_score_time,epoch duration,run duration,lr,batch_size,shuffle,weight_decay
1600,33,1,0.799589,0.697,0.628467,0.7633,3.5e-05,14.372286,14.491285,0.001,100,True,0.0
1601,33,2,0.542897,0.7936,0.548165,0.8031,3.5e-05,14.827357,29.400642,0.001,100,True,0.0
1602,33,3,0.468001,0.8295,0.480319,0.8253,3.5e-05,14.84725,44.329893,0.001,100,True,0.0
1603,33,4,0.417899,0.848233,0.432517,0.8421,3.7e-05,14.823349,59.239241,0.001,100,True,0.0
1604,33,5,0.383459,0.860333,0.400061,0.8542,4.4e-05,14.913375,74.236618,0.001,100,True,0.0
1605,33,6,0.358139,0.868333,0.378867,0.8607,3.6e-05,15.018525,89.343145,0.001,100,True,0.0
1606,33,7,0.33956,0.874933,0.369679,0.8635,3.6e-05,14.86696,104.289104,0.001,100,True,0.0
1607,33,8,0.324624,0.88045,0.364067,0.8651,3.7e-05,15.038766,119.411872,0.001,100,True,0.0
1608,33,9,0.312916,0.88475,0.357986,0.8675,3.6e-05,14.934361,134.427235,0.001,100,True,0.0
1609,33,10,0.302602,0.888367,0.353483,0.8699,3.8e-05,15.069094,149.583329,0.001,100,True,0.0


In [82]:
best_test = 41
best_test_features = best_features(result_df[result_df['run']==best_test])
best_test_features

loss regressed during the run!
49 - 50
test_loss regressed during the run!
48 - 50
accuracy regressed during the run!
49 - 50
test_accuracy regressed during the run!
46 - 50


OrderedDict([('lr', 0.001),
             ('batch_size', 500),
             ('shuffle', True),
             ('weight_decay', 0.0),
             ('loss', 0.22064359498520603),
             ('test_loss', 0.30747616291046104),
             ('accuracy', 0.9195333333333331),
             ('test_accuracy', 0.8963000000000001)])

In [84]:
result_df[result_df['run']==best_test]

Unnamed: 0,run,epoch,loss,accuracy,test_loss,test_accuracy,test_score_time,epoch duration,run duration,lr,batch_size,shuffle,weight_decay
2000,41,1,1.156025,0.601367,0.742955,0.7146,3.8e-05,13.410141,13.81614,0.001,500,True,0.0
2001,41,2,0.660079,0.74255,0.643014,0.7553,3.5e-05,13.329875,27.231015,0.001,500,True,0.0
2002,41,3,0.585665,0.773633,0.572798,0.7842,3.5e-05,13.349674,40.66369,0.001,500,True,0.0
2003,41,4,0.536976,0.796617,0.527564,0.8059,3.5e-05,13.358806,54.108496,0.001,500,True,0.0
2004,41,5,0.497938,0.816367,0.491347,0.8214,3.7e-05,13.350804,67.544299,0.001,500,True,0.0
2005,41,6,0.465214,0.8315,0.464123,0.8324,3.5e-05,13.32779,80.955091,0.001,500,True,0.0
2006,41,7,0.438088,0.842317,0.440701,0.8426,3.6e-05,13.546607,94.583702,0.001,500,True,0.0
2007,41,8,0.416723,0.849733,0.422666,0.8491,3.5e-05,13.391113,108.058814,0.001,500,True,0.0
2008,41,9,0.400208,0.85635,0.40926,0.855,3.6e-05,13.469896,121.613763,0.001,500,True,0.0
2009,41,10,0.387057,0.86075,0.397019,0.8601,4.4e-05,13.495353,135.190118,0.001,500,True,0.0


### Model parameter testing conclusions:
- Lower learning rate is better overall
- Smaller batch is better, but can be improved
- Need to make sure that the training set isn't becoming overfit on small batches
- Improvements in train accuracy != improvements in test accuracy and generalization  

### Next steps: Take the previous learning and apply similar training conditions to a longer epoch

In [102]:
# Do some hyperparameter tuning
# put all hyper params into a OrderedDict, easily expandable
params = OrderedDict(
    lr = [ .001],
    batch_size = [100, 250, 500],
    shuffle = [True],
    weight_decay = [0]
)
epochs = 100


In [103]:
# This code is all from the tutorial, but I can probably improve on it by getting the runs to happen in parallel - test that
m = RunManager()

print(f"Total number of runs: {len(RunBuilder.get_runs(params))}")
print(f"Total number of epochs: {epochs*len(RunBuilder.get_runs(params))}")
print(f"Time required @ 15s / epoch: {15*epochs*len(RunBuilder.get_runs(params))} seconds, {(1/240)*epochs*len(RunBuilder.get_runs(params))} hours")

# get all runs from params using RunBuilder class
for run in RunBuilder.get_runs(params):

    # if params changes, following line of code should reflect the changes too
    network = Network()
    loader = torch.utils.data.DataLoader(train_set, batch_size = run.batch_size)
    optimizer = optim.Adam(network.parameters(), lr=run.lr, weight_decay = run.weight_decay)
    
    test_loader = torch.utils.data.DataLoader(test_set, batch_size = len(test_set))
    
    m.begin_run(run, network, loader)
    for epoch in range(epochs):
      
      m.begin_epoch()
    
      for batch in loader:
        
        images = batch[0]
        labels = batch[1]
        preds = network(images)
        loss = F.cross_entropy(preds, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        m.track_loss(loss)
        m.track_num_correct(preds, labels)
      
      for batch in test_loader:
        test_images = batch[0]
        test_labels = batch[1]
        score_start_time = time.time()
        test_preds = network(test_images)
        score_end_time = time.time()
        test_loss = F.cross_entropy(test_preds, test_labels)
        
        m.track_loss_test(test_loss)
        m.track_num_correct_test(test_preds, test_labels)
        m.track_score_time_test(score_start_time, score_end_time)
        
    
      m.end_epoch()
    m.end_run()

# when all runs are done, save results to files
m.save('results_final')

Unnamed: 0,run,epoch,loss,accuracy,test_loss,test_accuracy,test_score_time,epoch duration,run duration,lr,batch_size,shuffle,weight_decay
0,1,1,0.790605,0.697617,0.606031,0.7704,0.000045,17.145000,17.284993,0.001,100,True,0
1,1,2,0.518677,0.805767,0.510038,0.8094,0.000037,17.385999,34.732993,0.001,100,True,0
2,1,3,0.452223,0.833617,0.466107,0.8270,0.000039,16.850999,51.641993,0.001,100,True,0
3,1,4,0.411550,0.849233,0.420051,0.8433,0.000038,16.514999,68.219992,0.001,100,True,0
4,1,5,0.380166,0.860017,0.391371,0.8569,0.000039,16.981999,85.262993,0.001,100,True,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,3,96,0.160363,0.940683,0.349402,0.8921,0.000053,17.901916,1512.258783,0.001,500,True,0
296,3,97,0.159678,0.941350,0.345761,0.8921,0.000056,19.274727,1531.608511,0.001,500,True,0
297,3,98,0.158716,0.942250,0.347496,0.8922,0.000070,18.176013,1549.881505,0.001,500,True,0
298,3,99,0.158371,0.941917,0.346041,0.8924,0.000048,17.276356,1567.237863,0.001,500,True,0


saving model to ./saved_models/model-3.pth


In [104]:
final_df = pd.read_json('results_final.json')
final_df[final_df['epoch']==100]

Unnamed: 0,run,epoch,loss,accuracy,test_loss,test_accuracy,test_score_time,epoch duration,run duration,lr,batch_size,shuffle,weight_decay
99,1,100,0.08584,0.966467,0.728042,0.879,3.7e-05,16.819,1757.332458,0.001,100,True,0
199,2,100,0.116327,0.955617,0.475596,0.8833,4.2e-05,19.122095,1507.775643,0.001,250,True,0
299,3,100,0.159123,0.940817,0.347214,0.892,5.7e-05,16.8202,1584.144063,0.001,500,True,0


TEsting the load - this will actually be in the ap after this

In [105]:
model = Network()
model.load_state_dict(torch.load('saved_models/model-1.pth'))
model.eval()

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)

In [107]:
print(Network())

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)
