In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from IPython.display import display, clear_output
import pandas as pd
import time
import json

from itertools import product
from collections import namedtuple
from collections import OrderedDict

import os 

import matplotlib.pyplot as plt


In [2]:
#NameFile
class NameFile():
    @staticmethod#static? iets met dat je m kan callen using the class itself. don't need an instance of the class, to call the method. i guess dat je m dus niet eerst hoeft te initieren alszijnde type x? 
    def nameit(params): #ga er dus vanuit dat je alleen keys and values hebt, dus alleen 0 en 1 voor die ene index. wellicht gaat dit dus ooit mis, makkelijk te herstellen

        items_hier = list(params.items()) #keys+values
        num_k=0 #amount of keys present
        comment = '' #the string for the filename
        
        #make the comment by looping over keys and values
        for k in params.keys():
            comment += f'{items_hier[num_k][0]}=' #add the key
            for v in items_hier[num_k][1]:
                comment +=(f'{v}_') #add the values
            num_k+=1 #for indexing next loop to get next keys/values in list
        comment+='results'
        return comment

#RunBuilder
class RunBuilder():
    @staticmethod#static? iets met dat je m kan callen using the class itself. don't need an instance of the class, to call the method. i guess dat je m dus niet eerst hoeft te initieren alszijnde type x? 
    def get_runs(params):
        
        Run = namedtuple('Run',params.keys())#die ordereddicttionary heeft keys and values. dit heb je wel eens eerder gezien I guess.
                                             #blijkbaar maakt ie een mooie string als je die keys zo oproept. test dat even.
        
        runs = []
        for v in product(*params.values()): #dit doet dus iets dat ie per value combo nieuwe iteratie doet
            runs.append(Run(*v))
            
        return runs

#RunManager
class RunManager():
    def __init__(self):
        
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_start_time = None
        
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None
        
        self.network = None
        self.loader = None
        self.tb = None

    def begin_run(self,run,network,loader): #die self is dus gewoon de variabel naam links van de streep
        #start time for a run, parameters run added, run_count+1 (stays same for all epochs)
        #network copied, loader copied, name given in tb. 
        self.run_start_time = time.time()
        
        self.run_params = run
        self.run_count += 1
        self.network = network
        self.loader = loader
        self.tb = SummaryWriter(comment=f'-{run}')
        
        images,labels = next(iter(self.loader)) #misschien wel gewoon plaatjes inladen voor foto'tje in tensorboard
        grid = torchvision.utils.make_grid(images)
        
        self.tb.add_image('images',grid)
        self.tb.add_graph(
            self.network
            ,images.to(getattr(run,'device','cpu')))
        
    def end_run(self):
        self.tb.close()
        self.epoch_count = 0

    def begin_epoch(self):
        self.epoch_start_time = time.time()
        
        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        
    def end_epoch(self):
        
        epoch_duration = time.time()-self.epoch_start_time
        run_duration = time.time()-self.run_start_time
        
        loss = self.epoch_loss/len(self.loader.dataset)
        accuracy = self.epoch_num_correct/len(self.loader.dataset)
        
        self.tb.add_scalar('Loss',loss,self.epoch_count)
        self.tb.add_scalar('Accuracy',accuracy,self.epoch_count)
        
        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name,param,self.epoch_count)
            self.tb.add_histogram(f'{name}.grad',param.grad,self.epoch_count)
            
        results = OrderedDict()
        results["run"]=self.run_count
        results["epoch"]=self.epoch_count
        results["loss"]=loss
        results["accuracy"]=accuracy
        results["epoch duration"]=epoch_duration
        results["run duration"]=run_duration
                            
        for k,v in self.run_params._asdict().items():  #deze komen uit run, je batch_size & lr
            results[k] = v #geloof dat je hier dus voor elke run met andere batch size etc. maar 1 lr en batchsize toevoegt, vandaar dat dit niet in de loop zit
        self.run_data.append(results) #1 batch_size en lr bij de results bij, en vervolgens voeg je al je results toe aan wat je metadata i guess
        df = pd.DataFrame.from_dict(self.run_data,orient='columns') #dit zorgt dat het in een leuk tabelletje staat
                                
        clear_output(wait=True)
        display(df)
                                
    def track_loss(self,loss):
            self.epoch_loss += loss.item()* self.loader.batch_size
                                    
    def track_num_correct(self,preds,labels):
            self.epoch_num_correct += self._get_num_correct(preds,labels)
        
    @torch.no_grad()
    def _get_num_correct(self,preds,labels):
        return preds.argmax(dim=1).eq(labels).sum().item()
    
    def save(self,fileName,ResDir,file_num):
        
        os.mkdir(f'{ResDir}\{file_num}')
        
        pd.DataFrame.from_dict(
        self.run_data
        ,orient = 'columns'
        ).to_csv(f'{ResDir}\{file_num}\{fileName}.csv')
        
        with open(f'{ResDir}\{file_num}\{fileName}.json','w',encoding='utf-8') as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)

In [3]:
torch.manual_seed(50)
network1 = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
    ,nn.ReLU()
    ,nn.MaxPool2d(kernel_size=2,stride=2)
    ,nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
    ,nn.ReLU()
    ,nn.MaxPool2d(kernel_size=2,stride=2)
    ,nn.Flatten(start_dim=1)
    ,nn.Linear(in_features=12*4*4,out_features=120)
    ,nn.ReLU()
    ,nn.Linear(in_features=120,out_features=60)
    ,nn.ReLU()
    ,nn.Linear(in_features=60,out_features=10)   
)

In [4]:
torch.manual_seed(50)
network2 = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
    ,nn.ReLU()
    ,nn.MaxPool2d(kernel_size=2,stride=2)
    ,nn.BatchNorm2d(6)
    ,nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
    ,nn.ReLU()
    ,nn.MaxPool2d(kernel_size=2,stride=2)
    ,nn.Flatten(start_dim=1)
    ,nn.Linear(in_features=12*4*4,out_features=120)
    ,nn.ReLU()
    ,nn.BatchNorm1d(120)
    ,nn.Linear(in_features=120,out_features=60)
    ,nn.ReLU()
    ,nn.Linear(in_features=60,out_features=10)   
)

In [5]:
train_set = torchvision.datasets.FashionMNIST(
root='./data'
    ,download=True
    ,train=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
    ])
)


In [6]:
loader = DataLoader(train_set,batch_size=len(train_set),num_workers=1)
data=next(iter(loader))
mean = data[0].mean()
std = data[0].std()
mean,std

(tensor(0.2861), tensor(0.3530))

In [7]:
train_set_normal = torchvision.datasets.FashionMNIST(
root='./data'
    ,download=True
    ,train=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
        ,transforms.Normalize(mean,std)
    ])
)


In [8]:
trainsets = {
    'not_normal': train_set
    ,'normal':train_set_normal
}

In [9]:
networks = {
    'no_batch_norm': network1
    ,'batch_norm':network2
}

In [10]:
params = OrderedDict(
    lr = [.01]
    ,batch_size = [1000]
    ,num_workers = [1]
    ,device = ['cuda']
    ,trainset = list(trainsets.keys())
    ,network=list(networks.keys())
)

ResDir = 'runs_results'
file_num = '1308_1439'
m=RunManager()

for run in RunBuilder.get_runs(params):
    
    device=torch.device(run.device)
    network = networks[run.network].to(device)
    loader = DataLoader(trainsets[run.trainset],batch_size=run.batch_size,num_workers=run.num_workers)
    optimiser = optim.Adam(network.parameters(),lr=run.lr)
    
    m.begin_run(run,network,loader)
    for epoch in range(20):
        m.begin_epoch()
        for batch in loader:
            
            images = batch[0].to(device)
            labels = batch[1].to(device)
            preds = network(images)
            loss = F.cross_entropy(preds,labels)
            optimiser.zero_grad()
            loss.backward()
            optimiser.step()
            
            m.track_loss(loss)
            m.track_num_correct(preds,labels)
        m.end_epoch()
    m.end_run()
m.save(NameFile.nameit(params),ResDir,file_num) #mooie naam voor de results file met alle parameter values erin

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,num_workers,device,trainset,network
0,1,1,1.010034,0.606967,7.325416,13.160287,0.01,1000,1,cuda,not_normal,no_batch_norm
1,1,2,0.545249,0.788833,7.605963,20.910635,0.01,1000,1,cuda,not_normal,no_batch_norm
2,1,3,0.465293,0.828217,6.844383,27.916469,0.01,1000,1,cuda,not_normal,no_batch_norm
3,1,4,0.411397,0.849217,7.194037,35.216199,0.01,1000,1,cuda,not_normal,no_batch_norm
4,1,5,0.374156,0.862267,7.476776,42.786971,0.01,1000,1,cuda,not_normal,no_batch_norm
...,...,...,...,...,...,...,...,...,...,...,...,...
75,4,16,0.125689,0.952133,9.969344,165.065828,0.01,1000,1,cuda,normal,batch_norm
76,4,17,0.120762,0.952700,10.230132,175.442173,0.01,1000,1,cuda,normal,batch_norm
77,4,18,0.115337,0.955033,9.984033,185.624281,0.01,1000,1,cuda,normal,batch_norm
78,4,19,0.115360,0.955300,9.918997,195.681020,0.01,1000,1,cuda,normal,batch_norm


In [17]:
df = pd.DataFrame.from_dict(m.run_data,orient='columns').sort_values('accuracy',ascending=False) #dit zorgt dat het in een leuk tabelletje staat
pd.set_option('display.max_rows',None)
df

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,num_workers,device,trainset,network
79,4,20,0.110408,0.957317,10.035356,205.923431,0.01,1000,1,cuda,normal,batch_norm
78,4,19,0.11536,0.9553,9.918997,195.68102,0.01,1000,1,cuda,normal,batch_norm
77,4,18,0.115337,0.955033,9.984033,185.624281,0.01,1000,1,cuda,normal,batch_norm
76,4,17,0.120762,0.9527,10.230132,175.442173,0.01,1000,1,cuda,normal,batch_norm
75,4,16,0.125689,0.952133,9.969344,165.065828,0.01,1000,1,cuda,normal,batch_norm
74,4,15,0.137869,0.948017,10.467396,154.903531,0.01,1000,1,cuda,normal,batch_norm
73,4,14,0.139621,0.9462,9.497421,144.227819,0.01,1000,1,cuda,normal,batch_norm
72,4,13,0.145431,0.944733,10.374502,134.604298,0.01,1000,1,cuda,normal,batch_norm
71,4,12,0.146747,0.943683,10.059436,124.028419,0.01,1000,1,cuda,normal,batch_norm
70,4,11,0.15568,0.940317,9.997989,113.749509,0.01,1000,1,cuda,normal,batch_norm
