In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True)

from torch.utils.tensorboard import SummaryWriter

print(torch.__version__)
print(torchvision.__version__)

import tensorflow
print(tensorflow.__version__)

1.5.1
0.6.1
2.3.0


In [2]:
def get_num_correct(preds, labels):
    return preds.argmax(dim=1).eq(labels).sum().item()

In [3]:
class Network(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        # (1) input layer
        t = t

        # (2) hidden conv layer
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (3) hidden conv layer
        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (4) hidden linear layer
        t = t.reshape(-1, 12*4*4)
        t = self.fc1(t)
        t = F.relu(t)

        # (5) hidden linear layer
        t = self.fc2(t)
        t = F.relu(t)

        # (6) output layer
        t = self.out(t)
        #t = F.softmax(t,dim=1)

        return t

In [4]:
train_set = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST', train=True, download=True, transform=transforms.Compose([transforms.ToTensor()
                                                                                         ])
)

In [5]:
network = Network()
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100,shuffle=True)
optimiser = optim.Adam(network.parameters(),lr=0.01)

images,labels = next(iter(train_loader))
grid = torchvision.utils.make_grid(images)

tb = SummaryWriter()
tb.add_image('images',grid)
tb.add_graph(network,images)

for epoch in range(10):
    
    total_loss=0
    total_correct=0
    
    for batch in train_loader:
        images, labels = batch
        
        preds = network(images)
        loss = F.cross_entropy(preds,labels)
        
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()
        
        total_loss+=loss.item()
        total_correct+=get_num_correct(preds,labels)
        

    tb.add_scalar('Loss',total_loss,epoch)
    tb.add_scalar('Number Correct',total_correct,epoch)
    tb.add_scalar('Accuracy',total_correct/len(train_set),epoch)

    for name, weight in network.named_parameters():
        tb.add_histogram(name,weight,epoch)
        tb.add_histogram(f'{name}.grad',weight.grad,epoch)
        

    print("epoch",epoch,"total_correct:",total_correct,"loss:",total_loss )

tb.close()

epoch 0 total_correct: 47510 loss: 329.6513590812683
epoch 1 total_correct: 51515 loss: 227.80737529695034
epoch 2 total_correct: 52111 loss: 210.97554579377174
epoch 3 total_correct: 52678 loss: 198.9040331840515
epoch 4 total_correct: 52815 loss: 194.06669433414936
epoch 5 total_correct: 52958 loss: 190.5855732858181
epoch 6 total_correct: 52999 loss: 188.99202919006348
epoch 7 total_correct: 53246 loss: 182.23907610028982
epoch 8 total_correct: 53294 loss: 181.2956200018525
epoch 9 total_correct: 53278 loss: 181.30406533926725


In [6]:
 for name, weight in network.named_parameters():
    print(name,weight.shape)     #grad kan niet direct achter name, omdat name hier de string is, niet meer de parameter. bij weight nog wel, dus gaat daar goed.

conv1.weight torch.Size([6, 1, 5, 5])
conv1.bias torch.Size([6])
conv2.weight torch.Size([12, 6, 5, 5])
conv2.bias torch.Size([12])
fc1.weight torch.Size([120, 192])
fc1.bias torch.Size([120])
fc2.weight torch.Size([60, 120])
fc2.bias torch.Size([60])
out.weight torch.Size([10, 60])
out.bias torch.Size([10])


In [7]:
for name, weight in network.named_parameters():
    print(f'{name}.grad',weight.grad.shape)

conv1.weight.grad torch.Size([6, 1, 5, 5])
conv1.bias.grad torch.Size([6])
conv2.weight.grad torch.Size([12, 6, 5, 5])
conv2.bias.grad torch.Size([12])
fc1.weight.grad torch.Size([120, 192])
fc1.bias.grad torch.Size([120])
fc2.weight.grad torch.Size([60, 120])
fc2.bias.grad torch.Size([60])
out.weight.grad torch.Size([10, 60])
out.bias.grad torch.Size([10])


In [8]:
#we resetten gewoon vrolijk
batch_size_list = [100, 1000, 10000] #we zien dat hoe groter batch size, hoe slechter de guess
lr_list = [0.01, .001, .0001, .00001] #we zien dat ook kleinste lr 't beste werkt. we skippen nog niet 'genoeg iig' de optima

for batch_size in batch_size_list:#dit werkt dus zonder te initieren, hij begrijpt zelf wat ie moet zijn. zelfde voor lr
    for lr in lr_list:
        network = Network() #reset dus elke keer ook de weights op deze manier denk ik
        train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)
        optimiser = optim.Adam(network.parameters(),lr=lr)

        images,labels = next(iter(train_loader))
        grid = torchvision.utils.make_grid(images)

        comment = f' batch_size={batch_size} lr ={lr}'#geloof dat je dit alles kunt noemen, parameters zelfde naam geven is super stupide, les 1
        tb = SummaryWriter(comment=comment)
        tb.add_image('images',grid)
        tb.add_graph(network,images)
        
        for epoch in range(2):
    
            total_loss=0
            total_loss_b=0
            total_correct=0
    
            for batch in train_loader:
                images, labels = batch
        
                preds = network(images)
                loss = F.cross_entropy(preds,labels)
            
                optimiser.zero_grad()
                loss.backward() #zoals je ziet wordt er al in epoch 1 geoptimised. dus de initial guess wordt wel zeker aangetast door de learning rate
                optimiser.step()
        
                total_loss+=loss.item()*batch_size #loss is average. to be able to compare the total if we worked with different batch sizes in different, we have to make average -> total
                total_correct+=get_num_correct(preds,labels)
        

            tb.add_scalar('Loss',total_loss,epoch)
            tb.add_scalar('Number Correct',total_correct,epoch)
            tb.add_scalar('Accuracy',total_correct/len(train_set),epoch)

            for name, weight in network.named_parameters():
                tb.add_histogram(name,weight,epoch)
                tb.add_histogram(f'{name}.grad',weight.grad,epoch)
        

            print("batch size",batch_size,"lr",lr,"epoch",epoch,"total_correct:",total_correct,"loss:",total_loss )
    
    tb.close()

batch size 100 lr 0.01 epoch 0 total_correct: 47030 loss: 33979.10651564598
batch size 100 lr 0.01 epoch 1 total_correct: 51170 loss: 23542.933164536953
batch size 100 lr 0.001 epoch 0 total_correct: 41441 loss: 49015.39563238621
batch size 100 lr 0.001 epoch 1 total_correct: 47837 loss: 32082.994809746742
batch size 100 lr 0.0001 epoch 0 total_correct: 30387 loss: 84251.64348483086
batch size 100 lr 0.0001 epoch 1 total_correct: 42173 loss: 47428.92200946808
batch size 100 lr 1e-05 epoch 0 total_correct: 6381 loss: 137812.89863586426
batch size 100 lr 1e-05 epoch 1 total_correct: 19705 loss: 131932.2633743286
batch size 1000 lr 0.01 epoch 0 total_correct: 34338 loss: 66970.43001651764
batch size 1000 lr 0.01 epoch 1 total_correct: 46293 loss: 35105.012238025665
batch size 1000 lr 0.001 epoch 0 total_correct: 26925 loss: 94913.51974010468
batch size 1000 lr 0.001 epoch 1 total_correct: 41615 loss: 48108.62183570862
batch size 1000 lr 0.0001 epoch 0 total_correct: 9892 loss: 137229.3114

In [9]:
from itertools import product


In [10]:
parameters = dict(
    lr= [0.01, 0.001]
    ,batch_size = [10, 100, 1000]
    ,shuffel = [True, False]
)

In [11]:
param_values = [v for v in parameters.values()]   #return value list v for each v in the parameter dictionary. basically sort of creating a tensor where concatenating different v's after one another? 
param_values #so lr/batch_size/shuffle are al v's


[[0.01, 0.001], [10, 100, 1000], [True, False]]

In [12]:
parameters

{'lr': [0.01, 0.001], 'batch_size': [10, 100, 1000], 'shuffel': [True, False]}

In [13]:
#param = [v in parameters.values()] #dit werkt dus niet

In [14]:
for lr, batch_size, shuffle in product(*param_values): #star here tells product to treat each value in the list as argument, instead of treating the list itself as the argument
    print(lr,batch_size,shuffle)

0.01 10 True
0.01 10 False
0.01 100 True
0.01 100 False
0.01 1000 True
0.01 1000 False
0.001 10 True
0.001 10 False
0.001 100 True
0.001 100 False
0.001 1000 True
0.001 1000 False


In [15]:
for lr in (param_values):
    print(lr)

[0.01, 0.001]
[10, 100, 1000]
[True, False]


In [16]:
for lr, batch_size, shuffle in product(*param_values): #star here tells product to treat each value in the list as argument, instead of treating the list itself as the argument
    comment_smart = f'lr{lr},batch_size{batch_size},shuffle {shuffle}''

SyntaxError: unexpected EOF while parsing (<ipython-input-16-21bedce64674>, line 1)