In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

# Make sure autograd is switched on for everything that follows.
torch.set_grad_enabled(True)
# Allow up to 120 characters per line when tensors are printed.
torch.set_printoptions(linewidth=120)

from torch.utils.tensorboard import SummaryWriter

In [2]:
def get_correct_num(preds, labels):
    """Count the predictions whose arg-max along dim 1 equals the label.

    Args:
        preds: tensor of per-class scores; the predicted class is taken as
            the arg-max along dimension 1.
        labels: tensor of target class indices compared element-wise with
            the predicted classes.

    Returns:
        The number of matching positions, as a plain Python number.
    """
    predicted_classes = preds.argmax(dim=1)
    hits = predicted_classes.eq(labels)
    return hits.sum().item()

In [23]:
class Network(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers.

    The first linear layer takes 12*4*4 features, which is what the two
    conv/pool stages produce for single-channel 28x28 inputs.
    """

    def __init__(self):
        super().__init__()

        # Feature extractor: 1 -> 6 -> 12 channels, 5x5 kernels.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)

        # Classifier head: 192 -> 120 -> 60 -> 10 outputs.
        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def forward(self, t):
        """Run a batch through the network and return the 10 raw output scores per sample."""
        # Each convolution is followed by ReLU and a 2x2, stride-2 max-pool.
        for conv in (self.conv1, self.conv2):
            t = F.max_pool2d(F.relu(conv(t)), kernel_size=2, stride=2)

        # Collapse everything except the batch dimension before the linear layers.
        t = t.flatten(start_dim=1)
        for dense in (self.fc1, self.fc2):
            t = F.relu(dense(t))

        # No activation on the final layer: the raw scores are returned.
        return self.out(t)
        

In [4]:
# FashionMNIST training split, converted to tensors and downloaded on first use.
# The root is written with a forward slash: the previous '.\data' contained the
# invalid escape sequence '\d' and, outside Windows, named a directory literally
# called ".\data". './data' resolves to the same folder on Windows.
train_data = torchvision.datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]))

In [5]:
# Serve the training set in shuffled mini-batches of 100 samples.
data_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=100,
    shuffle=True,
)

### Starting out with TensorBoard (Network Graph and Images)

In [6]:
# Build the model and grab one batch first, so nothing is opened if either step fails.
network = Network()
images, labels = next(iter(data_loader))
grid = torchvision.utils.make_grid(images)

# The context manager closes the writer even when one of the logging calls
# raises; the previous explicit close() only ran on the success path.
with SummaryWriter() as tb:
    tb.add_image('images', grid)
    tb.add_graph(network, images)

In [12]:
# Hyperparameter grid: every batch size is combined with every learning rate.
batch_size_list = [100, 1000, 10000]
lr_list = [0.01, 0.001, 0.0001, 0.00001]

for batch_size in batch_size_list:
    for lr in lr_list:

        # A fresh model and optimizer per run, so runs never share learned weights.
        network = Network()

        data_loader = torch.utils.data.DataLoader(
            train_data, batch_size=batch_size
        )

        optimizer = optim.Adam(network.parameters(), lr=lr)

        # One sample batch, used for the image grid and for tracing the graph.
        images, labels = next(iter(data_loader))
        grid = torchvision.utils.make_grid(images)

        comment = f'batch_size={batch_size} lr={lr}'
        # One writer per run. The context manager closes it when the run ends,
        # including on an exception or a manual interrupt; previously it was
        # never closed.
        with SummaryWriter(comment=comment) as tb:
            tb.add_image('image', grid)
            tb.add_graph(network, images)

            for epoch in range(10):

                total_loss = 0
                total_correct = 0

                for batch in data_loader:
                    images, labels = batch

                    preds = network(images)
                    loss = F.cross_entropy(preds, labels)

                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                    # The loss is a per-batch mean, so weight it by the number of
                    # samples actually in this batch; multiplying by batch_size
                    # would overcount a short final batch.
                    total_loss += loss.item() * images.shape[0]
                    total_correct += get_correct_num(preds, labels)

                tb.add_scalar('Loss', total_loss, epoch)
                tb.add_scalar('Number Correct', total_correct, epoch)
                tb.add_scalar('Accuracy', total_correct / len(train_data), epoch)

                # Short-named tags kept for the first conv layer. The explicit
                # 'conv1.weight.grad' call was dropped because the loop below
                # already logs that exact tag for the same step.
                tb.add_histogram('conv.bias', network.conv1.bias, epoch)
                tb.add_histogram('conv.weight', network.conv1.weight, epoch)

                # Fixed typo: was `named_paramters`, which raised AttributeError
                # at the end of the first epoch.
                for name, weight in network.named_parameters():
                    tb.add_histogram(name, weight, epoch)
                    tb.add_histogram(f'{name}.grad', weight.grad, epoch)

                print('epoch:', epoch, 'total_correct:', total_correct, 'loss:', total_loss)

epoch: 0 total_correct: 41888 loss: 484.102543592453
epoch: 1 total_correct: 47954 loss: 320.9751943349838
epoch: 2 total_correct: 50107 loss: 273.5793734937906
epoch: 3 total_correct: 51023 loss: 248.29755449295044
epoch: 4 total_correct: 51774 loss: 228.58574455976486
epoch: 5 total_correct: 52053 loss: 216.77543891966343
epoch: 6 total_correct: 52448 loss: 206.3550055772066
epoch: 7 total_correct: 52739 loss: 197.25840355455875
epoch: 8 total_correct: 52965 loss: 190.6756676286459
epoch: 9 total_correct: 53191 loss: 184.59308277070522


In [13]:
# List the shape of the gradient held by each parameter of the current network.
for param_name, param in network.named_parameters():
    grad_shape = param.grad.shape
    print(f'{param_name}.grad', grad_shape)

conv1.weight.grad torch.Size([6, 1, 5, 5])
conv1.bias.grad torch.Size([6])
conv2.weight.grad torch.Size([12, 6, 5, 5])
conv2.bias.grad torch.Size([12])
fc1.weight.grad torch.Size([120, 192])
fc1.bias.grad torch.Size([120])
fc2.weight.grad torch.Size([60, 120])
fc2.bias.grad torch.Size([60])
out.weight.grad torch.Size([10, 60])
out.bias.grad torch.Size([10])


In [14]:
from itertools import product

In [15]:
# Hyperparameter search space: each key maps to the list of values to try.
# Key order matters, because later cells unpack the values positionally.
parameters = {
    'lr': [0.01, 0.001],
    'batch_size': [10, 100, 1000],
    'shuffle': [True, False],
}

In [17]:
# The value lists in the dict's key order, ready to be fed to itertools.product.
param_values = list(parameters.values())
param_values

[[0.01, 0.001], [10, 100, 1000], [True, False]]

In [18]:
# Preview every combination in the Cartesian product of the value lists.
for combo in product(*param_values):
    lr, batch_size, shuffle = combo
    print(lr, batch_size, shuffle)

0.01 10 True
0.01 10 False
0.01 100 True
0.01 100 False
0.01 1000 True
0.01 1000 False
0.001 10 True
0.001 10 False
0.001 100 True
0.001 100 False
0.001 1000 True
0.001 1000 False


In [24]:
# Train one fresh network for every (lr, batch_size, shuffle) combination and
# log each run to its own TensorBoard writer.
for lr, batch_size, shuffle in product(*param_values):
    network = Network()

    # `shuffle` is now actually passed to the loader. Before, it only appeared in
    # the run comment, so the shuffle=True and shuffle=False runs both iterated
    # over the data in the same fixed order.
    data_loader = torch.utils.data.DataLoader(
            train_data, batch_size=batch_size, shuffle=shuffle
    )

    optimizer = optim.Adam(network.parameters(), lr=lr)

    # One sample batch, used for the image grid and for tracing the graph.
    images, labels = next(iter(data_loader))
    grid = torchvision.utils.make_grid(images)

    comment = f'batch_size={batch_size} lr={lr} shuffle={shuffle}'
    # The context manager closes the writer when the run ends, including on an
    # exception or a manual interrupt; previously it was never closed.
    with SummaryWriter(comment=comment) as tb:
        tb.add_image('image', grid)
        tb.add_graph(network, images)

        for epoch in range(10):

            total_loss = 0
            total_correct = 0

            for batch in data_loader:
                images, labels = batch
                preds = network(images)
                loss = F.cross_entropy(preds, labels)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # The loss is a per-batch mean, so weight it by the number of
                # samples actually in this batch; multiplying by batch_size
                # would overcount a short final batch.
                total_loss += loss.item() * images.shape[0]
                total_correct += get_correct_num(preds, labels)

            tb.add_scalar('Loss', total_loss, epoch)
            tb.add_scalar('Number Correct', total_correct, epoch)
            tb.add_scalar('Accuracy', total_correct / len(train_data), epoch)

            # Short-named tags kept for the first conv layer. The explicit
            # 'conv1.weight.grad' call was dropped because the loop below
            # already logs that exact tag for the same step.
            tb.add_histogram('conv.bias', network.conv1.bias, epoch)
            tb.add_histogram('conv.weight', network.conv1.weight, epoch)

            # Fixed typo: was `named_paramters`, which raised AttributeError at
            # the end of the first epoch.
            for name, weight in network.named_parameters():
                tb.add_histogram(name, weight, epoch)
                tb.add_histogram(f'{name}.grad', weight.grad, epoch)

            print('epoch:', epoch, 'total_correct:', total_correct, 'loss:', total_loss)

KeyboardInterrupt: 