In [25]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F # for activation functions. On the other hand, for adding act. func. we can use torch.nn as well.
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter
import torchvision

In [18]:
class CNN(nn.Module):
    """Small two-layer convolutional classifier for 28x28 images.

    Architecture: conv(3x3, 8) -> ReLU -> maxpool(2x2) -> conv(3x3, 16) -> ReLU
    -> maxpool(2x2) -> flatten -> linear(num_classes).

    Args:
        in_channels: number of channels in the input images (1 for grayscale).
        num_classes: number of output logits.

    The linear layer is sized for 28x28 inputs: both convolutions keep the
    spatial size (3x3 kernel, stride 1, padding 1) and each 2x2 pooling halves
    it, so 28x28 becomes 7x7 with 16 channels.
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()
        # Use the constructor argument instead of a hard-coded 1 so that
        # CNN(in_channels=3) really accepts 3-channel input.
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        # 16 = output channels of conv2; 7*7 = spatial size left after two 2x2 poolings of a 28x28 input.
        self.fc1 = nn.Linear(16 * 7 * 7, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a (batch, in_channels, 28, 28) input."""
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)

        # Flatten everything except the batch dimension before the linear layer.
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)

        return x

def save_checkpoint(state, filename="./my_checkpoint.pt"):
    """Announce the save on stdout, then serialize `state` to `filename` via torch.save."""
    print("Saving Checkpoint")
    torch.save(obj=state, f=filename)

def load_checkpoint(checkpoint, model=None, optimizer=None):
    """Restore model and optimizer state from a checkpoint dict.

    Args:
        checkpoint: mapping with a 'state_dict' entry (model parameters) and an
            'optimizer' entry (optimizer state).
        model: module to restore into. When omitted, the module-level `model`
            is used, which keeps the original one-argument call working.
        optimizer: optimizer to restore into. When omitted, the module-level
            `optimizer` is used.

    Raises:
        NameError: if a target is omitted and no module-level variable of that
            name exists.
    """
    print("Loading Checkpoint")
    print(checkpoint.keys())

    if model is None:
        if "model" not in globals():
            raise NameError("name 'model' is not defined")
        model = globals()["model"]
    if optimizer is None:
        if "optimizer" not in globals():
            raise NameError("name 'optimizer' is not defined")
        optimizer = globals()["optimizer"]

    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])


# Smoke test: push one random MNIST-sized batch through a fresh network and
# print the shape of the logits it produces.
model = CNN()
x = torch.rand((64, 1, 28, 28))
output_shape = model(x).shape
print(output_shape)

torch.Size([64, 10])


In [19]:
# device setup

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [20]:
# hyperparameters

in_channel, num_classes = 1, 10
# learning_rate (previously 0.001) and batch_size (previously 64) are
# intentionally not assigned here: the training cell iterates over lists of
# learning rates and batch sizes and binds those names itself.
num_epochs, load_model = 1, False

In [29]:
# LOAD DATA

# ToTensor() turns each image returned by the dataset into a float tensor.
to_tensor = transforms.ToTensor()

train_dataset = datasets.MNIST(root='dataset/', train=True, transform=to_tensor, download=True)
# The training DataLoader is created in the training cell, once per batch size being compared.

# Evaluation batch size is defined here because `batch_size` is only bound
# later, by the training loop; relying on it made this cell fail with a
# NameError on a fresh kernel. It only affects evaluation speed, not accuracy.
test_batch_size = 64
test_dataset = datasets.MNIST(root='dataset/', train=False, transform=to_tensor, download=True)
# No shuffling for evaluation: sample order does not change the accuracy.
test_loader = DataLoader(dataset=test_dataset, batch_size=test_batch_size, shuffle=False)

In [31]:
# INIT MODEL AND TRAIN
#
# Trains one fresh CNN per (batch size, learning rate) combination and logs
# each run to its own TensorBoard directory so the runs can be compared.

criterion = nn.CrossEntropyLoss()

batch_sizes = [128, 1024]
learning_rates = [0.001, 0.0001]
classes = ["0","1","2","3","4","5","6","7","8","9"]

for batch_size in batch_sizes:
    for learning_rate in learning_rates:
        model = CNN().to(device)  # send to GPU if CUDA is available
        model.train()
        step = 0  # global batch counter for this run, used as the TensorBoard x-axis
        writer = SummaryWriter(f"runs/MNIST/MiniBatchSize {batch_size}, LR {learning_rate}")
        # shuffle=True so batches do not contain the same images in every epoch.
        train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        if load_model:
            # Restore only after this run's model and optimizer exist. Loading
            # before the loop failed (no optimizer yet) and the restored model
            # was thrown away when a new CNN was created for the run.
            load_checkpoint(torch.load('my_checkpoint.pt', map_location=device))

        last_batch_idx = len(train_loader) - 1
        final_metrics = None  # epoch-mean metrics of the last completed epoch

        for epoch in range(num_epochs):
            losses = []
            accuracies = []

            # data = image tensor, targets = label tensor of the current batch.
            for batch_idx, (data, targets) in enumerate(train_loader):
                data = data.to(device=device)
                targets = targets.to(device=device)

                # forward
                scores = model(data)
                loss = criterion(scores, targets)
                losses.append(loss.item())

                # Log with an explicit step; without it every batch is written
                # at step 0 and the history cannot be browsed.
                img_grid = torchvision.utils.make_grid(data)
                writer.add_image('mnist-image', img_grid, global_step=step)  # images seen during training
                writer.add_histogram('fc1', model.fc1.weight, global_step=step)

                _, predictions = scores.max(1)
                num_correct = (predictions == targets).sum()
                running_train_acc = float(num_correct) / float(data.shape[0])
                accuracies.append(running_train_acc)

                # backward
                optimizer.zero_grad()  # clear gradients left over from the previous batch
                loss.backward()  # compute the gradient of every parameter

                # gradient descent or adam step
                optimizer.step()  # update parameters from the gradients just computed
                writer.add_scalar("Trainin Loss :", loss.item(), global_step=step)
                writer.add_scalar("Trainin ACC :", running_train_acc, global_step=step)

                # Project the final batch of the epoch as an embedding. Using
                # the loader length instead of a fixed index (460) makes this
                # fire for every batch size, not only for 128.
                if batch_idx == last_batch_idx:
                    features = data.reshape(data.shape[0], -1)
                    class_labels = [classes[label] for label in targets.tolist()]
                    writer.add_embedding(features, metadata=class_labels, label_img=data, global_step=step)

                step += 1

            mean_loss = sum(losses) / len(losses)  # average of the epoch's batch losses
            mean_acc = sum(accuracies) / len(accuracies)
            final_metrics = {'accuracy': mean_acc, 'loss': mean_loss}
            print(f"Loss at epoch {epoch} was {mean_loss:.5f}")

            # Checkpoint after the epoch has trained, so the file holds the
            # updated weights rather than the state from before the epoch.
            if epoch % 3 == 0:
                # state_dict holds the learnable tensors (conv/fc weights and
                # biases); the optimizer's state is stored alongside it.
                checkpoint = {'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}
                save_checkpoint(checkpoint)

        # add_scalar feeds the curve plots; add_hparams feeds the HParams
        # comparison table. Log hparams once per run: each call starts a new
        # sub-run, so calling it per batch produced hundreds of them.
        if final_metrics is not None:
            writer.add_hparams({'lr': learning_rate, 'bsize': batch_size}, final_metrics)
        writer.close()  # flush pending events and release the event file

Saving Checkpoint
Loss at epoch 0 was 0.44953
Saving Checkpoint
Loss at epoch 0 was 1.54886
Saving Checkpoint
Loss at epoch 0 was 1.57180
Saving Checkpoint
Loss at epoch 0 was 2.24985


In [8]:
def check_accuracy(loader, model):
    """Print the classification accuracy of `model` over every batch in `loader`.

    Args:
        loader: iterable of (inputs, labels) batches with a `.dataset`
            attribute. If the dataset has a boolean `train` attribute it is
            used to say which split is being evaluated.
        model: module mapping an input batch to per-class scores of shape
            (batch, num_classes).

    Inputs are moved to the device of the model's parameters, so the function
    does not depend on a module-level `device`. The model is put in eval mode
    for the duration and restored to whatever mode it was in before the call.
    """
    is_train_split = getattr(loader.dataset, "train", None)
    if is_train_split is None:
        # Dataset does not say which split it is (e.g. TensorDataset, Subset).
        print("Checking acc on data")
    elif is_train_split:
        print("Checking acc on training data")
    else:
        print("Checking acc on test data")

    # Evaluate on whatever device the model lives on; a model without
    # parameters gives no hint, so batches are then left where they are.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    num_correct = 0
    num_samples = 0
    was_training = model.training
    model.eval()  # switch to evaluation (inference) mode

    try:
        with torch.no_grad():  # no gradients are needed for evaluation
            for x, y in loader:
                if model_device is not None:
                    x = x.to(device=model_device)
                    y = y.to(device=model_device)

                scores = model(x)  # shape: (batch, num_classes)
                # The index of the largest score along dim 1 is the predicted class.
                _, predictions = scores.max(1)
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode instead of unconditionally forcing train mode.
        model.train(was_training)

    # An empty loader reports 0.00 instead of raising ZeroDivisionError.
    accuracy = float(num_correct) / float(num_samples) * 100 if num_samples else 0.0
    print(f"Got{num_correct} / {num_samples} with acc, {accuracy:.2f}")  # e.g. 95.78
    
# Report accuracy on the training split first, then on the held-out test split.
for loader in (train_loader, test_loader):
    check_accuracy(loader, model)

Checking acc on training data
Got59639 / 60000 with acc, 99.40
Checking acc on test data
Got9870 / 10000 with acc, 98.70
