# SLP and MLP in Pytorch



## Import Necessary modules

In [4]:
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn                              # neural network module
import torch.nn.functional as F
import torch.optim as optim                        # optimization module
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data import Dataset

from torch.utils.tensorboard import SummaryWriter  # logging module
from torchvision.utils import make_grid, save_image

import numpy as np
import time

In [5]:
print(torch.__version__)

1.10.0+cu102


## Custom Dataset loader

In [6]:
class CustomDataset(Dataset):
    """ Custom dataset for flattened 10x10 csv dataset """

    # Initialize data
    def __init__(self, fname, transform=None):
        self.xy = np.genfromtxt(fname, delimiter=',', skip_header=1, dtype=np.uint8)
        self.transform = transform

    def __getitem__(self, index):
        x = self.xy[index, 1:].reshape(10,10,1) # H W C
        y = self.xy[index, 0]
        y = torch.as_tensor(y, dtype=torch.long)
        if self.transform:
            x = self.transform(x)
        return x, y

    def __len__(self):
        return self.xy.shape[0]

## Parameters and Hyperparameters

In [7]:
# torch parameters
SEED = 60            # reproducability
# NN Parameters
EPOCHS = 200         # number of epochs
LR = 0.01            # learning rate
MOMENTUM = 0.9       # momentum for the SGD optimizer (how much of the past gradients)
GAMMA = 0.1          # learning rate scheduler (how much to decrease learning rate)
BATCH_SIZE = 64      # number of images to load per iteration

In [8]:
# manual seed to reproduce the same results
torch.manual_seed(SEED)

<torch._C.Generator at 0x27d4e8ab5b0>

In [9]:
# transform input data type from ndarray to tensor values between 0,1
transform = transforms.Compose([
    transforms.ToTensor(), 
])

In [10]:
# read the datasets
tr_dataset   = CustomDataset('data/training.csv', transform=transform)
# prepare loader for the training dataset
train_loader = torch.utils.data.DataLoader(tr_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)

# download the dataset if not already downloaded and set necessery transforms
test_dataset = CustomDataset('data/testing.csv', transform=transform)
# prepare loader for the test dataset
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

In [79]:
w = 3
kernel_size = 2
stride =  4
padding = 0
nextw = (( w - kernel_size  + padding * 2) // stride) + 1
print(nextw) 

1


## Define the Network

In [80]:
class Network1(nn.Module):
    
    # [ ( W - kernel_size + 2 * padding ) / stride ] + 1
    def __init__(self):
        super(Network1, self).__init__()
        self.conv1 = nn.Conv2d( in_channels = 1,
                                out_channels= 16,
                                kernel_size = 2,
                                stride = 4,
                                padding = 0 )
        self.conv2 = nn.Conv2d( in_channels = 16,
                                out_channels= 32,
                                kernel_size = 2,
                                stride = 4,
                                padding = 0 ) 
        
        self.fcl    = nn.Linear(32*1*1, 10)

    def forward(self,x):
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = torch.flatten(x, 1)
        x = self.fcl(x)
        return torch.log_softmax(x, dim=1)

class Network2(nn.Module):
    def __init__(self):
        super(Network2, self).__init__()
        # [( W - kernel_size + 2*padding ) / stride] + 1
        self.conv1 = nn.Conv2d( in_channels = 1,
                                out_channels= 16,
                                kernel_size = 3,
                                stride = 2,
                                padding = 3 )
       # [( 10 - 3 + 2*2 ) / 2] + 1 = 6
        self.fcl    = nn.Linear(16*7*7, 10)

    def forward(self,x):
        x = torch.relu(self.conv1(x))
        x = torch.flatten(x, 1)
        x = self.fcl(x)
        return torch.log_softmax(x, dim=1)

class Network3(nn.Module):
    def __init__(self):
        super(Network3, self).__init__()
        # [( W - kernel_size + 2*padding ) / stride] + 1
        self.relu = nn.LeakyReLU(0.2, inplace=True)
        self.conv1 = nn.Conv2d( in_channels = 1,
                                out_channels= 16,
                                kernel_size = 2,
                                stride = 4,
                                padding = 0 )
       # [( 10 - 3 + 2*2 ) / 2] + 1 = 6
        self.fcl    = nn.Linear(16*3*3, 10)

    def forward(self,x):
        x = self.relu(self.conv1(x))
        x = torch.flatten(x, 1)
        x = self.fcl(x)
        return torch.log_softmax(x, dim=1)
        

## Create a network instance and move it to the device you want to run computations on

In [81]:
# create the network
net = Network3()

# print network parameter names and their size
for name, param in net.named_parameters():
  print(name, param.size())

# check if CUDA is available
cuda = torch.cuda.is_available()  
device = torch.device("cuda:0" if cuda else "cpu")
if cuda:
    print("CUDA is available")

# if cuda is available move the network to gpu
net.to(device)


conv1.weight torch.Size([16, 1, 2, 2])
conv1.bias torch.Size([16])
conv2.weight torch.Size([32, 16, 2, 2])
conv2.bias torch.Size([32])
fcl.weight torch.Size([10, 32])
fcl.bias torch.Size([10])
CUDA is available


Network1(
  (conv1): Conv2d(1, 16, kernel_size=(2, 2), stride=(4, 4))
  (conv2): Conv2d(16, 32, kernel_size=(2, 2), stride=(4, 4))
  (fcl): Linear(in_features=32, out_features=10, bias=True)
)

## Specify the loss function and the optimizer

In [82]:
# specify the loss to be used
# softmax is internally computed.
loss_fn = nn.CrossEntropyLoss()
# specify the optimizer to update the weights during backward pass
optimizer = optim.SGD(net.parameters(), lr=LR, momentum=MOMENTUM)
# change learning rate over time
scheduler = lr_scheduler.StepLR(optimizer, step_size=100, gamma=GAMMA)


## Define training function

In [83]:
def train_net():
  # put the network in training mode
  net.train()
  # keep record of the loss value
  epoch_loss = 0.0
  # use training data as batches
  for xt, rt in train_loader:
    # move training instances and corresponding labels into gpu if cuda is available
    xt, rt = xt.to(device), rt.to(device)
    # clear the previously accumulated gradients
    optimizer.zero_grad()
    # forward the network
    yt = net(xt)
    # calculate loss
    loss = loss_fn(yt, rt)
    # make a backward pass, calculate gradients
    loss.backward()
    # update weights
    optimizer.step()
    # accumulate loss
    epoch_loss += loss.item()
  return epoch_loss
  

## Define test function

In [84]:
def eval_net(loader):
  # put the network in evaluation mode
  net.eval()
  # keep record of the loss value
  total_loss = 0.0
  # number of correctly classified instances
  correct = 0
  # disable gradient tracking
  with torch.no_grad():
    for xt, rt in loader:
      # move training instances and corresponding labels into gpu if cuda is available
      xt, rt = xt.to(device), rt.to(device)
      # save_image(xt, f'images/sample_grid.png')  # save 8 images
      # x = 8/0
      # forward the network
      yt = net(xt)
      # calculate loss
      loss = loss_fn(yt, rt)
      # accumulate loss
      total_loss += loss.item()
      # get predicted classes
      pred = yt.argmax(dim=1)
      # accumulate correctly classified image counts
      correct += (pred == rt).sum().item()
      #correct += pred.eq(rt.view_as(pred)).sum().item()
  return correct/len(loader.dataset), total_loss 
  

## 9.Train the network

In [85]:
# initialize the logger instance
# by default creates run directory inside current folder
writer = SummaryWriter()           
# train the network
for epoch in range(1,EPOCHS+2):
  # train network for one epoch
  train_net()
  scheduler.step()
  # get accuracy and loss on the training dataset
  tr_ac, tr_loss = eval_net(train_loader)
  # get accuracy and loss on the test dataset
  tt_ac, tt_loss = eval_net(test_loader)
  # save stats
  writer.add_scalars("Loss", {"tr_loss": tr_loss, "tt_loss":tt_loss} , epoch)
  writer.add_scalars("Accuracy", {"tr_acc": tr_ac, "tt_acc":tt_ac}, epoch)

  if (epoch-1) % 10 == 0:
    print("Epoch", epoch, "Tr Acc:",tr_ac, "Tt_Ac", tt_ac)


  writer.flush()
writer.close()


Epoch 1 Tr Acc: 0.1 Tt_Ac 0.1
Epoch 11 Tr Acc: 0.373 Tt_Ac 0.349
Epoch 21 Tr Acc: 0.524 Tt_Ac 0.447
Epoch 31 Tr Acc: 0.562 Tt_Ac 0.491
Epoch 41 Tr Acc: 0.608 Tt_Ac 0.519
Epoch 51 Tr Acc: 0.62 Tt_Ac 0.542
Epoch 61 Tr Acc: 0.638 Tt_Ac 0.551
Epoch 71 Tr Acc: 0.652 Tt_Ac 0.562
Epoch 81 Tr Acc: 0.66 Tt_Ac 0.549
Epoch 91 Tr Acc: 0.673 Tt_Ac 0.581
Epoch 101 Tr Acc: 0.703 Tt_Ac 0.588
Epoch 111 Tr Acc: 0.701 Tt_Ac 0.591
Epoch 121 Tr Acc: 0.705 Tt_Ac 0.591
Epoch 131 Tr Acc: 0.706 Tt_Ac 0.597
Epoch 141 Tr Acc: 0.708 Tt_Ac 0.596
Epoch 151 Tr Acc: 0.707 Tt_Ac 0.597
Epoch 161 Tr Acc: 0.711 Tt_Ac 0.599
Epoch 171 Tr Acc: 0.713 Tt_Ac 0.597
Epoch 181 Tr Acc: 0.713 Tt_Ac 0.597
Epoch 191 Tr Acc: 0.715 Tt_Ac 0.602
Epoch 201 Tr Acc: 0.716 Tt_Ac 0.602


## Save the model

In [86]:
# save the network model

torch.save(net.state_dict(), 'model/slp.pth')


## Visualize results on tensorboard

In [87]:
%load_ext tensorboard
%tensorboard --logdir runs
# open http://localhost:6006/ to view the results


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 7696), started 17:21:27 ago. (Use '!kill 7696' to kill it.)

In [88]:
#!kill 4081