# SLP and MLP in Pytorch



## Import Necessary modules

In [343]:
import os

import numpy as np
import torch
import torch.nn as nn                              # neural network module
import torch.nn.functional as F
import torch.optim as optim                        # optimization module
import torch.optim.lr_scheduler as lr_scheduler
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from torch.utils.tensorboard import SummaryWriter  # logging module
from torchvision.utils import make_grid, save_image

In [344]:
# print the installed PyTorch version
print(torch.__version__)

1.10.1+cu113


## Custom Dataset loader

In [345]:
class CustomDataset(Dataset):
    """Dataset backed by a CSV file of flattened 10x10 single-channel images.

    The file is read with one header line skipped. Each remaining line is one
    sample: the first column is the class label and the following 100 columns
    are the pixel values, reshaped to height x width x channel = 10 x 10 x 1.
    All values are stored as uint8.

    Args:
        fname: path of the CSV file to load.
        transform: optional callable applied to the (10, 10, 1) uint8 image
            array before it is returned.
    """

    # Initialize data
    def __init__(self, fname, transform=None):
        data = np.genfromtxt(fname, delimiter=',', skip_header=1, dtype=np.uint8)
        # genfromtxt squeezes a file holding a single data row into a 1-D array;
        # restore the (samples, columns) layout so that indexing and len() count
        # samples instead of columns
        if data.ndim == 1 and data.size > 0:
            data = data.reshape(1, -1)
        self.xy = data
        self.transform = transform

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored in row ``index``."""
        x = self.xy[index, 1:].reshape(10, 10, 1)  # H W C
        y = torch.as_tensor(self.xy[index, 0], dtype=torch.long)
        if self.transform:
            x = self.transform(x)
        return x, y

    def __len__(self):
        """Return the number of samples (data rows) that were loaded."""
        return self.xy.shape[0]

## Parameters and Hyperparameters

In [346]:
# torch parameters
SEED = 60            # seed passed to torch.manual_seed for reproducibility
# NN Parameters
EPOCHS = 200         # number of epochs
LR = 0.01            # learning rate
MOMENTUM = 0.9       # momentum for the SGD optimizer (how much of the past gradients is kept)
GAMMA = 0.1          # factor the learning rate scheduler multiplies the learning rate by
BATCH_SIZE = 64      # number of images to load per iteration

In [347]:
# seed PyTorch's global random number generator to reproduce the same results
torch.manual_seed(SEED)

<torch._C.Generator at 0x13d673883f0>

In [348]:
# convert each H x W x C uint8 ndarray into a C x H x W float tensor with values between 0 and 1
transform = transforms.Compose([
    transforms.ToTensor(), 
])

In [349]:
# read the training dataset from its CSV file
tr_dataset   = CustomDataset('data/training.csv', transform=transform)
# prepare loader for the training dataset (shuffled, BATCH_SIZE samples per batch)
train_loader = torch.utils.data.DataLoader(tr_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)

# read the test dataset from its CSV file, using the same transform
test_dataset = CustomDataset('data/testing.csv', transform=transform)
# prepare loader for the test dataset (not shuffled)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

## Define the Network

In [350]:
# All networks derive from the base class nn.Module
class N1(nn.Module):
    """Three-layer convolutional classifier for 1x10x10 inputs.

    Input batches are laid out as BxCxHxW:
      B: batch size
      C: number of channels (1 here)
      H: height of the image
      W: width of the image

    Every convolution uses a 3x3 kernel, stride 1 and padding 3, so with
        out = (W - kernel + 2*padding) / stride + 1
    the spatial size grows by 4 per layer: 10 -> 14 -> 18 -> 22.
    The 64 feature maps of size 22x22 are flattened and mapped to 10 outputs,
    which forward() returns as log-probabilities.
    """

    def __init__(self) :
        # all derived classes must call __init__ method of super class
        super().__init__()

        # settings shared by the three convolutions
        conv_settings = dict(kernel_size=3, stride=1, padding=3)

        self.c1 = nn.Conv2d(1, 16, **conv_settings)    # (10 - 3 + 6)/1 + 1 = 14
        self.c2 = nn.Conv2d(16, 32, **conv_settings)   # (14 - 3 + 6)/1 + 1 = 18
        self.c3 = nn.Conv2d(32, 64, **conv_settings)   # (18 - 3 + 6)/1 + 1 = 22

        # 64 channels of 22x22 values per sample after flattening
        self.fc1 = nn.Linear(64 * 22 * 22, 10)

        # container holding the same four layer objects; forward() does not call it
        self.model = nn.Sequential(self.c1, self.c2, self.c3, self.fc1)

    # forward method should get the input and return the output
    def forward(self, x):
        # convolution followed by ReLU, three times in a row
        for conv in (self.c1, self.c2, self.c3):
            x = torch.relu(conv(x))
        # keep the batch dimension, flatten everything else
        scores = self.fc1(torch.flatten(x, 1))
        return torch.log_softmax(scores, dim=1)

# Network with one convolutional layer, a sigmoid activation and a linear output layer
class N2(nn.Module):
    """Single-convolution classifier for 1x10x10 inputs with a sigmoid activation.

    The convolution has 16 filters with a 3x3 kernel, stride 1 and padding 3,
    so with out = (W - kernel + 2*padding) / stride + 1 a 10x10 input becomes
    14x14. The 16 feature maps are flattened and mapped to 10 outputs, which
    forward() returns as log-probabilities.
    """

    def __init__(self):
        # all derived classes must call init method of super class
        super().__init__()

        # (10 - 3 + 6) / 1 + 1 = 14
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=3)

        # 16 channels of 14x14 values per sample after flattening
        self.fc1 = nn.Linear(16 * 14 * 14, 10)

        # container holding the same two layer objects; forward() does not call it
        self.model = nn.Sequential(self.conv1, self.fc1)

    # forward method should get the input and return the output
    def forward(self, x):
        activated = torch.sigmoid(self.conv1(x))
        # keep the batch dimension, flatten everything else
        scores = self.fc1(torch.flatten(activated, 1))
        return torch.log_softmax(scores, dim=1)


class N3(nn.Module):
    """Single-convolution classifier for 1x10x10 inputs with a ReLU activation.

    The convolution has 16 filters with a 3x3 kernel, stride 1 and padding 3,
    so with out = (W - kernel + 2*padding) / stride + 1 a 10x10 input becomes
    14x14. The 16 feature maps are flattened and mapped to 10 outputs, which
    forward() returns as log-probabilities.
    """

    def __init__(self):
        # all derived classes must call __init__ method of super class
        super().__init__()
        # activation module applied to the convolution output
        self.rel = nn.ReLU()
        # (10 - 3 + 6) / 1 + 1 = 14
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=3)
        # 16 channels of 14x14 values per sample after flattening
        self.fc1 = nn.Linear(16 * 14 * 14, 10)

    # forward method should get the input and return the output
    def forward(self, x):
        features = self.rel(self.conv1(x))
        # keep the batch dimension, flatten everything else
        scores = self.fc1(torch.flatten(features, 1))
        return torch.log_softmax(scores, dim=1)

## Create a network instance and move it to the device you want to run computations on

In [351]:
# create the network
net = N1()


# print network parameter names and their size
for name, param in net.named_parameters():
  print(name, param.size())

# check if CUDA is available
cuda = torch.cuda.is_available()  
# use the first GPU when CUDA is available, otherwise the CPU
device = torch.device("cuda:0" if cuda else "cpu")
if cuda:
  print("CUDA is avaliable")

# move the network to the selected device (the GPU if cuda is available)
net.to(device)


c1.weight torch.Size([16, 1, 3, 3])
c1.bias torch.Size([16])
c2.weight torch.Size([32, 16, 3, 3])
c2.bias torch.Size([32])
c3.weight torch.Size([64, 32, 3, 3])
c3.bias torch.Size([64])
fc1.weight torch.Size([10, 28224])
fc1.bias torch.Size([10])
CUDA is avaliable


N1(
  (c1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(3, 3))
  (c2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(3, 3))
  (c3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(3, 3))
  (fc1): Linear(in_features=28224, out_features=10, bias=True)
  (model): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(3, 3))
    (1): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(3, 3))
    (2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(3, 3))
    (3): Linear(in_features=28224, out_features=10, bias=True)
  )
)

## Specify the loss function and the optimizer

In [352]:
# specify the loss to be used
# N1, N2 and N3 all end with log_softmax, so their outputs are already
# log-probabilities; the matching loss is the negative log-likelihood.
# CrossEntropyLoss would apply log_softmax a second time on top of them.
loss_fn = nn.NLLLoss()
# specify the optimizer to update the weights during backward pass
optimizer = optim.SGD(net.parameters(), lr=LR, momentum=MOMENTUM)
# change learning rate over time: multiply it by GAMMA after every 100 scheduler steps
scheduler = lr_scheduler.StepLR(optimizer, step_size=100, gamma=GAMMA)


## Define training function

In [353]:
def train_net():
  """Train the notebook-level ``net`` for one pass over ``train_loader``.

  Uses the notebook-level ``device``, ``optimizer`` and ``loss_fn``.

  Returns:
    The sum of the loss values of all batches in the pass.
  """
  # switch the network to training mode
  net.train()
  running_loss = 0.0
  for inputs, targets in train_loader:
    # place the batch on the same device as the network
    inputs = inputs.to(device)
    targets = targets.to(device)
    # gradients accumulate across backward calls, so reset them first
    optimizer.zero_grad()
    # forward pass and loss for this batch
    batch_loss = loss_fn(net(inputs), targets)
    # backward pass followed by the weight update
    batch_loss.backward()
    optimizer.step()
    running_loss += batch_loss.item()
  return running_loss
  

## Define test function

In [354]:
def eval_net(loader, model=None, criterion=None, target_device=None):
  """Evaluate a classifier on every batch produced by ``loader``.

  Both returned values are normalised by the number of evaluated samples, so
  results from loaders of different sizes (e.g. training and test set) can be
  compared directly.

  Args:
    loader: iterable yielding ``(inputs, labels)`` batches.
    model: network to evaluate; defaults to the notebook-level ``net``.
    criterion: loss function that returns the mean loss of a batch;
      defaults to the notebook-level ``loss_fn``.
    target_device: device the batches are moved to; defaults to the
      notebook-level ``device``.

  Returns:
    ``(accuracy, mean_loss)``: the fraction of correctly classified samples
    and the average loss per sample.

  Raises:
    ValueError: if ``loader`` yields no samples.
  """
  # fall back to the notebook-level objects when nothing explicit is passed
  model = net if model is None else model
  criterion = loss_fn if criterion is None else criterion
  target_device = device if target_device is None else target_device

  # put the network in evaluation mode
  model.eval()
  # loss summed over samples, correct predictions and samples seen so far
  total_loss = 0.0
  correct = 0
  seen = 0
  # disable gradient tracking
  with torch.no_grad():
    for xt, rt in loader:
      # move instances and corresponding labels to the evaluation device
      xt, rt = xt.to(target_device), rt.to(target_device)
      # forward the network
      yt = model(xt)
      batch_size = rt.size(0)
      # the criterion returns a batch mean; weight it by the batch size so a
      # smaller final batch does not count as much as a full one
      total_loss += criterion(yt, rt).item() * batch_size
      # predicted class is the output with the highest score
      correct += (yt.argmax(dim=1) == rt).sum().item()
      seen += batch_size
  if seen == 0:
    raise ValueError("eval_net: loader yielded no samples")
  return correct / seen, total_loss / seen
  

## Train the network

In [355]:
# initialize the logger instance
# by default creates run directory inside current folder
writer = SummaryWriter()
try:
  # train the network
  for epoch in range(1, EPOCHS + 1):
    # train network for one epoch
    train_net()
    # advance the learning rate schedule once per epoch, after the optimizer updates
    scheduler.step()
    # get accuracy and loss on the training dataset
    tr_ac, tr_loss = eval_net(train_loader)
    # get accuracy and loss on the test dataset
    tt_ac, tt_loss = eval_net(test_loader)
    # save stats
    writer.add_scalars("Loss", {"tr_loss": tr_loss, "tt_loss": tt_loss}, epoch)
    writer.add_scalars("Accuracy", {"tr_acc": tr_ac, "tt_acc": tt_ac}, epoch)

    # report progress on epochs 1, 11, 21, ...
    if (epoch - 1) % 10 == 0:
      print("Epoch", epoch, "Tr Acc:", tr_ac, "Tt_Ac", tt_ac)

    # write pending events to disk after every epoch
    writer.flush()
finally:
  # close the writer even when training stops with an exception
  writer.close()


RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x30976 and 28224x10)

## Save the model

In [None]:
# save the network model
# torch.save does not create missing directories, so make sure the target folder exists
os.makedirs('model', exist_ok=True)
torch.save(net.state_dict(), 'model/mlp.pth')


## Visualize results on tensorboard

In [None]:

%load_ext tensorboard
%tensorboard --logdir runs
# open http://localhost:6006/ to view the results


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 8108), started 19:54:41 ago. (Use '!kill 8108' to kill it.)

In [None]:
#!kill 4081

In [None]:
# NOTE(review): `weights` is not defined in any visible cell of this notebook, so this
# cell raises NameError on a fresh kernel; define it before this cell or remove the cell
weights.size()

NameError: name 'weights' is not defined