# SLP and MLP in PyTorch



## Import Necessary modules

In [1]:
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn                              # neural network module
import torch.nn.functional as F
import torch.optim as optim                        # optimization module
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data import Dataset

from torch.utils.tensorboard import SummaryWriter  # logging module
from torchvision.utils import make_grid, save_image

import numpy as np

  warn(f"Failed to load image Python extension: {e}")


In [2]:
print(torch.__version__)

1.10.1


## Custom Dataset loader

In [3]:
class CustomDataset(Dataset):
    """Dataset backed by a CSV file of labelled, flattened 10x10 images.

    The file is read with one header line skipped. In every remaining row the
    first column is the class label and the following 100 columns are the
    pixel values; all values are parsed as ``uint8``.

    Args:
        fname: Path (or any source accepted by ``np.genfromtxt``) of the CSV.
        transform: Optional callable applied to each ``(10, 10, 1)`` image
            array before it is returned.
    """

    def __init__(self, fname, transform=None):
        xy = np.genfromtxt(fname, delimiter=',', skip_header=1, dtype=np.uint8)
        if xy.ndim != 2:
            # genfromtxt squeezes a single data row to 1-D (and yields shape
            # (0,) for a file without data rows); restore the (rows, columns)
            # layout so indexing and len() behave the same for any row count.
            xy = xy.reshape(1, -1) if xy.size else xy.reshape(0, 0)
        self.xy = xy
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(image, label)`` for row ``index``.

        The image is the row's pixel columns reshaped to H x W x C =
        ``(10, 10, 1)`` and passed through ``transform`` when one was given;
        the label is a 0-dim ``torch.long`` tensor.
        """
        x = self.xy[index, 1:].reshape(10, 10, 1)  # H W C
        y = torch.as_tensor(self.xy[index, 0], dtype=torch.long)
        # compare against None so a transform object is never skipped because
        # of its truth value
        if self.transform is not None:
            x = self.transform(x)
        return x, y

    def __len__(self):
        """Return the number of data rows read from the file."""
        return self.xy.shape[0]

## Parameters and Hyperparameters

In [4]:
# torch parameters
SEED = 60            # seed passed to torch.manual_seed for reproducibility
# NN Parameters
EPOCHS = 200         # number of passes over the training set
LR = 0.01            # initial learning rate of the SGD optimizer
MOMENTUM = 0.9       # momentum for the SGD optimizer (how much of the past gradients)
GAMMA = 0.1          # factor the StepLR scheduler multiplies the learning rate by
BATCH_SIZE = 64      # number of images to load per iteration
d = 100              # number of input features (10x10 pixels per image)
K = 10               # number of output features (classes)
H = None             # H=None for SLP else MLP; the networks below take H as a constructor argument instead

In [5]:
# seed PyTorch's default random number generator so that runs are repeatable
torch.manual_seed(SEED)

<torch._C.Generator at 0x227ac442810>

In [6]:
# convert each H x W x C uint8 ndarray sample into a C x H x W float tensor
# with values scaled to the range [0, 1]
transform = transforms.Compose([
    transforms.ToTensor(), 
])

In [7]:
# read the training set from its CSV file
tr_dataset   = CustomDataset('data/training.csv', transform=transform)
# batch the training set; shuffled so every epoch sees a different order
train_loader = torch.utils.data.DataLoader(tr_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)

# read the test set from its CSV file (same transform as the training set)
test_dataset = CustomDataset('data/testing.csv', transform=transform)
# batch the test set; order is kept fixed
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

## Define the Network

In [8]:
# All networks derive from the base class nn.Module

# Conv-FC-FC
class class1(nn.Module):
    """Conv-FC-FC classifier for single-channel 10x10 images.

    Layers: one 3x3 convolution (128 filters, stride 3, padding 2), ReLU,
    flatten, a hidden linear layer of width ``H``, ReLU, and a 10-way linear
    output layer.

    Args:
        H: Number of units in the hidden fully connected layer.
    """

    def __init__(self, H):
        # all derived classes must call __init__ method of super class
        super().__init__()
        self.model = nn.Sequential(
            # 10x10 input with kernel 3, stride 3, padding 2 -> 128 x 4 x 4
            nn.Conv2d(in_channels=1,
                      out_channels=128,
                      kernel_size=3,
                      stride=3,
                      padding=2),
            nn.ReLU(),
            nn.Flatten(start_dim=1),
            # in_features is tied to the 4x4 feature map of a 10x10 input
            nn.Linear(128 * 4 * 4, H),
            nn.ReLU(),
            nn.Linear(H, 10),
        )

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(B, 10)``.

        ``x`` is a batch of shape ``(B, 1, 10, 10)``; the convolution consumes
        the image layout directly, so no flattening is done beforehand.
        """
        # log_softmax is applied here explicitly, so the output is already
        # normalised in log space
        return torch.log_softmax(self.model(x), dim=1)
    
    
    
# Conv-Conv-FC 
class class2(nn.Module):
    """Conv-Conv-FC classifier for single-channel 10x10 images.

    Layers: a 3x3 convolution (64 filters, stride 2, padding 2) followed
    directly by a second 3x3 convolution (128 filters, stride 2, padding 1),
    then ReLU, flatten, and a 10-way linear output layer.

    Args:
        H: Ignored. This architecture has no hidden fully connected layer;
            the parameter exists only so the constructor can be called like
            the other network classes.
    """

    def __init__(self, H=None):
        # all derived classes must call __init__ method of super class
        super().__init__()
        self.model = nn.Sequential(
            # 10x10 input with kernel 3, stride 2, padding 2 -> 64 x 6 x 6
            nn.Conv2d(in_channels=1,
                      out_channels=64,
                      kernel_size=3,
                      stride=2,
                      padding=2),
            # 6x6 input with kernel 3, stride 2, padding 1 -> 128 x 3 x 3
            nn.Conv2d(in_channels=64,
                      out_channels=128,
                      kernel_size=3,
                      stride=2,
                      padding=1),
            nn.ReLU(),
            nn.Flatten(start_dim=1),
            # in_features is tied to the 3x3 feature map of a 10x10 input
            nn.Linear(128 * 3 * 3, 10),
        )

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(B, 10)``.

        ``x`` is a batch of shape ``(B, 1, 10, 10)``; the convolutions consume
        the image layout directly, so no flattening is done beforehand.
        """
        # log_softmax is applied here explicitly, so the output is already
        # normalised in log space
        return torch.log_softmax(self.model(x), dim=1)
        
#Conv-Conv-FC-FC
class class3(nn.Module):
    """Conv-Conv-FC-FC classifier for single-channel 10x10 images.

    Layers: a 3x3 convolution (64 filters, stride 2, padding 2) followed
    directly by a second 3x3 convolution (128 filters, stride 2, padding 1),
    then ReLU, flatten, a hidden linear layer of width ``H``, ReLU, and a
    10-way linear output layer.

    Args:
        H: Number of units in the hidden fully connected layer.
    """

    def __init__(self, H):
        # all derived classes must call __init__ method of super class
        super().__init__()
        self.model = nn.Sequential(
            # 10x10 input with kernel 3, stride 2, padding 2 -> 64 x 6 x 6
            nn.Conv2d(in_channels=1,
                      out_channels=64,
                      kernel_size=3,
                      stride=2,
                      padding=2),
            # 6x6 input with kernel 3, stride 2, padding 1 -> 128 x 3 x 3
            nn.Conv2d(in_channels=64,
                      out_channels=128,
                      kernel_size=3,
                      stride=2,
                      padding=1),
            nn.ReLU(),
            nn.Flatten(start_dim=1),
            # in_features is tied to the 3x3 feature map of a 10x10 input
            nn.Linear(128 * 3 * 3, H),
            nn.ReLU(),
            nn.Linear(H, 10),
        )

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(B, 10)``.

        ``x`` is a batch of shape ``(B, 1, 10, 10)``; the convolutions consume
        the image layout directly, so no flattening is done beforehand.
        """
        # log_softmax is applied here explicitly, so the output is already
        # normalised in log space
        return torch.log_softmax(self.model(x), dim=1)
        
        

## Create a network instance and move it to the device you want to run computations on

In [9]:
# create the network: exactly one of the three architectures is instantiated;
# the other two are kept as commented-out alternatives

#net = class1(H = 25)
#net = class2(H = 70)
net = class3(H = 50)

# print network parameter names and their size
for name, param in net.named_parameters():
  print(name, param.size())

# check if CUDA is available and pick the device all tensors will be moved to
cuda = torch.cuda.is_available()  
device = torch.device("cuda:0" if cuda else "cpu")

# move the network parameters to the selected device (GPU if available, else CPU)
net.to(device)


model.0.weight torch.Size([64, 1, 3, 3])
model.0.bias torch.Size([64])
model.1.weight torch.Size([128, 64, 3, 3])
model.1.bias torch.Size([128])
model.4.weight torch.Size([50, 1152])
model.4.bias torch.Size([50])
model.6.weight torch.Size([10, 50])
model.6.bias torch.Size([10])


  return torch._C._cuda_getDeviceCount() > 0


class3(
  (model): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(2, 2), padding=(2, 2))
    (1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (2): ReLU()
    (3): Flatten(start_dim=1, end_dim=-1)
    (4): Linear(in_features=1152, out_features=50, bias=True)
    (5): ReLU()
    (6): Linear(in_features=50, out_features=10, bias=True)
  )
)

## Specify the loss function and the optimizer

In [10]:
# specify the loss to be used
# NOTE: CrossEntropyLoss applies log_softmax to its input internally, while the
# networks above already return log_softmax outputs. Applying log_softmax to
# log-probabilities leaves them unchanged, so the loss value is still correct.
loss_fn = nn.CrossEntropyLoss()
# specify the optimizer to update the weights during backward pass
optimizer = optim.SGD(net.parameters(), lr=LR, momentum=MOMENTUM)
# multiply the learning rate by GAMMA after every 100 scheduler steps
scheduler = lr_scheduler.StepLR(optimizer, step_size=100, gamma=GAMMA)


## Define training function

In [11]:
def train_net():
  """Train ``net`` for one pass over ``train_loader``.

  Every batch is moved to ``device``, pushed through the network, scored with
  ``loss_fn`` and used for one ``optimizer`` step.

  Returns:
    The sum of the per-batch loss values seen during the pass.
  """
  # training mode
  net.train()
  running_loss = 0.0
  for inputs, targets in train_loader:
    # keep data on the same device as the network
    inputs = inputs.to(device)
    targets = targets.to(device)
    # gradients accumulate by default, so reset them before each batch
    optimizer.zero_grad()
    # forward pass and loss in one go
    batch_loss = loss_fn(net(inputs), targets)
    # backward pass followed by the weight update
    batch_loss.backward()
    optimizer.step()
    running_loss += batch_loss.item()
  return running_loss
  

## Define test function

In [12]:
def eval_net(loader):
  """Evaluate ``net`` on every batch produced by ``loader``.

  Args:
    loader: Iterable of ``(inputs, labels)`` batches that also exposes a
      ``dataset`` attribute supporting ``len()``.

  Returns:
    A tuple ``(accuracy, loss_sum)``: the fraction of correctly classified
    samples in ``loader.dataset`` and the sum of the per-batch loss values.
  """
  # evaluation mode
  net.eval()
  loss_sum = 0.0
  hits = 0
  # no gradients are needed while scoring
  with torch.no_grad():
    for inputs, targets in loader:
      # keep data on the same device as the network
      inputs, targets = inputs.to(device), targets.to(device)
      outputs = net(inputs)
      loss_sum += loss_fn(outputs, targets).item()
      # the predicted class is the index of the largest output per sample
      predicted = outputs.argmax(dim=1)
      hits += predicted.eq(targets).sum().item()
  accuracy = hits / len(loader.dataset)
  return accuracy, loss_sum
  

## Train the network

In [13]:
# initialize the logger instance
# by default creates run directory inside current folder
writer = SummaryWriter()
# train the network; the try/finally guarantees the logger is closed even when
# training is interrupted or raises part-way through
try:
  for epoch in range(1, EPOCHS + 1):
    # train network for one epoch, then advance the learning rate schedule
    train_net()
    scheduler.step()
    # get accuracy and loss on the training dataset
    tr_ac, tr_loss = eval_net(train_loader)
    # get accuracy and loss on the test dataset
    tt_ac, tt_loss = eval_net(test_loader)
    # save stats
    writer.add_scalars("Loss", {"tr_loss": tr_loss, "tt_loss": tt_loss}, epoch)
    writer.add_scalars("Accuracy", {"tr_acc": tr_ac, "tt_acc": tt_ac}, epoch)

    # report progress on epochs 1, 11, 21, ...
    if (epoch-1) % 10 == 0:
      print("Epoch", epoch, "Tr Acc:",tr_ac, "Tt_Ac", tt_ac)

    # push this epoch's stats to disk
    writer.flush()
finally:
  writer.close()

Epoch 1 Tr Acc: 0.189 Tt_Ac 0.157
Epoch 11 Tr Acc: 0.846 Tt_Ac 0.782
Epoch 21 Tr Acc: 0.921 Tt_Ac 0.848
Epoch 31 Tr Acc: 0.957 Tt_Ac 0.855
Epoch 41 Tr Acc: 0.98 Tt_Ac 0.872
Epoch 51 Tr Acc: 0.995 Tt_Ac 0.87
Epoch 61 Tr Acc: 0.998 Tt_Ac 0.874
Epoch 71 Tr Acc: 1.0 Tt_Ac 0.873
Epoch 81 Tr Acc: 1.0 Tt_Ac 0.871
Epoch 91 Tr Acc: 1.0 Tt_Ac 0.88
Epoch 101 Tr Acc: 1.0 Tt_Ac 0.878
Epoch 111 Tr Acc: 1.0 Tt_Ac 0.878
Epoch 121 Tr Acc: 1.0 Tt_Ac 0.88
Epoch 131 Tr Acc: 1.0 Tt_Ac 0.88
Epoch 141 Tr Acc: 1.0 Tt_Ac 0.878
Epoch 151 Tr Acc: 1.0 Tt_Ac 0.879
Epoch 161 Tr Acc: 1.0 Tt_Ac 0.88
Epoch 171 Tr Acc: 1.0 Tt_Ac 0.879
Epoch 181 Tr Acc: 1.0 Tt_Ac 0.881
Epoch 191 Tr Acc: 1.0 Tt_Ac 0.881


## Save the model

In [14]:
import os

# save the network weights; torch.save does not create missing parent
# directories, so make sure the target folder exists first
os.makedirs('model', exist_ok=True)
torch.save(net.state_dict(), 'model/mlp.pth')


## Visualize results on tensorboard

In [15]:
%load_ext tensorboard
%tensorboard --logdir runs
# open http://localhost:6006/ to view the results


In [16]:
#%reload_ext tensorboard

In [17]:
!kill 16152

'kill' is not recognized as an internal or external command,
operable program or batch file.


In [18]:
#weights.size()