


# **Ex.4 HW1 - Training LeNet-5 with PyTorch**

---


# General setup



```
# This is formatted as code
```

Mounting the drive

In [1]:
# Colab helper module used to attach Google Drive to this runtime.
from google.colab import drive
# Mount Drive at /content/drive/; the checkpoints saved and loaded in later
# cells live under '/content/drive/My Drive/'.
drive.mount('/content/drive/')

Mounted at /content/drive/


Installing all the required libraries.


In [2]:
!pip install torch numpy torchvision matplotlib tabulate



Importing the required libraries

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
from tabulate import tabulate

**Hyperparameters**

In [4]:
# --- Run configuration: every value a reader might want to tune ---

# Seed the global RNGs so weight initialisation and batch shuffling repeat
# across "Restart & Run All" (as far as the compute backend is deterministic).
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

epochs = 25            # number of full passes over the training set
batch_size = 100       # samples per mini-batch in both data loaders
learning_rate = 1e-3   # step size handed to the optimizer
Moment = 0.7           # momentum, only used by the SGD optimizer
optimizerType = 2      # 1 for SGD and 2 for Adam
dropoutPer = 0.1       # dropout probability passed to LeNet5D
WDparam = 1e-5         # weight decay; from the paper or learning_rate / epochs

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

**Dataset** loading

In [5]:
# FashionMNIST Dataset
# FashionMNIST splits stored under ./data; ToTensor() converts every image to a tensor.
# Only the training split requests a download.
train_dataset = dsets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
test_dataset = dsets.FashionMNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

# Mini-batch iterators: training batches are reshuffled, test batches keep dataset order.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False,
)


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:02<00:00, 12014594.80it/s]


Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 205152.70it/s]


Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:01<00:00, 3819292.97it/s]


Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 19244453.65it/s]


Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw



# **LeNet-5 Classes**

---


> Differences from the original LeNet-5 paper
*   The network's structure is adapted to work with 28x28 images.
*   For the training, the cross-entropy loss function is used.
*   The output layer uses softmax for calculating the probabilities.
*   Activation function: tanh -> ReLU

In [6]:
# Neural Network Model (LeNet-5 for 28x28 image, ReLU as activation function)
class LeNet5(nn.Module):
    """LeNet-5 variant for single-channel 28x28 images with ReLU activations.

    Feature-map sizes for a 28x28 input:
        conv1 (1 -> 6, k=5):  6 x 24 x 24, max-pooled to 6 x 12 x 12
        conv2 (6 -> 16, k=5): 16 x 8 x 8,  max-pooled to 16 x 4 x 4
        fc1: 256 -> 120, fc2: 120 -> 84, fc3: 84 -> 10
    """

    def __init__(self):
        super().__init__()
        # 1st conv layer: k = 5, Cin = 1, Cout = 6
        self.conv1 = nn.Conv2d(1, 6, 5)
        # 2x2 max pooling with stride 2, shared by both conv stages
        self.pool = nn.MaxPool2d(2, 2)
        # 2nd conv layer: k = 5, Cin = 6, Cout = 16
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Fully connected head: 256 -> 120 -> 84 -> 10
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return per-class log-probabilities of shape (rows, 10) for images ``x``.

        Raises:
            ValueError: if the feature map reaching the fully connected layers
                is not 16 x 4 x 4, i.e. the input images are not 28x28.
        """
        # Conv -> ReLU -> pool, twice.
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Guard the flatten: view(-1, 256) on a differently sized feature map
        # (e.g. 16 x 5 x 5 from 32x32 images) can still succeed and silently
        # regroup values across samples, so reject unexpected sizes up front.
        if tuple(x.shape[-3:]) != (16, 4, 4):
            raise ValueError(
                "LeNet5 expects 1x28x28 images; got a feature map of shape "
                f"{tuple(x.shape)} before the fully connected layers"
            )
        # Represent every sample as a 1-D vector of 256 features.
        x = x.view(-1, 16 * 4 * 4)
        # Fully connected layers, also with ReLU activations.
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return F.log_softmax(self.fc3(x), dim=1)



LeNet-5 with dropout

In [7]:
class LeNet5D(nn.Module):
    """LeNet-5 variant for 28x28 images with dropout after every hidden layer.

    ``nn.Dropout2d`` is applied to the output of each convolution and
    ``nn.Dropout`` to the output of the first two fully connected layers,
    in both cases before the ReLU activation.

    Args:
        dropoutPer: dropout probability passed to both dropout modules.
    """

    def __init__(self,dropoutPer):
        super().__init__()
        # 1st conv layer: k = 5, Cin = 1, Cout = 6 (24x24 map for a 28x28 input)
        self.conv1 = nn.Conv2d(1, 6, 5)
        # 2x2 max pooling with stride 2, shared by both conv stages
        self.pool = nn.MaxPool2d(2, 2)
        # 2nd conv layer: k = 5, Cin = 6, Cout = 16 (8x8 map, 4x4 after pooling)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Fully connected head: 256 -> 120 -> 84 -> 10
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        # Dropout for fully connected outputs and for conv feature maps
        self.dropout1d = nn.Dropout(p=dropoutPer)
        self.dropout2d = nn.Dropout2d(p=dropoutPer)

    def forward(self, x):
        """Return per-class log-probabilities of shape (rows, 10) for images ``x``.

        Raises:
            ValueError: if the feature map reaching the fully connected layers
                is not 16 x 4 x 4, i.e. the input images are not 28x28.
        """
        # Conv -> dropout -> ReLU -> pool, twice.
        x = self.pool(F.relu(self.dropout2d(self.conv1(x))))
        x = self.pool(F.relu(self.dropout2d(self.conv2(x))))
        # Guard the flatten: view(-1, 256) on a differently sized feature map
        # can still succeed and silently regroup values across samples.
        if tuple(x.shape[-3:]) != (16, 4, 4):
            raise ValueError(
                "LeNet5D expects 1x28x28 images; got a feature map of shape "
                f"{tuple(x.shape)} before the fully connected layers"
            )
        # Represent every sample as a 1-D vector of 256 features.
        x = x.view(-1, 16 * 4 * 4)
        # Fully connected layers: linear -> dropout -> ReLU.
        x = F.relu(self.dropout1d(self.fc1(x)))
        x = F.relu(self.dropout1d(self.fc2(x)))
        return F.log_softmax(self.fc3(x), dim=1)

LeNet with BN Class definition

In [8]:
class LeNet5BN(nn.Module):
    """LeNet-5 variant for 28x28 images with batch normalisation.

    A ``BatchNorm2d`` follows each convolution and a ``BatchNorm1d`` follows
    each of the first two fully connected layers; normalisation is applied
    before the ReLU activation.
    """

    def __init__(self):
        super().__init__()
        # 1st conv layer: k = 5, Cin = 1, Cout = 6 (24x24 map for a 28x28 input)
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.bnconv1 = nn.BatchNorm2d(6)
        # 2x2 max pooling with stride 2, shared by both conv stages
        self.pool = nn.MaxPool2d(2, 2)
        # 2nd conv layer: k = 5, Cin = 6, Cout = 16 (8x8 map, 4x4 after pooling)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.bnconv2 = nn.BatchNorm2d(16)
        # 1st fully connected layer: 256 -> 120
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.bnfc1 = nn.BatchNorm1d(120)
        # 2nd fully connected layer: 120 -> 84
        self.fc2 = nn.Linear(120, 84)
        self.bnfc2 = nn.BatchNorm1d(84)
        # 3rd fully connected layer: 84 -> 10
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return per-class log-probabilities of shape (rows, 10) for images ``x``.

        Raises:
            ValueError: if the feature map reaching the fully connected layers
                is not 16 x 4 x 4, i.e. the input images are not 28x28.
        """
        # Conv -> batch norm -> ReLU -> pool, twice.
        x = self.pool(F.relu(self.bnconv1(self.conv1(x))))
        x = self.pool(F.relu(self.bnconv2(self.conv2(x))))
        # Guard the flatten: view(-1, 256) on a differently sized feature map
        # can still succeed and silently regroup values across samples.
        if tuple(x.shape[-3:]) != (16, 4, 4):
            raise ValueError(
                "LeNet5BN expects 1x28x28 images; got a feature map of shape "
                f"{tuple(x.shape)} before the fully connected layers"
            )
        # Represent every sample as a 1-D vector of 256 features.
        x = x.view(-1, 16 * 4 * 4)
        # Fully connected layers: linear -> batch norm -> ReLU.
        x = F.relu(self.bnfc1(self.fc1(x)))
        x = F.relu(self.bnfc2(self.fc2(x)))
        return F.log_softmax(self.fc3(x), dim=1)



# Training function

In [9]:
def _evaluate(model, device, criterion, loader):
  """Return ``(mean_batch_loss, accuracy)`` of ``model`` over ``loader``.

  The model is switched to eval mode for the pass and its previous mode is
  restored afterwards. The loss is the mean of ``criterion`` over batches;
  the accuracy is the fraction of correctly classified samples, so a smaller
  final batch is weighted correctly. Both values are Python floats
  (``nan`` when the loader yields nothing).
  """
  was_training = model.training
  model.eval()
  loss_sum, n_batches, n_correct, n_samples = 0.0, 0, 0, 0
  with torch.no_grad():
    for images, labels in loader:
      images, labels = images.to(device), labels.to(device)
      outputs = model(images)
      # .item() moves the scalar off the device so the history holds plain floats.
      loss_sum += criterion(outputs, labels).item()
      n_batches += 1
      # argmax over classes is the top-1 prediction (exp() would not change it).
      n_correct += (outputs.argmax(dim=1) == labels.view(-1)).sum().item()
      n_samples += labels.size(0)
  model.train(was_training)
  mean_loss = loss_sum / n_batches if n_batches else float('nan')
  accuracy = n_correct / n_samples if n_samples else float('nan')
  return mean_loss, accuracy


def TrainNN(model,device,epochs,optimizer,criterion,train_loader,test_loader):
  """Train ``model`` for ``epochs`` epochs and record per-epoch metrics.

  After every epoch the model is evaluated (eval mode, no gradients) on both
  ``test_loader`` and ``train_loader`` and one summary line is printed.

  Args:
    model: network to optimise; its outputs are passed to ``criterion``.
    device: device the batches are moved to before the forward pass.
    epochs: number of passes over ``train_loader``.
    optimizer: optimizer already bound to ``model``'s parameters.
    criterion: loss called as ``criterion(outputs, labels)``.
    train_loader: iterable of ``(images, labels)`` training batches.
    test_loader: iterable of ``(images, labels)`` evaluation batches.

  Returns:
    ``(model, AccTestList, AccTrainList, test_losses, train_losses)`` where
    each list holds one Python float per epoch; accuracies are fractions in
    [0, 1].
  """
  AccTrainList, AccTestList = [], []
  train_losses, test_losses = [], []
  for e in range(epochs):
    # Optimisation pass over the training data.
    model.train()
    for images, labels in train_loader:
      images, labels = images.to(device), labels.to(device)
      optimizer.zero_grad()
      loss = criterion(model(images), labels)
      loss.backward()
      optimizer.step()

    # Metrics for this epoch, measured with the weights reached at its end.
    test_loss, accTest = _evaluate(model, device, criterion, test_loader)
    train_loss, accTrain = _evaluate(model, device, criterion, train_loader)
    print('>> epoch: {}, train_loss: {:.3f}, test_loss: {:.3f}, train_Acc: {:.1f}, test_Acc: {:.1f} ' .format(e+1, train_loss, test_loss, 100*accTrain, 100*accTest))

    train_losses.append(train_loss)
    test_losses.append(test_loss)
    AccTrainList.append(accTrain)
    AccTestList.append(accTest)
  return model,AccTestList,AccTrainList,test_losses,train_losses

# ***Training without regularization***

In [None]:
# Baseline model without any regularisation, placed on the selected device.
modelNoReg = LeNet5().to(device)
# Loss
criterion = nn.CrossEntropyLoss()
# Optimizer: 1 selects SGD with momentum, any other value selects Adam.
if optimizerType == 1:
  optimizer = torch.optim.SGD(modelNoReg.parameters(), lr=learning_rate, momentum=Moment)
else:
  optimizer = torch.optim.Adam(modelNoReg.parameters(), lr=learning_rate)
modelNoReg.train()
print('Start Training')
modelNoReg,AccTestList,AccTrainList,test_losses,train_losses = TrainNN(modelNoReg,device,epochs,optimizer,criterion,train_loader,test_loader)
print('Finished Training')
# The histories may contain 0-dim tensors (on the GPU for the losses), which
# matplotlib cannot convert; reduce every entry to a plain float first.
AccTestList = [float(v) for v in AccTestList]
AccTrainList = [float(v) for v in AccTrainList]
test_losses = [float(v) for v in test_losses]
train_losses = [float(v) for v in train_losses]
# Save the Model
torch.save(modelNoReg.state_dict(), '/content/drive/My Drive/ex1_300746930_034915504/models/LeNet5model.pkl')
# Plot the loss and accuracy curves, one figure each.
epoch_axis = range(1, epochs + 1)
epoch_ticks = np.arange(0, epochs+1, 2.0)
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(epoch_axis, train_losses, label='Train')
ax_loss.plot(epoch_axis, test_losses, label='Test')
ax_loss.set_xticks(epoch_ticks)
ax_loss.set_ylabel('lossValue')
ax_loss.set_xlabel('epochs')
ax_loss.set_title('Loss')
ax_loss.grid(True)
ax_loss.legend()
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(epoch_axis, AccTrainList, label='Train')
ax_acc.plot(epoch_axis, AccTestList, label='Test')
ax_acc.set_xticks(epoch_ticks)
ax_acc.set_ylabel('AccValue')
ax_acc.set_xlabel('epochs')
ax_acc.set_title('Acc')
ax_acc.grid(True)
ax_acc.legend()
plt.show()

Start Training
>> epoch: 1, train_loss: 0.528, test_loss: 0.550, train_Acc: 79.9, test_Acc: 78.9 
>> epoch: 2, train_loss: 0.435, test_loss: 0.461, train_Acc: 84.2, test_Acc: 83.0 
>> epoch: 3, train_loss: 0.370, test_loss: 0.400, train_Acc: 86.5, test_Acc: 85.5 
>> epoch: 4, train_loss: 0.340, test_loss: 0.372, train_Acc: 87.5, test_Acc: 86.6 
>> epoch: 5, train_loss: 0.325, test_loss: 0.360, train_Acc: 88.0, test_Acc: 86.8 


# ***Training with dropout***

In [None]:
# LeNet-5 with dropout (probability dropoutPer), placed on the selected device.
modelDropout = LeNet5D(dropoutPer).to(device)
# Loss
criterion = nn.CrossEntropyLoss()
# Optimizer: 1 selects SGD with momentum, any other value selects Adam.
if optimizerType == 1:
  optimizer = torch.optim.SGD(modelDropout.parameters(), lr=learning_rate, momentum=Moment)
else:
  optimizer = torch.optim.Adam(modelDropout.parameters(), lr=learning_rate)
modelDropout.train()
print('Start Training')
modelDropout,AccTestListD,AccTrainListD,test_lossesD,train_lossesD = TrainNN(modelDropout,device,epochs,optimizer,criterion,train_loader,test_loader)
print('Finished Training')
# The histories may contain 0-dim tensors (on the GPU for the losses), which
# matplotlib cannot convert; reduce every entry to a plain float first.
AccTestListD = [float(v) for v in AccTestListD]
AccTrainListD = [float(v) for v in AccTrainListD]
test_lossesD = [float(v) for v in test_lossesD]
train_lossesD = [float(v) for v in train_lossesD]
# Save the Model
torch.save(modelDropout.state_dict(), '/content/drive/My Drive/ex1_300746930_034915504/models/LeNet5modelDropout.pkl')
# Plot the loss and accuracy curves, one figure each.
epoch_axis = range(1, epochs + 1)
epoch_ticks = np.arange(0, epochs+1, 2.0)
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(epoch_axis, train_lossesD, label='Train')
ax_loss.plot(epoch_axis, test_lossesD, label='Test')
ax_loss.set_xticks(epoch_ticks)
ax_loss.set_ylabel('lossValue')
ax_loss.set_xlabel('epochs')
ax_loss.set_title('Loss With Dropout')
ax_loss.grid(True)
ax_loss.legend()
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(epoch_axis, AccTrainListD, label='Train')
ax_acc.plot(epoch_axis, AccTestListD, label='Test')
ax_acc.set_xticks(epoch_ticks)
ax_acc.set_ylabel('AccValue')
ax_acc.set_xlabel('epochs')
ax_acc.set_title('Acc With Dropout')
ax_acc.grid(True)
ax_acc.legend()
plt.show()

# ***Training with Weight Decay***

In [None]:

# Plain LeNet-5; the regularisation comes from the optimizer's weight_decay term.
modelWD = LeNet5().to(device)
# Loss
criterion = nn.CrossEntropyLoss()
# Optimizer: 1 selects SGD with momentum, any other value selects Adam.
if optimizerType == 1:
  optimizer = torch.optim.SGD(modelWD.parameters(), lr=learning_rate, momentum=Moment, weight_decay=WDparam)
else:
  optimizer = torch.optim.Adam(modelWD.parameters(), lr=learning_rate, weight_decay=WDparam)
modelWD.train()
print('Start Training')
modelWD,AccTestListWD,AccTrainListWD,test_lossesWD,train_lossesWD = TrainNN(modelWD,device,epochs,optimizer,criterion,train_loader,test_loader)
print('Finished Training')
# The histories may contain 0-dim tensors (on the GPU for the losses), which
# matplotlib cannot convert; reduce every entry to a plain float first.
AccTestListWD = [float(v) for v in AccTestListWD]
AccTrainListWD = [float(v) for v in AccTrainListWD]
test_lossesWD = [float(v) for v in test_lossesWD]
train_lossesWD = [float(v) for v in train_lossesWD]
# Save the Model
torch.save(modelWD.state_dict(), '/content/drive/My Drive/ex1_300746930_034915504/models/LeNet5WDmodel.pkl')
# Plot the loss and accuracy curves, one figure each.
epoch_axis = range(1, epochs + 1)
epoch_ticks = np.arange(0, epochs+1, 2.0)
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(epoch_axis, train_lossesWD, label='Train')
ax_loss.plot(epoch_axis, test_lossesWD, label='Test')
ax_loss.set_xticks(epoch_ticks)
ax_loss.set_ylabel('lossValue')
ax_loss.set_xlabel('epochs')
ax_loss.set_title('Loss with Weight Decay')
ax_loss.grid(True)
ax_loss.legend()
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(epoch_axis, AccTrainListWD, label='Train')
ax_acc.plot(epoch_axis, AccTestListWD, label='Test')
ax_acc.set_xticks(epoch_ticks)
ax_acc.set_ylabel('AccValue')
ax_acc.set_xlabel('epochs')
ax_acc.set_title('Acc with Weight Decay')
ax_acc.grid(True)
ax_acc.legend()
plt.show()

# ***Training With Batch Normalization***

In [None]:
# LeNet-5 with batch normalisation, placed on the selected device.
modelBN = LeNet5BN().to(device)
# Loss
criterion = nn.CrossEntropyLoss()
# Optimizer: 1 selects SGD with momentum, any other value selects Adam.
if optimizerType == 1:
  optimizer = torch.optim.SGD(modelBN.parameters(), lr=learning_rate, momentum=Moment)
else:
  optimizer = torch.optim.Adam(modelBN.parameters(), lr=learning_rate)
modelBN.train()
print('Start Training')
modelBN,AccTestListBN,AccTrainListBN,test_lossesBN,train_lossesBN = TrainNN(modelBN,device,epochs,optimizer,criterion,train_loader,test_loader)
print('Finished Training')
# The histories may contain 0-dim tensors (on the GPU for the losses), which
# matplotlib cannot convert; reduce every entry to a plain float first.
AccTestListBN = [float(v) for v in AccTestListBN]
AccTrainListBN = [float(v) for v in AccTrainListBN]
test_lossesBN = [float(v) for v in test_lossesBN]
train_lossesBN = [float(v) for v in train_lossesBN]

# Save the Model
torch.save(modelBN.state_dict(), '/content/drive/My Drive/ex1_300746930_034915504/models/LeNet5modelBN.pkl')
# Plot the loss and accuracy curves, one figure each.
epoch_axis = range(1, epochs + 1)
epoch_ticks = np.arange(0, epochs+1, 2.0)
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(epoch_axis, train_lossesBN, label='Train')
ax_loss.plot(epoch_axis, test_lossesBN, label='Test')
ax_loss.set_xticks(epoch_ticks)
ax_loss.set_ylabel('lossValue')
ax_loss.set_xlabel('epochs')
ax_loss.set_title('Loss With BN')
ax_loss.grid(True)
ax_loss.legend()
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(epoch_axis, AccTrainListBN, label='Train')
ax_acc.plot(epoch_axis, AccTestListBN, label='Test')
ax_acc.set_xticks(epoch_ticks)
ax_acc.set_ylabel('AccValue')
ax_acc.set_xlabel('epochs')
ax_acc.set_title('Acc With BN')
ax_acc.grid(True)
ax_acc.legend()
plt.show()

# ***Accuracy Calculation and Comparison (for Saved Models)***


Run the following sections including the sub-sections
1.   General setup
2.   LeNet-5 Classes




## Load the saved models
First mount the drive and check the model path.



In [None]:
# Rebuild each architecture and restore its saved weights.
# map_location=device remaps the stored tensors onto the current device, so
# checkpoints written on a GPU runtime also load on a CPU-only runtime.
modelNoReg = LeNet5().to(device)
modelNoReg.load_state_dict(torch.load('/content/drive/My Drive/ex1_300746930_034915504/models/LeNet5model.pkl', map_location=device))

modelDropout = LeNet5D(dropoutPer).to(device)
modelDropout.load_state_dict(torch.load('/content/drive/My Drive/ex1_300746930_034915504/models/LeNet5modelDropout.pkl', map_location=device))

modelWD = LeNet5().to(device)
modelWD.load_state_dict(torch.load('/content/drive/My Drive/ex1_300746930_034915504/models/LeNet5WDmodel.pkl', map_location=device))

modelBN = LeNet5BN().to(device)
modelBN.load_state_dict(torch.load('/content/drive/My Drive/ex1_300746930_034915504/models/LeNet5modelBN.pkl', map_location=device))


## Test and compare the models

In [None]:
def _accuracy_percent(models, loader):
  """Return the top-1 accuracy, in percent, of every model in ``models``.

  ``loader`` is iterated once and every batch is fed to all models. Accuracy
  is counted per sample (correct / total), so a smaller final batch does not
  skew the result. Gradients are disabled because this is inference only.
  """
  n_correct = [0] * len(models)
  n_samples = 0
  with torch.no_grad():
    for images, labels in loader:
      images, labels = images.to(device), labels.to(device)
      n_samples += labels.size(0)
      for k, net in enumerate(models):
        # argmax over the class dimension is the predicted label.
        predicted = net(images).argmax(dim=1)
        n_correct[k] += (predicted == labels.view(-1)).sum().item()
  return [100.0 * c / n_samples for c in n_correct]


# Table row label paired with the model it describes.
compared = [
    ('Without Regularization', modelNoReg),
    ('Dropout', modelDropout),
    ('Weight Decay', modelWD),
    ('Batch Normalization', modelBN),
]
# Eval mode: dropout is disabled and batch norm uses its running statistics.
for _, net in compared:
  net.eval()
nets = [net for _, net in compared]

test_acc = _accuracy_percent(nets, test_loader)
train_acc = _accuracy_percent(nets, train_loader)

T = tabulate([[label, te, tr] for (label, _), te, tr in zip(compared, test_acc, train_acc)]
             ,headers=['Model', 'Test-accuracy ', 'Train-accuracy'])
print(T)