In [50]:
import torch #Import the torch library
from torch.utils.data import Dataset, DataLoader #DataLoader is a wrapper around the Dataset
from torchvision import datasets #datasets will allow us to import the MNIST library
from torchvision.transforms import ToTensor #ToTensor is the transform function
from torch import nn #Neural network module from pytorch
from torchsummary import summary
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score,accuracy_score,precision_recall_fscore_support
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from scipy.stats import mode, norm, skew, kurtosis
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split


In [51]:
# Select the compute device: CUDA device 0 when CUDA is available, otherwise CPU.
cuda_ready = torch.cuda.is_available()
device = torch.device(type="cuda" if cuda_ready else "cpu", index=0)

print(cuda_ready)

print(device)

True
cuda:0


In [52]:
# Empty results table; runEpoch appends one row per trained model.
Metrics_models = pd.DataFrame(
    columns=[
        "Model",
        "Optimizer",
        "Accuracy",
        "Precision weighted",
        "Recall weighted",
        "F1 Score weighted",
    ]
)

In [53]:
class CustomTrainDataset(Dataset):
    """Labelled digit images read from a CSV file.

    Each CSV row holds the label in column 0 followed by 784 pixel values,
    which are reshaped to a 28x28x1 uint8 array and passed through `transform`.
    """

    def __init__(self, path, transform):
        super().__init__()
        frame = pd.read_csv(path, header="infer")
        self.data = frame.values
        self.length = self.data.shape[0]
        self.transform = transform

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        """Return `(transformed_image, label)` for row `idx`."""
        row = self.data[idx]
        pixels = row[1:].astype(np.uint8)
        image = self.transform(pixels.reshape(28, 28, 1))
        return image, row[0]

class CustomTestDataset(Dataset):
    """Unlabelled digit images read from a CSV file.

    Every CSV row is treated as 784 pixel values (no label column), reshaped
    to a 28x28x1 uint8 array and passed through `transform`.
    """

    def __init__(self, path, transform):
        super().__init__()
        frame = pd.read_csv(path, header="infer")
        self.data = frame.values
        self.length = self.data.shape[0]
        self.transform = transform

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        """Return the transformed image for row `idx`."""
        pixels = self.data[idx, :].astype(np.uint8)
        return self.transform(pixels.reshape(28, 28, 1))


batch_size = 64

# Both datasets use ToTensor as the per-image transform.
train_dataset = CustomTrainDataset('train.csv', ToTensor())
test_dataset = CustomTestDataset('test.csv', ToTensor())

# Training batches are reshuffled each epoch; test batches keep the file's row order.
train_dl = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dl = DataLoader(test_dataset, batch_size=batch_size)

In [54]:
class DigitRecognizer_1(nn.Module):
    """Baseline CNN: four 3x3 conv blocks (conv -> BatchNorm -> ReLU), one max-pool
    after the first block, then a 10-way linear layer followed by BatchNorm1d.

    Spatial size for a 1x28x28 input: 28 -> 26 -> 13 (pool) -> 11 -> 9 -> 7,
    so the flattened feature vector has 64 * 7 * 7 = 3136 entries.
    """

    def __init__(self):
        super().__init__()
        self.relu = nn.ReLU()

        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(8)
        self.mp1 = nn.MaxPool2d(kernel_size=(2,2), stride=2, padding=0)

        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=0)
        self.bn2 = nn.BatchNorm2d(16)

        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=0)
        self.bn3 = nn.BatchNorm2d(32)

        self.conv4 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=0)
        self.bn4 = nn.BatchNorm2d(64)

        self.flatten = nn.Flatten()

        self.lin1 = nn.Linear(in_features=64 * 7 * 7, out_features=10)
        self.bn5 = nn.BatchNorm1d(num_features=10)

    def forward(self, x):
        """Map a batch of 1x28x28 images to 10 batch-normalised class scores."""
        x = self.mp1(self.relu(self.bn1(self.conv1(x))))  # 28 -> 26 -> 13
        x = self.relu(self.bn2(self.conv2(x)))            # 13 -> 11
        x = self.relu(self.bn3(self.conv3(x)))            # 11 -> 9
        x = self.relu(self.bn4(self.conv4(x)))            # 9 -> 7

        scores = self.lin1(self.flatten(x))
        return self.bn5(scores)

In [55]:
# Peek at one training batch to confirm the tensor layout fed to the models.
first_batch = next(iter(train_dl), None)
if first_batch is not None:
    X, y = first_batch
    print(X.shape)

torch.Size([64, 1, 28, 28])


In [56]:


def train_one_epoch(dataloader, model, loss_fn, optimizer):
    """Train `model` for one pass over `dataloader` and report training metrics.

    Args:
        dataloader: iterable of `(imgs, labels)` batches; must expose `.dataset`
            (used only for the progress print-out).
        model: network to optimise; put into training mode here.
        loss_fn: callable `loss_fn(pred, labels)` returning a scalar loss tensor.
        optimizer: optimizer stepping `model`'s parameters.

    Returns:
        `(epoch_loss, epoch_acc, [accuracy, precision, recall, f1])` where
        `epoch_loss` is the sample-weighted mean loss, `epoch_acc` is a
        percentage, and the list holds the scikit-learn scores (precision,
        recall and F1 use `average='weighted'`). All metrics are computed from
        the predictions made during training, i.e. while weights are still
        being updated.
    """
    model.train()
    epoch_loss = 0
    correct_predictions = 0
    total_samples = 0
    # Use the loader that was passed in, not the notebook-global train_dl,
    # so the progress denominator is right for any dataloader.
    size = len(dataloader.dataset)
    all_predictions = []
    all_targets = []

    for batch, (imgs, labels) in enumerate(dataloader):
        imgs, labels = imgs.to(device), labels.to(device)

        optimizer.zero_grad()
        pred = model(imgs)
        loss = loss_fn(pred, labels)
        loss.backward()
        optimizer.step()

        # Read scalars once; `loss` stays a tensor instead of being rebound to a float.
        batch_loss = loss.item()
        batch_count = imgs.size(0)
        pred_labels = pred.argmax(dim=1)

        epoch_loss += batch_loss * batch_count
        correct_predictions += (pred_labels == labels).sum().item()
        total_samples += batch_count

        all_predictions.extend(pred_labels.cpu().numpy())
        all_targets.extend(labels.cpu().numpy())

        if batch % 100 == 0:
            current = (batch + 1) * batch_count
            print(f"loss: {batch_loss:>7f}  [{current:>5d}/{size:>5d}]")

    accuracy = accuracy_score(all_targets, all_predictions)
    precision = precision_score(all_targets, all_predictions, average='weighted')
    recall = recall_score(all_targets, all_predictions, average='weighted')
    f1 = f1_score(all_targets, all_predictions, average='weighted')

    epoch_loss /= total_samples
    epoch_acc = correct_predictions / total_samples * 100

    return epoch_loss, epoch_acc, [accuracy, precision, recall, f1]

def evaluate(dataloader, model, path, output_path='submission.csv'):
    """Predict labels for every batch in `dataloader` and write them to a CSV.

    Args:
        dataloader: iterable yielding image batches (no labels), in the same
            row order as the CSV at `path`.
        model: trained network; switched to eval mode here.
        path: CSV template to fill in; predictions overwrite its second
            column (positional index 1).
        output_path: destination file, defaults to 'submission.csv'.
    """
    model.eval()
    data = pd.read_csv(path)

    # Track the write position from the actual batch lengths rather than
    # dataloader.batch_size, which is None with a batch sampler and wrong
    # whenever a batch is shorter than the nominal size.
    offset = 0
    with torch.no_grad():
        for imgs in dataloader:
            imgs = imgs.to(device)
            pred_labels = torch.argmax(model(imgs), dim=1).cpu().numpy()

            end = offset + len(pred_labels)
            data.iloc[offset:end, 1] = pred_labels
            offset = end

    data.to_csv(output_path, index=False)
    # return data.head()


In [57]:
# Experiment 1: baseline architecture trained with Adam and cross-entropy loss.
lr = 0.001
n_epochs = 5
loss_fn = nn.CrossEntropyLoss()
model_1 = DigitRecognizer_1().to(device)
optimizer_1 = torch.optim.Adam(params=model_1.parameters(), lr=lr)



In [58]:
def runEpoch(epochs,train_dl,test_dl, model, loss_fn, optimizer):
    """Train `model` for `epochs` epochs, printing loss and accuracy after each.

    The scikit-learn metrics of the first epoch only are appended as a new row
    to the module-level `Metrics_models` table. `test_dl` is accepted but not
    used by this function.
    """
    for epoch_no in range(1, epochs + 1):
        print("Epoch No:", epoch_no)
        train_epoch_loss, train_epoch_acc, metrics = train_one_epoch(train_dl, model, loss_fn, optimizer)
        print("Training:", "Epoch Loss:", train_epoch_loss, "Epoch Accuracy:", train_epoch_acc)
        print("--------------------------------------------------")
        if epoch_no != 1:
            continue
        # NOTE(review): only first-epoch training metrics are recorded, so the
        # table compares early learning speed, not final quality - confirm intended.
        Metrics_models.loc[len(Metrics_models.index)] = [
            type(model).__name__,
            type(optimizer).__name__,
            *metrics[:4],
        ]

In [59]:
runEpoch(epochs=5, train_dl=train_dl, test_dl=test_dl, model=model_1, loss_fn=loss_fn, optimizer=optimizer_1)

Epoch No: 1
loss: 2.715724  [   64/42000]
loss: 0.449643  [ 6464/42000]
loss: 0.371747  [12864/42000]
loss: 0.335580  [19264/42000]
loss: 0.284811  [25664/42000]
loss: 0.256936  [32064/42000]
loss: 0.194660  [38464/42000]
Training: Epoch Loss: 0.355768775559607 Epoch Accuracy: 96.51428571428572
--------------------------------------------------
Epoch No: 2
loss: 0.207810  [   64/42000]
loss: 0.206465  [ 6464/42000]
loss: 0.177034  [12864/42000]
loss: 0.150026  [19264/42000]
loss: 0.155498  [25664/42000]
loss: 0.123928  [32064/42000]
loss: 0.088414  [38464/42000]
Training: Epoch Loss: 0.14958731091590155 Epoch Accuracy: 98.67619047619047
--------------------------------------------------
Epoch No: 3
loss: 0.061371  [   64/42000]
loss: 0.078914  [ 6464/42000]
loss: 0.093656  [12864/42000]
loss: 0.101357  [19264/42000]
loss: 0.069342  [25664/42000]
loss: 0.083370  [32064/42000]
loss: 0.086087  [38464/42000]
Training: Epoch Loss: 0.09530063644477299 Epoch Accuracy: 98.95238095238095
------

In [60]:
class DigitRecognizer_BN_after(nn.Module):
    """Variant of the baseline CNN that applies BatchNorm after ReLU
    (conv -> ReLU -> BatchNorm) in every conv block.

    Spatial size for a 1x28x28 input: 28 -> 26 -> 13 (pool) -> 11 -> 9 -> 7,
    giving 64 * 7 * 7 = 3136 flattened features.
    """

    def __init__(self):
        super().__init__()
        self.relu = nn.ReLU()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(8)
        self.mp1 = nn.MaxPool2d(kernel_size=(2,2), stride=2, padding=0)

        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=0)
        self.bn2 = nn.BatchNorm2d(16)

        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=0)
        self.bn3 = nn.BatchNorm2d(32)

        self.conv4 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=0)
        self.bn4 = nn.BatchNorm2d(64)

        self.flatten = nn.Flatten()

        self.lin1 = nn.Linear(in_features=64 * 7 * 7, out_features=10)
        self.bn5 = nn.BatchNorm1d(num_features=10)

    def forward(self, x):
        """Map a batch of 1x28x28 images to 10 batch-normalised class scores."""
        x = self.bn1(self.relu(self.conv1(x)))  # 28 -> 26
        x = self.mp1(x)                         # 26 -> 13
        x = self.bn2(self.relu(self.conv2(x)))  # 13 -> 11
        x = self.bn3(self.relu(self.conv3(x)))  # 11 -> 9
        x = self.bn4(self.relu(self.conv4(x)))  # 9 -> 7

        scores = self.lin1(self.flatten(x))
        return self.bn5(scores)

In [61]:
# Experiment 2: BatchNorm placed after ReLU, same optimizer settings as before.
lr = 0.001
n_epochs = 5
loss_fn = nn.CrossEntropyLoss()
model_2 = DigitRecognizer_BN_after().to(device)
optimizer_2 = torch.optim.Adam(params=model_2.parameters(), lr=lr)

In [62]:
runEpoch(epochs=5, train_dl=train_dl, test_dl=test_dl, model=model_2, loss_fn=loss_fn, optimizer=optimizer_2)

Epoch No: 1
loss: 2.750093  [   64/42000]
loss: 0.529578  [ 6464/42000]
loss: 0.369238  [12864/42000]
loss: 0.287249  [19264/42000]
loss: 0.227973  [25664/42000]
loss: 0.203345  [32064/42000]
loss: 0.270999  [38464/42000]
Training: Epoch Loss: 0.3711061352548145 Epoch Accuracy: 96.08095238095238
--------------------------------------------------
Epoch No: 2
loss: 0.201173  [   64/42000]
loss: 0.148688  [ 6464/42000]
loss: 0.122565  [12864/42000]
loss: 0.141465  [19264/42000]
loss: 0.127681  [25664/42000]
loss: 0.101477  [32064/42000]
loss: 0.110354  [38464/42000]
Training: Epoch Loss: 0.15046192565418426 Epoch Accuracy: 98.64761904761905
--------------------------------------------------
Epoch No: 3
loss: 0.082981  [   64/42000]
loss: 0.094173  [ 6464/42000]
loss: 0.074916  [12864/42000]
loss: 0.087308  [19264/42000]
loss: 0.073080  [25664/42000]
loss: 0.094597  [32064/42000]
loss: 0.077946  [38464/42000]
Training: Epoch Loss: 0.09622669954526993 Epoch Accuracy: 98.84761904761905
-----

In [63]:
class DigitRecognizer_multipleMax(nn.Module):
    """CNN variant that max-pools twice: three conv blocks (conv -> BatchNorm -> ReLU)
    with the shared pooling layer applied after the first and second block.

    Spatial size for a 1x28x28 input: 28 -> 26 -> 13 (pool) -> 11 -> 5 (pool) -> 3,
    giving 32 * 3 * 3 = 288 flattened features.
    """

    def __init__(self):
        super().__init__()
        self.relu = nn.ReLU()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(8)
        self.mp1 = nn.MaxPool2d(kernel_size=(2,2), stride=2, padding=0)

        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=0)
        self.bn2 = nn.BatchNorm2d(16)

        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=0)
        self.bn3 = nn.BatchNorm2d(32)

        # NOTE(review): conv4/bn4 are registered (and handed to the optimizer)
        # but forward() never calls them - confirm whether they can be removed.
        self.conv4 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=0)
        self.bn4 = nn.BatchNorm2d(64)

        self.flatten = nn.Flatten()

        self.lin1 = nn.Linear(in_features=9*32, out_features=10)
        self.bn5 = nn.BatchNorm1d(num_features=10)

    def forward(self, x):
        """Map a batch of 1x28x28 images to 10 batch-normalised class scores."""
        x = self.relu(self.bn1(self.conv1(x)))  # 28 -> 26
        x = self.mp1(x)                         # 26 -> 13
        x = self.relu(self.bn2(self.conv2(x)))  # 13 -> 11
        x = self.mp1(x)                         # 11 -> 5
        x = self.relu(self.bn3(self.conv3(x)))  # 5 -> 3

        scores = self.lin1(self.flatten(x))
        return self.bn5(scores)

In [64]:
# Experiment 3: two max-pool stages, same optimizer settings as before.
lr = 0.001
n_epochs = 5
loss_fn = nn.CrossEntropyLoss()
model_3 = DigitRecognizer_multipleMax().to(device)
optimizer_3 = torch.optim.Adam(params=model_3.parameters(), lr=lr)

In [65]:
runEpoch(epochs=5, train_dl=train_dl, test_dl=test_dl, model=model_3, loss_fn=loss_fn, optimizer=optimizer_3)

Epoch No: 1
loss: 2.799722  [   64/42000]
loss: 0.601445  [ 6464/42000]
loss: 0.521483  [12864/42000]
loss: 0.575596  [19264/42000]
loss: 0.326626  [25664/42000]
loss: 0.303047  [32064/42000]
loss: 0.212942  [38464/42000]
Training: Epoch Loss: 0.44240846858705796 Epoch Accuracy: 93.97142857142858
--------------------------------------------------
Epoch No: 2
loss: 0.328449  [   64/42000]
loss: 0.207988  [ 6464/42000]
loss: 0.226716  [12864/42000]
loss: 0.174810  [19264/42000]
loss: 0.166036  [25664/42000]
loss: 0.193991  [32064/42000]
loss: 0.181299  [38464/42000]
Training: Epoch Loss: 0.18085272239503405 Epoch Accuracy: 98.00952380952381
--------------------------------------------------
Epoch No: 3
loss: 0.121468  [   64/42000]
loss: 0.095561  [ 6464/42000]
loss: 0.132781  [12864/42000]
loss: 0.096810  [19264/42000]
loss: 0.079853  [25664/42000]
loss: 0.103751  [32064/42000]
loss: 0.237287  [38464/42000]
Training: Epoch Loss: 0.11754645533788771 Epoch Accuracy: 98.44761904761906
----

In [66]:
class DigitRecognizer_7replaces3_3(nn.Module):
    """CNN variant using a single 7x7 convolution after the first pooled 3x3 block.

    Spatial size for a 1x28x28 input: 28 -> 26 (3x3 conv) -> 13 (pool) -> 7 (7x7 conv),
    giving 16 * 7 * 7 = 784 flattened features.
    """

    def __init__(self):
        super().__init__()
        self.relu = nn.ReLU()

        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, stride=1, padding=0)  # 28 -> 26
        self.bn1 = nn.BatchNorm2d(8)
        self.mp1 = nn.MaxPool2d(kernel_size=(2,2),stride=2)               # 26 -> 13

        self.conv2 = nn.Conv2d(8, 16, kernel_size=7, stride=1, padding=0)  # 13 -> 7
        self.bn2 = nn.BatchNorm2d(16)

        self.flatten = nn.Flatten()

        self.lin1 = nn.Linear(in_features=49*16, out_features=10)
        self.bn5 = nn.BatchNorm1d(num_features=10)

    def forward(self, x):
        """Map a batch of 1x28x28 images to 10 batch-normalised class scores."""
        x = self.mp1(self.relu(self.bn1(self.conv1(x))))
        x = self.relu(self.bn2(self.conv2(x)))

        scores = self.lin1(self.flatten(x))
        return self.bn5(scores)

In [67]:
# Experiment 4: 7x7 convolution variant, same optimizer settings as before.
lr = 0.001
n_epochs = 5
loss_fn = nn.CrossEntropyLoss()
model_4 = DigitRecognizer_7replaces3_3().to(device)
optimizer_4 = torch.optim.Adam(params=model_4.parameters(), lr=lr)

In [68]:
runEpoch(epochs=5, train_dl=train_dl, test_dl=test_dl, model=model_4, loss_fn=loss_fn, optimizer=optimizer_4)

Epoch No: 1
loss: 2.752306  [   64/42000]
loss: 0.529922  [ 6464/42000]
loss: 0.429241  [12864/42000]
loss: 0.322398  [19264/42000]
loss: 0.349856  [25664/42000]
loss: 0.306063  [32064/42000]
loss: 0.291411  [38464/42000]
Training: Epoch Loss: 0.42602495627176196 Epoch Accuracy: 94.85238095238095
--------------------------------------------------
Epoch No: 2
loss: 0.224680  [   64/42000]
loss: 0.261949  [ 6464/42000]
loss: 0.204805  [12864/42000]
loss: 0.217725  [19264/42000]
loss: 0.250112  [25664/42000]
loss: 0.189655  [32064/42000]
loss: 0.123136  [38464/42000]
Training: Epoch Loss: 0.18604893887610663 Epoch Accuracy: 97.96190476190476
--------------------------------------------------
Epoch No: 3
loss: 0.104963  [   64/42000]
loss: 0.146142  [ 6464/42000]
loss: 0.096784  [12864/42000]
loss: 0.110188  [19264/42000]
loss: 0.103956  [25664/42000]
loss: 0.123943  [32064/42000]
loss: 0.094289  [38464/42000]
Training: Epoch Loss: 0.11976131957485563 Epoch Accuracy: 98.47857142857143
----

In [69]:
class DigitRecognizer_Stride_2(nn.Module):
    """CNN variant that downsamples with a stride-2 convolution instead of max-pooling.

    Spatial size for a 1x28x28 input: 28 -> 26 -> 12 (stride 2) -> 10 -> 8,
    giving 64 * 8 * 8 = 4096 flattened features.
    """

    def __init__(self):
        super().__init__()
        self.relu = nn.ReLU()

        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, stride=1, padding=0)    # 28 -> 26
        self.bn1 = nn.BatchNorm2d(8)

        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, stride=2, padding=0)   # 26 -> 12
        self.bn2 = nn.BatchNorm2d(16)

        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=0)  # 12 -> 10
        self.bn3 = nn.BatchNorm2d(32)

        self.conv4 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=0)  # 10 -> 8
        self.bn4 = nn.BatchNorm2d(64)

        self.flatten = nn.Flatten()

        # 64 channels on an 8x8 map.
        self.lin1 = nn.Linear(in_features=64 * 8 * 8, out_features=10)
        self.bn5 = nn.BatchNorm1d(num_features=10)

    def forward(self, x):
        """Map a batch of 1x28x28 images to 10 batch-normalised class scores."""
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.relu(self.bn3(self.conv3(x)))
        x = self.relu(self.bn4(self.conv4(x)))

        scores = self.lin1(self.flatten(x))
        return self.bn5(scores)
   

In [70]:
# Experiment 5: stride-2 convolution variant, same optimizer settings as before.
lr = 0.001
loss_fn = nn.CrossEntropyLoss()
model_5 = DigitRecognizer_Stride_2().to(device)
optimizer_5 = torch.optim.Adam(params=model_5.parameters(), lr=lr)


In [71]:
runEpoch(epochs=5, train_dl=train_dl, test_dl=test_dl, model=model_5, loss_fn=loss_fn, optimizer=optimizer_5)

Epoch No: 1
loss: 2.929906  [   64/42000]
loss: 0.529168  [ 6464/42000]
loss: 0.409521  [12864/42000]
loss: 0.320103  [19264/42000]
loss: 0.252958  [25664/42000]
loss: 0.282731  [32064/42000]
loss: 0.254412  [38464/42000]
Training: Epoch Loss: 0.3681756896064395 Epoch Accuracy: 96.19047619047619
--------------------------------------------------
Epoch No: 2
loss: 0.150420  [   64/42000]
loss: 0.254112  [ 6464/42000]
loss: 0.171289  [12864/42000]
loss: 0.153892  [19264/42000]
loss: 0.087193  [25664/42000]
loss: 0.123845  [32064/42000]
loss: 0.127872  [38464/42000]
Training: Epoch Loss: 0.1538095245361328 Epoch Accuracy: 98.5547619047619
--------------------------------------------------
Epoch No: 3
loss: 0.155948  [   64/42000]
loss: 0.079777  [ 6464/42000]
loss: 0.091710  [12864/42000]
loss: 0.092513  [19264/42000]
loss: 0.062515  [25664/42000]
loss: 0.069590  [32064/42000]
loss: 0.063268  [38464/42000]
Training: Epoch Loss: 0.09666542614073981 Epoch Accuracy: 98.9547619047619
--------

In [72]:
# Display the metrics table; runEpoch adds one row per model, taken from that model's first training epoch.
Metrics_models

Unnamed: 0,Model,Optimizer,Accuracy,Precision weighted,Recall weighted,F1 Score weighted
0,DigitRecognizer_1,Adam,0.965143,0.965148,0.965143,0.965134
1,DigitRecognizer_BN_after,Adam,0.96081,0.960838,0.96081,0.960807
2,DigitRecognizer_multipleMax,Adam,0.939714,0.939685,0.939714,0.939651
3,DigitRecognizer_7replaces3_3,Adam,0.948524,0.948471,0.948524,0.948478
4,DigitRecognizer_Stride_2,Adam,0.961905,0.961881,0.961905,0.961882


In [73]:
class DigitRecognizer_4(nn.Module):
    """CNN variant that max-pools after the second conv block instead of the first.

    Spatial size for a 1x28x28 input: 28 -> 26 -> 24 -> 12 (pool) -> 10 -> 8,
    so the flattened feature vector has 64 * 8 * 8 = 4096 entries. The linear
    layer is sized to match; the previous 49 * 64 made forward() fail with a
    shape mismatch.
    """

    def __init__(self):
        super().__init__()
        self.relu = nn.ReLU()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=(3,3), stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(8)
        # NOTE(review): dropout is defined but never applied in forward().
        self.drop = nn.Dropout(p=0.1,inplace=True)
        self.mp1 = nn.MaxPool2d(kernel_size=(2,2),stride=2)

        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(3,3), stride=1, padding=0)
        self.bn2 = nn.BatchNorm2d(16)

        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3,3), stride=1, padding=0)
        self.bn3 = nn.BatchNorm2d(32)

        self.conv4 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3,3), stride=1, padding=0)
        self.bn4 = nn.BatchNorm2d(64)

        self.flatten = nn.Flatten()

        # 64 channels on the 8x8 map produced by forward().
        self.lin1 = nn.Linear(in_features=64 * 8 * 8, out_features=10)
        self.bn5 = nn.BatchNorm1d(num_features=10)

    def forward(self, x):
        """Map a batch of 1x28x28 images to 10 batch-normalised class scores."""
        # 28 -> 26
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        # 26 -> 24
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)

        # 24 -> 12
        x = self.mp1(x)

        # 12 -> 10
        x = self.conv3(x)
        x = self.bn3(x)
        x = self.relu(x)

        # 10 -> 8
        x = self.conv4(x)
        x = self.bn4(x)
        x = self.relu(x)

        x = self.flatten(x)

        x = self.lin1(x)
        output = self.bn5(x)

        return output

In [74]:
# Experiment 6: pooling after the second conv block, same optimizer settings as before.
lr = 0.001
loss_fn = nn.CrossEntropyLoss()
model_6 = DigitRecognizer_4().to(device)
optimizer_6 = torch.optim.Adam(params=model_6.parameters(), lr=lr)


In [75]:
# Training for model_6 is disabled here; only the architecture is inspected.
# runEpoch(5,train_dl,test_dl,model_6,loss_fn,optimizer=optimizer_6)
# Print a per-layer summary of model_6 for a 1x28x28 input.
# NOTE(review): the recorded output lists MaxPool2d right after the first ReLU,
# which does not match DigitRecognizer_4.forward as written - it looks stale; re-run.
summary(model_6,input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              80
       BatchNorm2d-2            [-1, 8, 26, 26]              16
              ReLU-3            [-1, 8, 26, 26]               0
         MaxPool2d-4            [-1, 8, 13, 13]               0
            Conv2d-5           [-1, 16, 11, 11]           1,168
       BatchNorm2d-6           [-1, 16, 11, 11]              32
              ReLU-7           [-1, 16, 11, 11]               0
            Conv2d-8             [-1, 32, 9, 9]           4,640
       BatchNorm2d-9             [-1, 32, 9, 9]              64
             ReLU-10             [-1, 32, 9, 9]               0
           Conv2d-11             [-1, 64, 7, 7]          18,496
      BatchNorm2d-12             [-1, 64, 7, 7]             128
             ReLU-13             [-1, 64, 7, 7]               0
          Flatten-14                 [-

In [76]:
class DigitRecognizer_BN_Before(nn.Module):
    """Variant of the baseline CNN that applies BatchNorm to the input of each
    convolution (BatchNorm -> conv -> ReLU), so each BatchNorm2d is sized to the
    conv's input channels.

    Spatial size for a 1x28x28 input: 28 -> 26 -> 13 (pool) -> 11 -> 9 -> 7,
    giving 64 * 7 * 7 = 3136 flattened features.
    """

    def __init__(self):
        super().__init__()
        self.relu = nn.ReLU()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(1)
        self.mp1 = nn.MaxPool2d(kernel_size=(2,2), stride=2, padding=0)

        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=0)
        self.bn2 = nn.BatchNorm2d(8)

        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=0)
        self.bn3 = nn.BatchNorm2d(16)

        self.conv4 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=0)
        self.bn4 = nn.BatchNorm2d(32)

        self.flatten = nn.Flatten()

        self.lin1 = nn.Linear(in_features=64 * 7 * 7, out_features=10)
        self.bn5 = nn.BatchNorm1d(num_features=10)

    def forward(self, x):
        """Map a batch of 1x28x28 images to 10 batch-normalised class scores."""
        x = self.relu(self.conv1(self.bn1(x)))  # 28 -> 26
        x = self.mp1(x)                         # 26 -> 13
        x = self.relu(self.conv2(self.bn2(x)))  # 13 -> 11
        x = self.relu(self.conv3(self.bn3(x)))  # 11 -> 9
        x = self.relu(self.conv4(self.bn4(x)))  # 9 -> 7

        scores = self.lin1(self.flatten(x))
        return self.bn5(scores)

In [77]:
# Experiment 7: BatchNorm before each convolution, same optimizer settings as before.
lr = 0.001
loss_fn = nn.CrossEntropyLoss()
model_7 = DigitRecognizer_BN_Before().to(device)
optimizer_7 = torch.optim.Adam(params=model_7.parameters(), lr=lr)


In [78]:
runEpoch(epochs=5, train_dl=train_dl, test_dl=test_dl, model=model_7, loss_fn=loss_fn, optimizer=optimizer_7)

Epoch No: 1
loss: 2.633011  [   64/42000]
loss: 0.443395  [ 6464/42000]
loss: 0.328901  [12864/42000]
loss: 0.325078  [19264/42000]
loss: 0.211009  [25664/42000]
loss: 0.280229  [32064/42000]
loss: 0.245642  [38464/42000]
Training: Epoch Loss: 0.3604148984523047 Epoch Accuracy: 96.28809523809524
--------------------------------------------------
Epoch No: 2
loss: 0.142361  [   64/42000]
loss: 0.165094  [ 6464/42000]
loss: 0.158224  [12864/42000]
loss: 0.140088  [19264/42000]
loss: 0.145510  [25664/42000]
loss: 0.113163  [32064/42000]
loss: 0.128847  [38464/42000]
Training: Epoch Loss: 0.1510012861036119 Epoch Accuracy: 98.53571428571428
--------------------------------------------------
Epoch No: 3
loss: 0.077072  [   64/42000]
loss: 0.059543  [ 6464/42000]
loss: 0.082279  [12864/42000]
loss: 0.105914  [19264/42000]
loss: 0.233150  [25664/42000]
loss: 0.072978  [32064/42000]
loss: 0.100398  [38464/42000]
Training: Epoch Loss: 0.09248969374951863 Epoch Accuracy: 98.97142857142858
------

In [79]:
# Display the updated metrics table (first-epoch training metrics, one row per model run through runEpoch).
Metrics_models

Unnamed: 0,Model,Optimizer,Accuracy,Precision weighted,Recall weighted,F1 Score weighted
0,DigitRecognizer_1,Adam,0.965143,0.965148,0.965143,0.965134
1,DigitRecognizer_BN_after,Adam,0.96081,0.960838,0.96081,0.960807
2,DigitRecognizer_multipleMax,Adam,0.939714,0.939685,0.939714,0.939651
3,DigitRecognizer_7replaces3_3,Adam,0.948524,0.948471,0.948524,0.948478
4,DigitRecognizer_Stride_2,Adam,0.961905,0.961881,0.961905,0.961882
5,DigitRecognizer_BN_Before,Adam,0.962881,0.962875,0.962881,0.962863
