In [21]:
from google.colab import drive
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim

In [22]:
print(torch.cuda.is_available())
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

True


In [23]:
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [24]:
df_train = pd.read_csv("/content/drive/MyDrive/mnist_train.csv")
df_train.head()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
X_train = df_train.drop('label', axis=1).values
y_train = df_train['label'].values

X_train = torch.tensor(X_train, dtype=torch.float32).view(-1, 1, 28, 28)
y_train = torch.tensor(y_train, dtype=torch.long)

dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(dataset, batch_size=64, shuffle=True)

In [26]:
df_test_val = pd.read_csv("/content/drive/MyDrive/mnist_test.csv")
df_test_val.head()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [27]:
df_val, df_test = train_test_split(df_test_val, test_size=0.5, random_state=42)

X_val = df_val.drop('label', axis=1).values
y_val = df_val['label'].values

X_val = torch.tensor(X_val, dtype=torch.float32).view(-1, 1, 28, 28)
y_val = torch.tensor(y_val, dtype=torch.long)

dataset = TensorDataset(X_val, y_val)
val_loader = DataLoader(dataset, batch_size=64, shuffle=False)

In [28]:
X_test = df_test.drop('label', axis=1).values
y_test = df_test['label'].values

X_test = torch.tensor(X_test, dtype=torch.float32).view(-1, 1, 28, 28)
y_test = torch.tensor(y_test, dtype=torch.long)

dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(dataset, batch_size=64, shuffle=False)

In [35]:
class CNN_dropout(nn.Module):
    def __init__(self, rate):
        super().__init__()
        self.dropout = nn.Dropout(rate)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3)
        self.relu1 = nn.ReLU()
        self.maxPool1 = nn.MaxPool2d(kernel_size = 2, stride = 2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3)
        self.relu2 = nn.ReLU()
        self.maxPool2 = nn.MaxPool2d(kernel_size = 2, stride = 2)
        self.fc1 = nn.Linear(32 * 5* 5, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, 10)

    def forward(self,x):
        x = self.dropout(x)
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.maxPool1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.maxPool2(x)
        x = torch.flatten(x, 1) # flattening the matrix
        x = self.fc1(x)
        x = self.relu3(x)
        x = self.fc2(x)
        return x

In [36]:
def trainCNN(Model):
    model = Model.to(device)
    lossType = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    for epoch in range(20):
      running_loss = 0
      #tupple of the inputs and their label
      for i, data in enumerate(train_loader,0):
        optimizer.zero_grad() #resetting gradients
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device) #using gpu
        outputs = model(inputs) #forward propagation
        loss = lossType(outputs, labels)
        loss.backward() #backward propagation
        optimizer.step()
        running_loss += loss.item()

        if (i + 1) % 500 == 0 or (i + 1) % 999 == 0:
                print(f"[Epoch {epoch+1}, Batch {i+1}] loss: {running_loss / 100:.4f}")
                running_loss = 0.0
    return model


In [37]:
def evaluateValCNN(model):
    total=0
    correct=0
    with torch.no_grad():
        for data in val_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device) #using gpu
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = (100*correct)/total
    return accuracy

In [38]:
def evaluateTestCNN(model):
    total=0
    correct=0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device) #using gpu
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = (100*correct)/total
    print(f"Accuracy: {accuracy}") #99.18 with momentum = 0.9, without 98.52 no droprate
    return accuracy

In [39]:
rates = [0, 0.1, 0.3, 0.5, 0.7] # 0 drop rate is no reg
def tuneDropOutRates(modelClass):
    accuracies = []
    for Rate in rates:
        modelType = modelClass(Rate)
        model = trainCNN(modelType)
        accuracy = evaluateValCNN(model)
        accuracies.append(accuracy)
    for i in range(len(accuracies)):
        print(f"For rate: {rates[i]} Accuracy = {accuracies[i]}\n")

#For rate: 0 Accuracy = 99.2
# For rate: 0.1 Accuracy = 98.9
# For rate: 0.3 Accuracy = 98.5
# For rate: 0.5 Accuracy = 97.68
# For rate: 0.7 Accuracy = 94.92

In [34]:
tuneDropOutRates(modelClass=CNN_dropout)

[Epoch 1, Batch 500] loss: 1.7959
[Epoch 2, Batch 500] loss: 0.3419
[Epoch 3, Batch 500] loss: 0.2431
[Epoch 4, Batch 500] loss: 0.1833
[Epoch 5, Batch 500] loss: 0.1573
[Epoch 6, Batch 500] loss: 0.1185
[Epoch 7, Batch 500] loss: 0.1041
[Epoch 8, Batch 500] loss: 0.0969
[Epoch 9, Batch 500] loss: 0.0685
[Epoch 10, Batch 500] loss: 0.0662
[Epoch 11, Batch 500] loss: 0.0554
[Epoch 12, Batch 500] loss: 0.0494
[Epoch 13, Batch 500] loss: 0.0468
[Epoch 14, Batch 500] loss: 0.0371
[Epoch 15, Batch 500] loss: 0.0319
[Epoch 16, Batch 500] loss: 0.0206
[Epoch 17, Batch 500] loss: 0.0276
[Epoch 18, Batch 500] loss: 0.0191
[Epoch 19, Batch 500] loss: 0.0112
[Epoch 20, Batch 500] loss: 0.0127
[Epoch 1, Batch 500] loss: 2.4563
[Epoch 2, Batch 500] loss: 0.4137
[Epoch 3, Batch 500] loss: 0.3069
[Epoch 4, Batch 500] loss: 0.2680
[Epoch 5, Batch 500] loss: 0.2343
[Epoch 6, Batch 500] loss: 0.1847
[Epoch 7, Batch 500] loss: 0.1861
[Epoch 8, Batch 500] loss: 0.1720
[Epoch 9, Batch 500] loss: 0.1431
[Ep

In [40]:
class CNN_batchNormalization(nn.Module):
    def __init__(self, rate):
        super().__init__()
        self.dropout = nn.Dropout(rate)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3)
        self.norm1 = nn.BatchNorm2d(16)
        self.relu1 = nn.ReLU()
        self.maxPool1 = nn.MaxPool2d(kernel_size = 2, stride = 2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3)
        self.norm2 = nn.BatchNorm2d(32)
        self.relu2 = nn.ReLU()
        self.maxPool2 = nn.MaxPool2d(kernel_size = 2, stride = 2)
        self.fc1 = nn.Linear(32 * 5* 5, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, 10)

    def forward(self,x):
        x = self.dropout(x)
        x = self.conv1(x)
        x = self.norm1(x)
        x = self.relu1(x)
        x = self.maxPool1(x)
        x = self.conv2(x)
        x = self.norm2(x)
        x = self.relu2(x)
        x = self.maxPool2(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = self.relu3(x)
        x = self.fc2(x)
        return x

In [41]:
tuneDropOutRates(modelClass=CNN_batchNormalization)
# For rate: 0 Accuracy = 99.12
# For rate: 0.1 Accuracy = 98.94
# For rate: 0.3 Accuracy = 98.74
# For rate: 0.5 Accuracy = 97.62
# For rate: 0.7 Accuracy = 95.84

[Epoch 1, Batch 500] loss: 3.1989
[Epoch 2, Batch 500] loss: 0.5916
[Epoch 3, Batch 500] loss: 0.4171
[Epoch 4, Batch 500] loss: 0.3368
[Epoch 5, Batch 500] loss: 0.2814
[Epoch 6, Batch 500] loss: 0.2532
[Epoch 7, Batch 500] loss: 0.2409
[Epoch 8, Batch 500] loss: 0.2060
[Epoch 9, Batch 500] loss: 0.1973
[Epoch 10, Batch 500] loss: 0.1815
[Epoch 11, Batch 500] loss: 0.1638
[Epoch 12, Batch 500] loss: 0.1551
[Epoch 13, Batch 500] loss: 0.1534
[Epoch 14, Batch 500] loss: 0.1389
[Epoch 15, Batch 500] loss: 0.1285
[Epoch 16, Batch 500] loss: 0.1225
[Epoch 17, Batch 500] loss: 0.1226
[Epoch 18, Batch 500] loss: 0.1078
[Epoch 19, Batch 500] loss: 0.1073
[Epoch 20, Batch 500] loss: 0.0944
[Epoch 1, Batch 500] loss: 3.2264
[Epoch 2, Batch 500] loss: 0.7323
[Epoch 3, Batch 500] loss: 0.5306
[Epoch 4, Batch 500] loss: 0.4505
[Epoch 5, Batch 500] loss: 0.3859
[Epoch 6, Batch 500] loss: 0.3365
[Epoch 7, Batch 500] loss: 0.3093
[Epoch 8, Batch 500] loss: 0.2740
[Epoch 9, Batch 500] loss: 0.2689
[Ep

In [44]:
model_no_normalization = trainCNN(CNN_dropout(rate=0))
model_normalization = trainCNN(CNN_batchNormalization(rate=0))
print(evaluateTestCNN(model_no_normalization))
print(evaluateTestCNN(model_normalization))

[Epoch 1, Batch 500] loss: 1.7280
[Epoch 2, Batch 500] loss: 0.3702
[Epoch 3, Batch 500] loss: 0.2630
[Epoch 4, Batch 500] loss: 0.1947
[Epoch 5, Batch 500] loss: 0.1612
[Epoch 6, Batch 500] loss: 0.1337
[Epoch 7, Batch 500] loss: 0.1099
[Epoch 8, Batch 500] loss: 0.0921
[Epoch 9, Batch 500] loss: 0.0861
[Epoch 10, Batch 500] loss: 0.0557
[Epoch 11, Batch 500] loss: 0.0544
[Epoch 12, Batch 500] loss: 0.0408
[Epoch 13, Batch 500] loss: 0.0353
[Epoch 14, Batch 500] loss: 0.0305
[Epoch 15, Batch 500] loss: 0.0220
[Epoch 16, Batch 500] loss: 0.0257
[Epoch 17, Batch 500] loss: 0.0152
[Epoch 18, Batch 500] loss: 0.0118
[Epoch 19, Batch 500] loss: 0.0051
[Epoch 20, Batch 500] loss: 0.0028
[Epoch 1, Batch 500] loss: 2.9043
[Epoch 2, Batch 500] loss: 0.5946
[Epoch 3, Batch 500] loss: 0.4247
[Epoch 4, Batch 500] loss: 0.3461
[Epoch 5, Batch 500] loss: 0.2975
[Epoch 6, Batch 500] loss: 0.2496
[Epoch 7, Batch 500] loss: 0.2325
[Epoch 8, Batch 500] loss: 0.2051
[Epoch 9, Batch 500] loss: 0.2008
[Ep