In [22]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [23]:
# Load the training and test tables from the two mnist_*.csv files in the
# working directory (both reads happen in file order: train, then test).
train_data, test_data = [
    pd.read_csv(csv_name) for csv_name in ('mnist_train.csv', 'mnist_test.csv')
]

In [24]:
# Separate the training table into inputs (every column except 'label')
# and targets (the 'label' column). `train_data` itself is left untouched.
train_features = train_data.drop(columns='label')
train_labels = train_data['label']

In [25]:
# Separate the test table into inputs (every column except 'label')
# and targets (the 'label' column). `test_data` itself is left untouched.
test_features = test_data.drop(columns='label')
test_labels = test_data['label']

In [26]:
# Convert the training features/labels to NumPy arrays and divide the features
# by 255.0 (presumably mapping 8-bit pixel intensities into [0, 1] — confirm
# against the CSV contents).
# NOTE: this rebinds `train_features` from its own previous value, so running
# the cell a second time divides by 255 again.
train_features, train_labels = (
    np.array(train_features) / 255.0,
    np.array(train_labels),
)

In [27]:
# Convert the test features/labels to NumPy arrays and divide the features by
# 255.0, mirroring the scaling applied to the training features.
# NOTE: this rebinds `test_features` from its own previous value, so running
# the cell a second time divides by 255 again.
test_features, test_labels = (
    np.array(test_features) / 255.0,
    np.array(test_labels),
)

In [28]:
# Hold out 20% of the training rows as a validation split. The fixed
# random_state makes the partition repeatable.
# NOTE: the result is written back to `train_features` / `train_labels`, so
# re-running this cell splits the already-reduced training set again.
train_features, val_features, train_labels, val_labels = train_test_split(
    train_features,
    train_labels,
    test_size=0.2,
    random_state=42,
)

In [29]:
# Wrap every NumPy split in a torch tensor: features are cast to FloatTensor
# and labels to LongTensor, one (features, labels) pair per split.
train_features, train_labels = (
    torch.from_numpy(train_features).type(torch.FloatTensor),
    torch.from_numpy(train_labels).type(torch.LongTensor),
)

val_features, val_labels = (
    torch.from_numpy(val_features).type(torch.FloatTensor),
    torch.from_numpy(val_labels).type(torch.LongTensor),
)

test_features, test_labels = (
    torch.from_numpy(test_features).type(torch.FloatTensor),
    torch.from_numpy(test_labels).type(torch.LongTensor),
)

In [30]:
class Net(nn.Module):
    """Fully connected classifier: 784 -> 256 -> 128 -> 10 with ReLU activations.

    The final layer is followed by ``LogSoftmax(dim=1)``, so ``forward``
    returns log-probabilities rather than raw logits.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as before so parameter names
        # and seeded initialisation stay unchanged.
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)
        self.relu = nn.ReLU()
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return a (N, 10) tensor of log-probabilities.

        ``x`` is reshaped with ``view(-1, 784)``, so it must be viewable as
        rows of 784 values.
        """
        flat = x.view(-1, 784)
        hidden = self.relu(self.fc1(flat))
        hidden = self.relu(self.fc2(hidden))
        return self.softmax(self.fc3(hidden))


model = Net()

In [31]:
# The model's forward pass already ends in LogSoftmax, i.e. it emits
# log-probabilities. NLLLoss is the loss that consumes log-probabilities;
# CrossEntropyLoss expects raw logits and would apply log-softmax a second
# time (redundant, same loss value).
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [None]:
# Train `model` for a fixed number of epochs and evaluate it on the validation
# split after every epoch. Each history list receives exactly ONE entry per
# epoch, so the loss and accuracy curves share the same epoch axis when plotted.
train_loss_list = []
val_loss_list = []
train_acc_list = []
val_acc_list = []

num_epochs = 10
batch_size = 32

train_dataset = torch.utils.data.TensorDataset(train_features, train_labels)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

val_dataset = torch.utils.data.TensorDataset(val_features, val_labels)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

for epoch in range(num_epochs):
    train_loss = 0.0
    train_total = 0
    train_correct = 0

    val_loss = 0.0
    val_total = 0
    val_correct = 0
    val_batches = 0  # validation batches whose loss was not NaN

    # Optimisation pass over the training split.
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()

    # Evaluation pass: no gradients are needed, so skip autograd bookkeeping.
    model.eval()
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Best-effort: a NaN batch is excluded from every validation statistic.
            if np.isnan(loss.item()):
                continue

            val_loss += loss.item()
            val_batches += 1
            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    # Epoch-level averages. Guarded so an empty split or an all-NaN validation
    # pass reports 0.0 instead of raising ZeroDivisionError. The validation loss
    # is averaged over the batches that were actually accumulated.
    epoch_train_loss = train_loss / len(train_loader) if len(train_loader) != 0 else 0.0
    epoch_val_loss = val_loss / val_batches if val_batches != 0 else 0.0
    train_accuracy = train_correct / train_total if train_total != 0 else 0.0
    val_accuracy = val_correct / val_total if val_total != 0 else 0.0

    train_loss_list.append(epoch_train_loss)
    val_loss_list.append(epoch_val_loss)
    train_acc_list.append(train_accuracy)
    val_acc_list.append(val_accuracy)

    print('Epoch [{}/{}], Training Loss: {:.4f}, Validation Loss: {:.4f}, Training Accuracy: {:.4f}, Validation Accuracy: {:.4f}'.format(epoch+1, num_epochs, epoch_train_loss, epoch_val_loss, train_accuracy, val_accuracy))
    

Epoch [1/10], Training Loss: 0.0928, Validation Loss: 0.2407, Training Accuracy: 0.9794, Validation Accuracy: 0.9570
Epoch [2/10], Training Loss: 0.0842, Validation Loss: 0.2038, Training Accuracy: 0.9803, Validation Accuracy: 0.9675
Epoch [3/10], Training Loss: 0.0830, Validation Loss: 0.2576, Training Accuracy: 0.9816, Validation Accuracy: 0.9654
Epoch [4/10], Training Loss: 0.0827, Validation Loss: 0.2751, Training Accuracy: 0.9814, Validation Accuracy: 0.9679


In [None]:
# Summarise the most recent run: the last entry of each history list.
print('Train Loss: {:.4f}, Train Accuracy: {:.4f}'.format(
    *(history[-1] for history in (train_loss_list, train_acc_list))
))
print('Validation Loss: {:.4f}, Validation Accuracy: {:.4f}'.format(
    *(history[-1] for history in (val_loss_list, val_acc_list))
))

In [None]:
# Two stacked panels: loss curves on top, accuracy curves below.
fig, axs = plt.subplots(2, 1, figsize=(10, 10))

# (axis, y-axis label, ((legend label, series), ...)) for each panel.
panels = (
    (axs[0], 'Loss', (('Training Loss', train_loss_list),
                      ('Validation Loss', val_loss_list))),
    (axs[1], 'Accuracy', (('Training Accuracy', train_acc_list),
                          ('Validation Accuracy', val_acc_list))),
)

for ax, ylabel, curves in panels:
    for curve_label, series in curves:
        ax.plot(series, label=curve_label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.legend()

plt.show()

In [None]:
class Net1(nn.Module):
    """Fully connected classifier (784 -> 256 -> 128 -> 10) with regularisation.

    Each hidden layer applies Linear -> ReLU -> LayerNorm -> Dropout(p=0.2).
    The output layer is followed by ``LogSoftmax(dim=1)``, so ``forward``
    returns log-probabilities rather than raw logits.
    """

    def __init__(self):
        # Zero-argument super(): the previous `super(Net, self)` named a class
        # that Net1 does not inherit from, which made construction fail.
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.2)
        self.layer_norm1 = nn.LayerNorm(256)
        self.layer_norm2 = nn.LayerNorm(128)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return a (N, 10) tensor of log-probabilities.

        ``x`` is reshaped with ``view(-1, 784)``, so it must be viewable as
        rows of 784 values. Dropout is only active in training mode.
        """
        x = x.view(-1, 784)
        x = self.layer_norm1(self.relu(self.fc1(x)))
        x = self.dropout(x)
        x = self.layer_norm2(self.relu(self.fc2(x)))
        x = self.dropout(x)
        x = self.softmax(self.fc3(x))
        return x

model = Net1()

In [None]:
# The model's forward pass already ends in LogSoftmax, i.e. it emits
# log-probabilities. NLLLoss is the loss that consumes log-probabilities;
# CrossEntropyLoss expects raw logits and would apply log-softmax a second
# time (redundant, same loss value).
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [None]:
# Train `model` for a fixed number of epochs and evaluate it on the validation
# split after every epoch. Each history list receives exactly ONE entry per
# epoch, so the loss and accuracy curves share the same epoch axis when plotted.
train_loss_list = []
val_loss_list = []
train_acc_list = []
val_acc_list = []

num_epochs = 10
batch_size = 32

train_dataset = torch.utils.data.TensorDataset(train_features, train_labels)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

val_dataset = torch.utils.data.TensorDataset(val_features, val_labels)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

for epoch in range(num_epochs):
    train_loss = 0.0
    train_total = 0
    train_correct = 0

    val_loss = 0.0
    val_total = 0
    val_correct = 0
    val_batches = 0  # validation batches whose loss was not NaN

    # Optimisation pass over the training split (dropout active via train()).
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()

    # Evaluation pass: no gradients are needed, so skip autograd bookkeeping.
    model.eval()
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Best-effort: a NaN batch is excluded from every validation statistic.
            if np.isnan(loss.item()):
                continue

            val_loss += loss.item()
            val_batches += 1
            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    # Epoch-level averages. Guarded so an empty split or an all-NaN validation
    # pass reports 0.0 instead of raising ZeroDivisionError. The validation loss
    # is averaged over the batches that were actually accumulated.
    epoch_train_loss = train_loss / len(train_loader) if len(train_loader) != 0 else 0.0
    epoch_val_loss = val_loss / val_batches if val_batches != 0 else 0.0
    train_accuracy = train_correct / train_total if train_total != 0 else 0.0
    val_accuracy = val_correct / val_total if val_total != 0 else 0.0

    train_loss_list.append(epoch_train_loss)
    val_loss_list.append(epoch_val_loss)
    train_acc_list.append(train_accuracy)
    val_acc_list.append(val_accuracy)

    print('Epoch [{}/{}], Training Loss: {:.4f}, Validation Loss: {:.4f}, Training Accuracy: {:.4f}, Validation Accuracy: {:.4f}'.format(epoch+1, num_epochs, epoch_train_loss, epoch_val_loss, train_accuracy, val_accuracy))

In [None]:
# Summarise the most recent run: the last entry of each history list.
print('Train Loss: {:.4f}, Train Accuracy: {:.4f}'.format(
    *(history[-1] for history in (train_loss_list, train_acc_list))
))
print('Validation Loss: {:.4f}, Validation Accuracy: {:.4f}'.format(
    *(history[-1] for history in (val_loss_list, val_acc_list))
))

In [None]:
# Two stacked panels: loss curves on top, accuracy curves below.
fig, axs = plt.subplots(2, 1, figsize=(10, 10))

# (axis, y-axis label, ((legend label, series), ...)) for each panel.
panels = (
    (axs[0], 'Loss', (('Training Loss', train_loss_list),
                      ('Validation Loss', val_loss_list))),
    (axs[1], 'Accuracy', (('Training Accuracy', train_acc_list),
                          ('Validation Accuracy', val_acc_list))),
)

for ax, ylabel, curves in panels:
    for curve_label, series in curves:
        ax.plot(series, label=curve_label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.legend()

plt.show()

In [None]:
class Net2(nn.Module):
    """Fully connected classifier (784 -> 256 -> 128 -> 10) with tunable dropout.

    Each hidden layer applies Linear -> ReLU -> LayerNorm -> Dropout. The
    output layer is followed by ``LogSoftmax(dim=1)``, so ``forward`` returns
    log-probabilities rather than raw logits.

    Args:
        dropout_prob: probability passed to ``nn.Dropout`` for both hidden layers.
    """

    def __init__(self, dropout_prob):
        super().__init__()
        # Layers are created in the same order as before so parameter names
        # and seeded initialisation stay unchanged.
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_prob)
        self.layer_norm1 = nn.LayerNorm(256)
        self.layer_norm2 = nn.LayerNorm(128)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return a (N, 10) tensor of log-probabilities.

        ``x`` is reshaped with ``view(-1, 784)``, so it must be viewable as
        rows of 784 values.
        """
        flat = x.view(-1, 784)

        hidden = self.relu(self.fc1(flat))
        hidden = self.dropout(self.layer_norm1(hidden))

        hidden = self.relu(self.fc2(hidden))
        hidden = self.dropout(self.layer_norm2(hidden))

        return self.softmax(self.fc3(hidden))

In [None]:
# Hyper-parameter grid: every learning rate is combined with every dropout
# probability by the search loop that consumes these two lists.
learning_rates = [1e-3, 5e-4, 1e-4]
dropout_probs = [0.2, 0.4, 0.6]

In [None]:
# Grid search: train a fresh Net2 for every (learning rate, dropout probability)
# pair and record its per-epoch training/validation metrics.
num_epochs = 10
batch_size = 32

# Loop-invariant: the datasets and loaders do not depend on the hyper-parameters,
# so they are built once instead of once per configuration.
train_dataset = torch.utils.data.TensorDataset(train_features, train_labels)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

val_dataset = torch.utils.data.TensorDataset(val_features, val_labels)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# History of every configuration, keyed by (lr, dropout_prob), so earlier runs
# are not lost when the per-run lists below are rebound for the next one.
grid_results = {}

for lr in learning_rates:
    for dropout_prob in dropout_probs:
        print('Training model with learning rate={} and dropout probability={}'.format(lr, dropout_prob))
        model = Net2(dropout_prob)
        # Net2 already ends in LogSoftmax, so use the loss that consumes
        # log-probabilities (CrossEntropyLoss would apply log-softmax again).
        criterion = nn.NLLLoss()
        optimizer = optim.Adam(model.parameters(), lr=lr)

        # Per-run history; exactly one entry per epoch in each list. After the
        # search finishes these names hold the LAST configuration's history.
        train_loss_list = []
        val_loss_list = []
        train_acc_list = []
        val_acc_list = []

        for epoch in range(num_epochs):
            train_loss = 0.0
            train_total = 0
            train_correct = 0

            val_loss = 0.0
            val_total = 0
            val_correct = 0
            val_batches = 0  # validation batches whose loss was not NaN

            # Optimisation pass over the training split.
            model.train()
            for inputs, labels in train_loader:
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                train_total += labels.size(0)
                train_correct += (predicted == labels).sum().item()

            # Evaluation pass: no gradients are needed.
            model.eval()
            with torch.no_grad():
                for inputs, labels in val_loader:
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    # Best-effort: a NaN batch is excluded from every validation statistic.
                    if np.isnan(loss.item()):
                        continue

                    val_loss += loss.item()
                    val_batches += 1
                    _, predicted = torch.max(outputs.data, 1)
                    val_total += labels.size(0)
                    val_correct += (predicted == labels).sum().item()

            # Epoch-level averages, guarded against empty splits / all-NaN passes.
            epoch_train_loss = train_loss / len(train_loader) if len(train_loader) != 0 else 0.0
            epoch_val_loss = val_loss / val_batches if val_batches != 0 else 0.0
            train_accuracy = train_correct / train_total if train_total != 0 else 0.0
            val_accuracy = val_correct / val_total if val_total != 0 else 0.0

            train_loss_list.append(epoch_train_loss)
            val_loss_list.append(epoch_val_loss)
            train_acc_list.append(train_accuracy)
            val_acc_list.append(val_accuracy)

            print('Epoch [{}/{}], Training Loss: {:.4f}, Validation Loss: {:.4f}, Training Accuracy: {:.4f}, Validation Accuracy: {:.4f}'.format(epoch+1, num_epochs, epoch_train_loss, epoch_val_loss, train_accuracy, val_accuracy))

        grid_results[(lr, dropout_prob)] = {
            'train_loss': train_loss_list,
            'val_loss': val_loss_list,
            'train_acc': train_acc_list,
            'val_acc': val_acc_list,
        }

In [None]:
# Summarise the most recent run: the last entry of each history list.
print('Train Loss: {:.4f}, Train Accuracy: {:.4f}'.format(
    *(history[-1] for history in (train_loss_list, train_acc_list))
))
print('Validation Loss: {:.4f}, Validation Accuracy: {:.4f}'.format(
    *(history[-1] for history in (val_loss_list, val_acc_list))
))

In [None]:
# Two stacked panels: loss curves on top, accuracy curves below.
fig, axs = plt.subplots(2, 1, figsize=(10, 10))

# (axis, y-axis label, ((legend label, series), ...)) for each panel.
panels = (
    (axs[0], 'Loss', (('Training Loss', train_loss_list),
                      ('Validation Loss', val_loss_list))),
    (axs[1], 'Accuracy', (('Training Accuracy', train_acc_list),
                          ('Validation Accuracy', val_acc_list))),
)

for ax, ylabel, curves in panels:
    for curve_label, series in curves:
        ax.plot(series, label=curve_label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.legend()

plt.show()