In [21]:
import numpy as np
import struct
from array import array
from os.path import join


class MnistDataloader(object):
    def __init__(self, training_images_filepath,training_labels_filepath,
                 test_images_filepath):
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath

    def read_images_labels(self, images_filepath, labels_filepath):
        labels = []
        if labels_filepath is not None:
            with open(labels_filepath, 'rb') as file:
                magic, size = struct.unpack(">II", file.read(8))
                if magic != 2049:
                    raise ValueError('Magic number mismatch, expected 2049, got {}'.format(magic))
                labels = array("B", file.read())

        with open(images_filepath, 'rb') as file:
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            if magic != 2051:
                raise ValueError('Magic number mismatch, expected 2051, got {}'.format(magic))
            image_data = array("B", file.read())
        images = []
        for i in range(size):
            images.append([0] * rows * cols)
        for i in range(size):
            img = np.array(image_data[i * rows * cols:(i + 1) * rows * cols])
            img = img.reshape(28, 28)
            images[i][:] = img            

        return images, labels

    def load_data(self):
        x_train, y_train = self.read_images_labels(self.training_images_filepath, self.training_labels_filepath)
        x_test, y_test = self.read_images_labels(self.test_images_filepath, None)
        return (x_train, y_train), x_test        

input_path = './MNIST_Data_Set'
training_images_filepath = join(input_path, 'train-images.idx3-ubyte')
training_labels_filepath = join(input_path, 'train-labels.idx1-ubyte')
test_images_filepath = join(input_path, 't10k-images.idx3-ubyte')

mnist_dataloader = MnistDataloader(training_images_filepath, training_labels_filepath, test_images_filepath)
(x_train, y_train), x_test = mnist_dataloader.load_data()

for i in range(len(x_train)):
    flattened_img = np.array(x_train[i]).flatten()
    x_train[i] = flattened_img


In [22]:
np.set_printoptions(linewidth=115)

print(x_train[0])

print(str(y_train[0]))

[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   3  18  18  18 126 136 175  26 166 255 247 127   0   0   0   0
   0   0   0   0   0   0   0   0  30  36  94 154 170 253 253 253 253 253 225 172 253 242 195  64   0   0   0   0
   0   0   0   0   0   0   0  49 238 253 253 253 253 253 253 253 253 251  93  82  82  56  39   0   0   0   0   0
   0   0   0   0   0   0   0  18 219 253 253 253 253 253 198 182 247 241   0   0   0   0   0   0

In [23]:
from sklearn.model_selection import train_test_split

# Split the data into training and test sets
x_train = np.array(x_train)
y_train = np.array(y_train)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=42) # 80% training and 20% test


In [24]:
import optuna
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, Subset
import torch.nn.functional as F
from sklearn.model_selection import KFold
import numpy as np
import torch.optim as optim

# Check if GPU is available
print(torch.cuda.is_available())
print(torch.version.cuda)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters: learning rate, number of hidden layers, number of neurons in each hidden layer, activation function, batch size, number of epochs, optimizer

# Define CNN model
class CNN(nn.Module):
    def __init__(self, num_conv_layers, num_filters, num_fc_layers, num_neurons, dropout_conv, dropout_fc):
        super(CNN, self).__init__()
        self.convs = nn.ModuleList()
        self.convs.append(nn.Conv2d(1, num_filters[0], kernel_size=3))
        for i in range(1, num_conv_layers):
            self.convs.append(nn.Conv2d(num_filters[i-1], num_filters[i], kernel_size=3))
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout_conv = nn.Dropout2d(p=dropout_conv)
        
        conv_output_size = 28
        for _ in range(num_conv_layers):
            conv_output_size = (conv_output_size - 2) // 2  # Kernel size = 3, stride = 1, padding = 0, followed by pooling
        self.fc_input_size = num_filters[-1] * conv_output_size * conv_output_size
        
        self.fcs = nn.ModuleList()
        self.fcs.append(nn.Linear(self.fc_input_size, num_neurons[0]))
        for i in range(1, num_fc_layers):
            self.fcs.append(nn.Linear(num_neurons[i-1], num_neurons[i]))
        self.dropout_fc = nn.Dropout(p=dropout_fc)
        self.output = nn.Linear(num_neurons[-1], 10)
    
    def forward(self, x):
        for conv in self.convs:
            x = F.relu(conv(x))
            x = self.pool(x)
        x = self.dropout_conv(x)
        x = x.view(-1, self.fc_input_size)
        for fc in self.fcs:
            x = F.relu(fc(x))
            x = self.dropout_fc(x)
        x = self.output(x)
        return x

# Optuna Stuff for Hyperparameter Optimization
"""
def objective(trial):
    num_conv_layers = trial.suggest_int('num_conv_layers', 1, 3)
    num_fc_layers = trial.suggest_int('num_fc_layers', 1, 3)
    num_neurons = [trial.suggest_int(f'num_neurons_{i}', 16, 128) for i in range(num_fc_layers)]
    dropout_conv = trial.suggest_float('dropout_conv', 0.1, 0.5)
    dropout_fc = trial.suggest_float('dropout_fc', 0.1, 0.5)
    lr = trial.suggest_float('lr', 1e-5, 1e-1)
    # batch_size = trial.suggest_int('batch_size', 16, 128)
    epochs = trial.suggest_int('epochs', 10, 100)
    num_filters = [trial.suggest_int(f'num_filters_{i}', 16, 64) for i in range(num_conv_layers)]
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'RMSprop', 'SGD'])

    # Create the model
    model = CNN(num_conv_layers, num_filters, num_fc_layers, num_neurons, dropout_conv, dropout_fc).to(device)
    
    # Choose the optimizer
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)
    
    # Loss function
    criterion = nn.CrossEntropyLoss()
    
    # Training loop
    num_epochs = epochs
    for epoch in range(num_epochs):
        model.train()
        for batch in train_loader:
            x_batch, y_batch = batch
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(x_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
        
        # Validation loop
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for batch in val_loader:
                x_batch, y_batch = batch
                x_batch, y_batch = x_batch.to(device), y_batch.to(device)
                outputs = model(x_batch)
                _, predicted = torch.max(outputs.data, 1)
                total += y_batch.size(0)
                correct += (predicted == y_batch).sum().item()
        
        accuracy = correct / total
        
        # Report intermediate results to Optuna
        trial.report(accuracy, epoch)
        
        # Prune trial if needed
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    
    return accuracy

# Load your data
x_train_tensor = torch.tensor(x_train, dtype=torch.float32).view(-1, 1, 28, 28)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
x_val_tensor = torch.tensor(x_val, dtype=torch.float32).view(-1, 1, 28, 28)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)

train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
val_dataset = TensorDataset(x_val_tensor, y_val_tensor)

train_loader = DataLoader(train_dataset, batch_size=40, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=40, shuffle=False)

# Create an Optuna study and optimize the objective function
study = optuna.create_study(direction='maximize', pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=50)
"""

True
12.6


"\ndef objective(trial):\n    num_conv_layers = trial.suggest_int('num_conv_layers', 1, 3)\n    num_fc_layers = trial.suggest_int('num_fc_layers', 1, 3)\n    num_neurons = [trial.suggest_int(f'num_neurons_{i}', 16, 128) for i in range(num_fc_layers)]\n    dropout_conv = trial.suggest_float('dropout_conv', 0.1, 0.5)\n    dropout_fc = trial.suggest_float('dropout_fc', 0.1, 0.5)\n    lr = trial.suggest_float('lr', 1e-5, 1e-1)\n    # batch_size = trial.suggest_int('batch_size', 16, 128)\n    epochs = trial.suggest_int('epochs', 10, 100)\n    num_filters = [trial.suggest_int(f'num_filters_{i}', 16, 64) for i in range(num_conv_layers)]\n    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'RMSprop', 'SGD'])\n\n    # Create the model\n    model = CNN(num_conv_layers, num_filters, num_fc_layers, num_neurons, dropout_conv, dropout_fc).to(device)\n    \n    # Choose the optimizer\n    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)\n    \n    # Loss function

In [25]:
import torch
from torch.utils.data import DataLoader, TensorDataset

# Reshape the input tensors to [batch_size, channels, height, width]
x_train_tensor = torch.tensor(x_train, dtype=torch.float32).view(-1, 1, 28, 28)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
x_val_tensor = torch.tensor(x_val, dtype=torch.float32).view(-1, 1, 28, 28)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)

# Creation of the TensorDatasets
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
val_dataset = TensorDataset(x_val_tensor, y_val_tensor)

# Creation of the DataLoader
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

In [26]:
from torch import optim
from torch.optim.lr_scheduler import StepLR

model = CNN(2, [16, 36], 2, [64, 121], 0.20556773023364286, 0.2641216757343955).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(model.parameters(), lr=0.0009269629482047373)


num_epochs = 42

# Training the CNN model

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')
    
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    print(f'Validation Accuracy: {100 * correct / total}%')

Epoch 1, Loss: 0.37186744501938424
Validation Accuracy: 97.225%
Epoch 2, Loss: 0.14943034331128002
Validation Accuracy: 98.25%
Epoch 3, Loss: 0.11492025888214509
Validation Accuracy: 98.275%
Epoch 4, Loss: 0.09604761921490232
Validation Accuracy: 98.48333333333333%
Epoch 5, Loss: 0.0835720376946653
Validation Accuracy: 98.25833333333334%
Epoch 6, Loss: 0.08002242047099087
Validation Accuracy: 98.61666666666666%
Epoch 7, Loss: 0.07092469543455324
Validation Accuracy: 98.44166666666666%
Epoch 8, Loss: 0.07089685162327562
Validation Accuracy: 98.65833333333333%
Epoch 9, Loss: 0.061762784351827575
Validation Accuracy: 98.64166666666667%
Epoch 10, Loss: 0.06129868561947175
Validation Accuracy: 98.74166666666666%
Epoch 11, Loss: 0.061145114667451705
Validation Accuracy: 98.65%
Epoch 12, Loss: 0.05638694184244863
Validation Accuracy: 98.975%
Epoch 13, Loss: 0.054107237003646634
Validation Accuracy: 98.78333333333333%
Epoch 14, Loss: 0.05234982679816312
Validation Accuracy: 98.95%
Epoch 15, Lo

In [27]:
import torch
from torch.utils.data import DataLoader, TensorDataset

# Convert test data to PyTorch tensor
x_test_tensor = torch.tensor(x_test, dtype=torch.float32).to(device)

# Create a TensorDataset and DataLoader for the test data
test_dataset = TensorDataset(x_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Set the model to evaluation mode
model.eval()

predictions = []

# Disable gradient computation
with torch.no_grad():
    for inputs in test_loader:
        inputs = inputs[0]  # Extract the inputs from the tuple
        inputs = inputs.view(inputs.size(0), 1, 28, 28)  # Reshape the inputs to (batch_size, 1, 28, 28)
        inputs = inputs.to(device)  # Move inputs to the same device as the model
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        predictions.extend(predicted.cpu().numpy())

# Convert predictions to a numpy array
predictions = np.array(predictions)

print(predictions)

[7 2 1 ... 4 5 6]


In [28]:
# Create CSV file for submission to Kaggle
import pandas as pd

# Create a DataFrame with the `Id` and `Label` columns
df = pd.DataFrame({'ID': np.arange(0, len(predictions)), 'Label': predictions})

# Save the DataFrame to a CSV file
df.to_csv('submission.csv', index=False)

# Display the first few rows of the DataFrame
print(df.head())

   ID  Label
0   0      7
1   1      2
2   2      1
3   3      0
4   4      4
