In [124]:
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn.functional as F
import torch.optim as optim
import torch.nn as nn
import numpy as np
import torch
import copy
import csv
import sys

In [125]:
# Report which accelerator (if any) PyTorch can see
if not torch.cuda.is_available():
    print("CUDA is not available. PyTorch is using the CPU.")
else:
    print("CUDA is available...")
    print("GPU Device Name:", torch.cuda.get_device_name(0))
    print("CUDA Version:", torch.version.cuda)
    gpu_count = torch.cuda.device_count()
    print("Number of GPUs:", gpu_count)

    # List every visible GPU by index
    for gpu_id in range(gpu_count):
        print(f"GPU Device {gpu_id} Name: {torch.cuda.get_device_name(gpu_id)}")

    print("Current GPU Device:", torch.cuda.current_device())

# Select the compute device: the chosen GPU when CUDA is present, the CPU otherwise
gpu_index = 0  # Change this to the index of the GPU you want to use
if torch.cuda.is_available():
    device = torch.device(f"cuda:{gpu_index}")
else:
    device = torch.device("cpu")

CUDA is available...
GPU Device Name: NVIDIA GeForce RTX 2070
CUDA Version: 12.4
Number of GPUs: 1
GPU Device 0 Name: NVIDIA GeForce RTX 2070
Current GPU Device: 0


In [126]:
# Auxiliary classes
class CustomDataset(Dataset):
    """Pair an indexable collection of images with a parallel collection of labels."""

    def __init__(self, images, labels):
        """Store the two collections as given; no copy or validation is performed."""
        self.images, self.labels = images, labels

    def __len__(self):
        """Return the number of samples, defined by the length of ``images``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        return self.images[idx], self.labels[idx]

### Model definition

In [127]:
# Model class
class MNISTCNN(nn.Module):
    """Two-layer convolutional classifier for single-channel 28x28 images.

    Both convolutions use a 3x3 kernel with stride 1 and padding 1, so the
    28x28 spatial size is preserved and the flattened feature vector has
    ``64 * 28 * 28`` entries. The network outputs 10 raw logits per image
    (no softmax is applied).
    """

    def __init__(self):
        super().__init__()
        # Define the layers
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(64 * 28 * 28, 128)
        self.fc2 = nn.Linear(128, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Compute class logits.

        Args:
            x: Tensor of shape ``(batch, 1, 28, 28)``.

        Returns:
            Tensor of shape ``(batch, 10)`` holding unnormalized class scores.

        Raises:
            RuntimeError: If the spatial size is not 28x28, because the
                flattened features no longer match ``fc1``.
        """
        # Apply the layers
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        # Flatten everything except the batch dimension. Unlike view(-1, N),
        # this never lets the batch dimension absorb a wrong spatial size, so
        # mis-shaped inputs fail at fc1 instead of silently merging samples.
        x = torch.flatten(x, start_dim=1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

### Load dataset

In [128]:
# Read both CSV files, skipping the header row of each
train_data = np.genfromtxt('../data/train.csv', delimiter=',', skip_header=1)
test_data = np.genfromtxt('../data/test.csv', delimiter=',', skip_header=1)

# Column 0 of the training file is the label; convert it to an integer tensor
train_labels = torch.tensor(train_data[:, 0], dtype=torch.long)

# The remaining training columns, and every test column, are pixels:
# reshape each row to a 28x28 image and convert to float32 tensors
train_images = torch.tensor(
    train_data[:, 1:].reshape(len(train_data), 28, 28), dtype=torch.float32
)
test_images = torch.tensor(
    test_data.reshape(len(test_data), 28, 28), dtype=torch.float32
)

### Train the model on the training dataset

In [129]:
# Wrap the labelled data in a dataset
full_dataset = CustomDataset(train_images, train_labels)

# Split the labelled data into training and validation sets (70%-30%)
train_size = int(0.7 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

# Create DataLoaders
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Initialize the model (once, directly on the target device), loss function, and optimizer
model = MNISTCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Initialize variables for early stopping
best_val_loss = float('inf')
patience = 15  # Number of epochs to wait for improvement before stopping
no_improvement_epochs = 0
best_model_wts = copy.deepcopy(model.state_dict())

# Training loop
num_epochs = 250

# Loop through each epoch
for epoch in range(num_epochs):
    # Set the model to training mode
    model.train()
    # Initialize running loss for the epoch
    running_loss = 0.0
    # Loop through each batch of images and labels in the training loader
    for images, labels in train_loader:
        # Add the channel dimension and move the batch to the target device
        images = images.unsqueeze(1).to(device)
        labels = labels.to(device)
        # Zero the gradients
        optimizer.zero_grad()
        # Forward pass: compute predicted outputs
        outputs = model(images)
        # Compute the loss
        loss = criterion(outputs, labels)
        # Backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # Perform a single optimization step (parameter update)
        optimizer.step()
        # Update the running loss
        running_loss += loss.item()

    # Print the mean batch loss for the epoch
    print(f'Training - Epoch [{epoch+1}/{num_epochs}], Mean batch loss: {running_loss/len(train_loader):.4f}')

    # Evaluation on the validation set
    model.eval()  # Set the model to evaluation mode
    val_loss = 0.0  # Summed per-batch validation loss
    correct = 0  # Number of correct predictions
    total = 0  # Number of samples actually evaluated
    with torch.no_grad():  # Disable gradient calculation for evaluation
        for images, labels in val_loader:
            # Add the channel dimension and move the batch to the target device
            images = images.unsqueeze(1).to(device)
            labels = labels.to(device)
            # Forward pass: compute predicted outputs
            outputs = model(images)
            # Compute the loss
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            # Get the predicted labels
            _, predicted = torch.max(outputs, dim=1)
            # Count the real batch size: the last batch can hold fewer than
            # `batch_size` samples, so a fixed increment would deflate accuracy
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Print the mean batch loss and accuracy for the validation set
    print(f'Validation - Mean batch loss: {val_loss/len(val_loader):.4f}, Accuracy: {100 * correct / total:.2f}%')

    # Early stopping logic: keep a snapshot of the best weights seen so far
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improvement_epochs = 0
        best_model_wts = copy.deepcopy(model.state_dict())
    else:
        no_improvement_epochs += 1

    if no_improvement_epochs >= patience:
        print(f'Early stopping triggered after {epoch+1} epochs')
        break

# Load the best model weights
model.load_state_dict(best_model_wts)

Training - Epoch [1/250], Mean batch loss: 0.5594
Validation - Mean batch loss: 0.1457, Accuracy: 95.58%
Training - Epoch [2/250], Mean batch loss: 0.0561
Validation - Mean batch loss: 0.1039, Accuracy: 97.18%
Training - Epoch [3/250], Mean batch loss: 0.0292
Validation - Mean batch loss: 0.1438, Accuracy: 96.75%
Training - Epoch [4/250], Mean batch loss: 0.0218
Validation - Mean batch loss: 0.1362, Accuracy: 97.45%
Training - Epoch [5/250], Mean batch loss: 0.0233
Validation - Mean batch loss: 0.1945, Accuracy: 96.53%
Training - Epoch [6/250], Mean batch loss: 0.0239
Validation - Mean batch loss: 0.1221, Accuracy: 97.51%
Training - Epoch [7/250], Mean batch loss: 0.0104
Validation - Mean batch loss: 0.1436, Accuracy: 97.41%
Training - Epoch [8/250], Mean batch loss: 0.0136
Validation - Mean batch loss: 0.1537, Accuracy: 97.39%
Training - Epoch [9/250], Mean batch loss: 0.0198
Validation - Mean batch loss: 0.1267, Accuracy: 97.73%
Training - Epoch [10/250], Mean batch loss: 0.0122
Vali

<All keys matched successfully>

### Evaluate the model on the test dataset and save the predictions to a CSV file

In [130]:
# Number of test images sent through the model per forward pass
inference_batch_size = 256

# Inference needs no gradients and should run with the model in evaluation mode
model.eval()
batch_predictions = []
with torch.no_grad():
    # Loop through the test images one mini-batch at a time
    for start in range(0, test_images.shape[0], inference_batch_size):
        # Add the channel dimension and move the batch to the target device
        batch = test_images[start:start + inference_batch_size].unsqueeze(1).to(device)
        # Get the predicted label for every image in the batch
        _, predicted = torch.max(model(batch), dim=1)
        # Gather results on the CPU so writing the file needs no device syncs
        batch_predictions.append(predicted.cpu())

# 1-D tensor of predicted labels, in the same order as `test_images`
predictions = (
    torch.cat(batch_predictions)
    if batch_predictions
    else torch.empty(0, dtype=torch.long)
)

# Write predictions to a CSV file
with open('../submission/conv2d_submission.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    # Write the header row in the CSV file
    writer.writerow(["ImageId", "Label"])
    # Write each prediction with its 1-based image ID
    writer.writerows(enumerate(predictions.tolist(), start=1))