In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import numpy as np
from datetime import datetime
import os


In [2]:

# check CUDA
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


Using device: cpu


In [3]:

# Define the CNN with Residual Blocks
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, 
                               stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, 
                               stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1) 
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        # Implement Residual Blocks
        self.layer1 = self.make_layer(ResidualBlock, 64, 2, 1)
        self.layer2 = self.make_layer(ResidualBlock, 128, 2, 2)
        self.layer3 = self.make_layer(ResidualBlock, 256, 2, 2)
        self.layer4 = self.make_layer(ResidualBlock, 512, 2, 2)
        self.avg_pool = nn.AvgPool2d(3)
        self.fc = nn.Linear(512, 15)  # 15 output classes

    def make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels):
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels, kernel_size=3, stride=stride, padding=1),
                nn.BatchNorm2d(out_channels))
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for i in range(1, blocks):
            layers.append(block(out_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out


In [4]:

# load the dataset from the npz file

data = np.load('data.npz')
X_train = data['X_train'].astype(np.float32)  # Shape: (82875, 784)
y_train = data['y_train'].astype(np.int64)    # Shape: (82875,)
X_test = data['X_test'].astype(np.float32)    # Shape: (14625, 784)


In [5]:
# Normalize the data
X_scaled = X_train/255
X_scaled = X_scaled.reshape(-1, 1, 28, 28)  # Reshape back to (n_samples, channels, height, width)

X_test_scaled = X_test/255
X_test_scaled = X_test_scaled.reshape(-1, 1, 28, 28)

In [6]:
# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y_train, test_size=0.2, random_state=42)

In [7]:
# Convert arrays to PyTorch tensors
X_train_tensor = torch.tensor(X_train)
y_train_tensor = torch.tensor(y_train)
X_val_tensor = torch.tensor(X_val)
y_val_tensor = torch.tensor(y_val)
X_test_tensor = torch.tensor(X_test_scaled)

In [9]:

# Create TensorDatasets and DataLoaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
train_loader = DataLoader(train_dataset, batch_size=125, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=125, shuffle=False)


In [10]:

# Initialize the model, loss function, and optimizer
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
#optimizer = optim.RMSprop(model.parameters(), lr=0.0005, alpha=0.9, eps=1e-08, weight_decay=0, momentum=0.9, centered=False)
optimizer = optim.Adam(model.parameters(), lr=0.0008)

In [11]:

# Training model
def train(num_epochs):
    model.train()
    for epoch in range(num_epochs):
        for data, targets in train_loader:
            data, targets = data.to(device), targets.to(device) # Move data to GPU if available
            optimizer.zero_grad()
            outputs = model(data)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
        print(f'Epoch {epoch+1}, Loss: {loss.item()}')


# Validate the model
def validate_model():
    model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for data, targets in val_loader:
            data, targets = data.to(device), targets.to(device)  # Move data to GPU if available
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    accuracy = 100 * correct / total
    print(f'Validation Accuracy: {accuracy:.2f}%')
    
# Prediction function
def predict(data_loader):
    model.eval()
    all_preds = []
    with torch.no_grad():
        for data in data_loader:
            data = data.to(device)
            outputs = model(data)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
    return all_preds

In [68]:
# Run the training and validation
train(50)
validate_model()

Epoch 1, Loss: 0.19886234402656555
Epoch 2, Loss: 0.10900476574897766
Epoch 3, Loss: 0.12020643055438995
Epoch 4, Loss: 0.08914970606565475
Epoch 5, Loss: 0.0919119268655777
Epoch 6, Loss: 0.1542442888021469
Epoch 7, Loss: 0.06253644078969955
Epoch 8, Loss: 0.06355825811624527
Epoch 9, Loss: 0.006623979192227125
Epoch 10, Loss: 0.024642109870910645
Epoch 11, Loss: 0.004815190564841032
Epoch 12, Loss: 0.004590204451233149
Epoch 13, Loss: 0.024619080126285553
Epoch 14, Loss: 0.008317760191857815
Epoch 15, Loss: 0.002217015717178583
Epoch 16, Loss: 0.07220067828893661
Epoch 17, Loss: 0.005309847183525562
Epoch 18, Loss: 0.0063355909660458565
Epoch 19, Loss: 0.0409163199365139
Epoch 20, Loss: 0.0014564853627234697
Epoch 21, Loss: 0.021214304491877556
Epoch 22, Loss: 0.007388508878648281
Epoch 23, Loss: 0.0001725315669318661
Epoch 24, Loss: 0.0004041249630972743
Epoch 25, Loss: 0.019093720242381096
Epoch 26, Loss: 0.0001276477996725589
Epoch 27, Loss: 0.002137929666787386
Epoch 28, Loss: 0.

In [69]:

# Predict on the test set
test_loader = DataLoader(X_test_tensor, batch_size=64, shuffle=False)
test_predictions = predict(test_loader)

print(np.shape(test_predictions))


(14625,)


In [70]:
from datetime import datetime
import os

# Assume test_predictions are available from your model's output

# Create the submission directory if it doesn't exist
directory = "submission"
if not os.path.exists(directory):
    os.makedirs(directory)

# Get current date and time
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"cnn_predictions_{current_time}.csv"

# Full path for saving the file
full_path = os.path.join(directory, filename)

# Preparing the data to save
predict_id = np.arange(0, len(X_test))
submission_predictions = np.vstack((predict_id, test_predictions)).T

# Save the predictions to a CSV file
np.savetxt(full_path, submission_predictions, delimiter=",", fmt='%d', header="ID,Label", comments='')

print(f"File saved as {full_path}")

File saved as submission/cnn_predictions_20240415_132153.csv


In [12]:
from torchsummary import summary
input_shape = (1, 28, 28)
summary(model, input_shape)
# print(model)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 28, 28]             640
       BatchNorm2d-2           [-1, 64, 28, 28]             128
              ReLU-3           [-1, 64, 28, 28]               0
            Conv2d-4           [-1, 64, 28, 28]          36,928
       BatchNorm2d-5           [-1, 64, 28, 28]             128
              ReLU-6           [-1, 64, 28, 28]               0
            Conv2d-7           [-1, 64, 28, 28]          36,928
       BatchNorm2d-8           [-1, 64, 28, 28]             128
              ReLU-9           [-1, 64, 28, 28]               0
    ResidualBlock-10           [-1, 64, 28, 28]               0
           Conv2d-11           [-1, 64, 28, 28]          36,928
      BatchNorm2d-12           [-1, 64, 28, 28]             128
             ReLU-13           [-1, 64, 28, 28]               0
           Conv2d-14           [-1, 64,