In [None]:
# Section 0: Import necessary libraries

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
from torchvision import transforms
from PIL import Image

from google.colab import drive
drive.mount('/content/gdrive')

!ls -l ./gdrive/MyDrive/Colab\ Notebooks/datasets

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
total 164265
-rw------- 1 root root     23875 Nov  6 01:33 diabetes2.csv
-rw------- 1 root root     26657 Dec 15 03:41 submission.csv
-rw------- 1 root root  33075636 Dec 14 05:43 test_data.csv
-rw------- 1 root root 135047556 Dec 14 05:43 train_data.csv
-rw------- 1 root root     32350 Dec 14 05:43 train_target.csv


In [None]:
# Section 1: Load and preprocess the data

# All three CSV files sit in the same Drive folder and are read without a header row.
DATASET_DIR = '/content/gdrive/MyDrive/Colab Notebooks/datasets'


def _read_headerless_csv(file_name):
    """Read `file_name` from DATASET_DIR into a DataFrame, treating every row as data."""
    return pd.read_csv(DATASET_DIR + '/' + file_name, header=None)


# Training features and their labels
train_data = _read_headerless_csv('train_data.csv')
train_target = _read_headerless_csv('train_target.csv')

# Test features (no labels are loaded for these)
test_data = _read_headerless_csv('test_data.csv')

# Define a custom dataset class
class CustomDataset(Dataset):
    """Map-style dataset that serves one 48x48 image and its label per row.

    Args:
        data: DataFrame indexed positionally; each row is cast to uint8 and
            reshaped to (48, 48), so a row must hold 48 * 48 values.
        target: DataFrame whose first column supplies the label for the row
            at the same position in `data`.
        transform: Optional callable applied to the PIL image before it is
            returned.
    """

    def __init__(self, data, target, transform=None):
        self.data, self.target, self.transform = data, target, transform

    def __len__(self):
        # One sample per row of the feature frame.
        return len(self.data)

    def __getitem__(self, idx):
        """Return `(image, label)` for the row at position `idx`."""
        pixels = self.data.iloc[idx, :].values
        picture = Image.fromarray(pixels.astype(np.uint8).reshape(48, 48))
        label = self.target.iloc[idx, 0]

        # A falsy transform (e.g. None) means the raw PIL image is handed back.
        if not self.transform:
            return picture, label
        return self.transform(picture), label

# Preprocessing applied to every image: force a single grayscale channel, then
# convert the PIL image to a tensor.
to_grayscale = transforms.Grayscale(num_output_channels=1)
transform = transforms.Compose([to_grayscale, transforms.ToTensor()])

# Hold out 20% of the labelled rows for validation; random_state pins the split.
X_train, X_val, y_train, y_val = train_test_split(
    train_data,
    train_target,
    test_size=0.2,
    random_state=42,
)

# No labels were loaded for the test rows, so a single column of zeros stands in
# for them and lets CustomDataset be reused unchanged.
placeholder_targets = pd.DataFrame(np.zeros((len(test_data), 1)))

train_dataset = CustomDataset(X_train, y_train, transform=transform)
val_dataset = CustomDataset(X_val, y_val, transform=transform)
test_dataset = CustomDataset(test_data, placeholder_targets, transform=transform)

# Only the training loader shuffles; validation and test keep row order.
BATCH_SIZE = 64
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Section 2: Build the neural network model

class CNNModel(nn.Module):
    """Four-stage convolutional classifier for 48x48 images.

    Each feature stage is Conv2d(3x3, padding 1) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2), with 32, 64, 128 and 256 output channels. Four 2x poolings
    reduce a 48x48 input to 3x3, which is what the first Linear layer
    (256 * 3 * 3 inputs) expects; other spatial sizes will fail in the
    classifier with a shape mismatch.

    Args:
        num_classes: Number of output logits. Defaults to 3.
        in_channels: Number of channels in the input images. Defaults to 1.
        dropout: Drop probability used by both Dropout layers. Defaults to 0.4.

    With the default arguments the layer layout and the ``state_dict`` keys
    (``features.0`` ... ``features.15``, ``classifier.0`` ... ``classifier.8``)
    are the same as a hand-written flat ``nn.Sequential`` of these layers.
    """

    # Output channels of the successive convolution stages.
    _CONV_WIDTHS = (32, 64, 128, 256)

    def __init__(self, num_classes=3, in_channels=1, dropout=0.4):
        super().__init__()

        # Build the stages into one flat Sequential so module indices (and thus
        # checkpoint keys) do not depend on how the stages are grouped in code.
        feature_layers = []
        prev_channels = in_channels
        for width in self._CONV_WIDTHS:
            feature_layers += [
                nn.Conv2d(prev_channels, width, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(width),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ]
            prev_channels = width
        self.features = nn.Sequential(*feature_layers)

        self.classifier = nn.Sequential(
            nn.Linear(prev_channels * 3 * 3, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),

            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),

            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes) for image batch `x`."""
        x = self.features(x)
        # Keep the batch dimension, collapse channels and spatial dims for the MLP head.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

# Instantiate the improved model and define loss function and optimizer

# Seed PyTorch's global RNG before any weights are drawn so that weight
# initialisation, DataLoader shuffling and dropout masks repeat across a
# Restart & Run All (42 matches the random_state used for the train/val split).
torch.manual_seed(42)

model = CNNModel()
criterion = nn.CrossEntropyLoss()  # expects raw logits and integer class labels
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Section 3: Train the model

num_epochs = 15
for epoch in range(num_epochs):
    # One optimisation pass over the training batches.
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

    # Section 4: Evaluate the model on the validation set.
    # This part is inside the epoch loop, so accuracy is reported after every epoch.
    model.eval()
    val_predictions, val_targets = [], []

    with torch.no_grad():
        for inputs, labels in val_loader:
            # Index of the largest logit in each row is the predicted class.
            predicted = model(inputs).max(dim=1).indices
            val_predictions.extend(predicted.cpu().numpy())
            val_targets.extend(labels.cpu().numpy())

    # After the loop finishes, val_accuracy holds the last epoch's score.
    val_accuracy = accuracy_score(val_targets, val_predictions)
    print(f'Validation Accuracy: {val_accuracy}')

Validation Accuracy: 0.7020092735703246
Validation Accuracy: 0.7394126738794435
Validation Accuracy: 0.7462132921174652
Validation Accuracy: 0.6772797527047913
Validation Accuracy: 0.7489953632148377
Validation Accuracy: 0.7604327666151468
Validation Accuracy: 0.7486862442040185
Validation Accuracy: 0.7394126738794435
Validation Accuracy: 0.7474497681607419
Validation Accuracy: 0.740030911901082
Validation Accuracy: 0.7261205564142195
Validation Accuracy: 0.7493044822256569
Validation Accuracy: 0.7582689335394127
Validation Accuracy: 0.7598145285935085
Validation Accuracy: 0.768160741885626


In [None]:
# Section 5: Make predictions on the test set

model.eval()

# Run every test batch through the model without tracking gradients; the
# second element of each batch (the placeholder target) is ignored.
with torch.no_grad():
    batch_predictions = [
        model(inputs).max(dim=1).indices.cpu().numpy()
        for inputs, _ in test_loader
    ]

# Flatten the per-batch arrays into one list of predicted class indices, in loader order.
test_predictions = [label for batch in batch_predictions for label in batch]

In [None]:
# Section 6: Save predictions to a CSV file

SUBMISSION_PATH = '/content/gdrive/MyDrive/Colab Notebooks/datasets/submission.csv'

# 'Id' is simply the position of each prediction (0, 1, 2, ...).
submission_columns = {
    'Id': range(len(test_predictions)),
    'Category': test_predictions,
}
submission_df = pd.DataFrame(submission_columns)

# index=False keeps the DataFrame's own index out of the file.
submission_df.to_csv(SUBMISSION_PATH, index=False)

In [None]:
# Section 7: Print the final accuracy score

# val_accuracy still holds the value computed after the last training epoch.
print('Final Validation Accuracy: {}'.format(val_accuracy))

Final Validation Accuracy: 0.768160741885626
