In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import models, transforms
import glob

In [2]:
class DigitSumDataset(Dataset):
    """Map-style dataset that pairs each image with its label.

    Args:
        images: Sized, indexable collection of images (the notebook passes
            arrays annotated as shape (N, H, W)).
        labels: Sized, indexable collection holding one label per image.
        transform: Optional callable applied to an image each time it is
            fetched. ``None`` returns the stored image unchanged.

    Raises:
        ValueError: If ``images`` and ``labels`` have different lengths.
    """

    def __init__(self, images, labels, transform=None):
        # Fail early: a length mismatch would otherwise surface much later as
        # an IndexError mid-epoch, or go unnoticed if labels is the longer one.
        if len(images) != len(labels):
            raise ValueError(
                "images and labels must have the same length, "
                f"got {len(images)} and {len(labels)}"
            )
        self.images = images  # Shape: (N, H, W)
        self.labels = labels  # Shape: (N,)
        self.transform = transform

    def __len__(self):
        """Return the number of (image, label) pairs."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``, with the transform applied to the image."""
        image = self.images[idx]
        label = self.labels[idx]

        # Explicit None check so a callable that happens to evaluate as falsy
        # is still applied.
        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [3]:
# Image and label shards are paired purely by their sorted filename order, so
# the two glob patterns must yield the same number of files in matching order.
image_files = sorted(glob.glob("../data/data*.npy"))
label_files = sorted(glob.glob("../data/lab*.npy"))

if not image_files or len(image_files) != len(label_files):
    raise FileNotFoundError(
        "Expected matching, non-empty shard lists for '../data/data*.npy' and "
        f"'../data/lab*.npy', found {len(image_files)} image file(s) and "
        f"{len(label_files)} label file(s)"
    )

images = np.concatenate([np.load(f) for f in image_files], axis=0)  # Shape: (N, H, W)
labels = np.concatenate([np.load(f) for f in label_files], axis=0)  # Shape: (N,)

if len(images) != len(labels):
    raise ValueError(
        f"Loaded {len(images)} images but {len(labels)} labels; shards are misaligned"
    )

# Split into training and validation sets
split_ratio = 0.8
num_train = int(split_ratio * len(images))
num_val = len(images) - num_train

# Draw ONE seeded set of indices and apply it to both arrays. The image/label
# pairing is then guaranteed by construction rather than by two independently
# seeded generators happening to produce the same permutation.
train_index_subset, val_index_subset = random_split(
    range(len(images)),
    [num_train, num_val],
    generator=torch.Generator().manual_seed(42),
)

train_images = torch.utils.data.Subset(images, train_index_subset.indices)
train_labels = torch.utils.data.Subset(labels, train_index_subset.indices)
val_images = torch.utils.data.Subset(images, val_index_subset.indices)
val_labels = torch.utils.data.Subset(labels, val_index_subset.indices)


In [4]:
# Per-image preprocessing, applied in order: array -> PIL image -> resized to
# 224x224 (the ResNet input size used here) -> tensor -> normalised with
# mean 0.5 / std 0.5 on the single channel.
transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ]
)

# Both splits go through the same preprocessing pipeline.
train_dataset, val_dataset = (
    DigitSumDataset(split_images, split_labels, transform=transform)
    for split_images, split_labels in (
        (train_images, train_labels),
        (val_images, val_labels),
    )
)

# Only the training batches are shuffled; validation keeps a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)


In [5]:
# Determine number of classes dynamically. CrossEntropyLoss needs every target
# to lie in [0, num_classes - 1], so the head is sized by the largest label id
# rather than by the count of distinct labels: the two only agree when the
# labels are exactly 0..K-1 with no gaps.
num_classes = int(np.max(labels)) + 1

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = models.resnet18(pretrained=True)
# Change input channels to 1. This swaps in a freshly initialised layer, so the
# pretrained first-layer filters are not reused.
model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
model.fc = nn.Linear(model.fc.in_features, num_classes)  # Adjust final layer

model = model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /home2/yashas.b/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 48.0MB/s]


In [6]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10):
    """Train ``model`` and print validation accuracy after every epoch.

    Args:
        model: Network to optimise. Batches are moved to the device its
            parameters live on.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        criterion: Loss callable, invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to the model's parameters.
        num_epochs: Number of full passes over ``train_loader``.

    Returns:
        None. Per-epoch mean loss and validation accuracy are printed.
    """
    # Follow the model's own device instead of guessing from CUDA availability:
    # a model left on the CPU of a CUDA-capable machine would otherwise get
    # CUDA inputs. Fall back to the old guess for parameter-less models.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        # Average over the batches actually seen; an empty loader reports NaN
        # instead of raising ZeroDivisionError.
        mean_loss = running_loss / num_batches if num_batches else float("nan")
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {mean_loss}")

        # Validation phase
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        if total:
            print(f"Validation Accuracy: {100 * correct / total:.2f}%")
        else:
            print("Validation Accuracy: n/a (validation loader yielded no samples)")

In [13]:
# NOTE(review): this cell's execution count (13) jumps from 6, and the first
# logged loss below is already ~0.25, which suggests the model had been trained
# in earlier runs of this kernel. Confirm by re-running from a fresh kernel.
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=20,
)

Epoch 1/20, Loss: 0.25193544194102285
Validation Accuracy: 83.75%
Epoch 2/20, Loss: 0.23420378185311952
Validation Accuracy: 88.90%
Epoch 3/20, Loss: 0.18184679139157137
Validation Accuracy: 90.48%
Epoch 4/20, Loss: 0.2244209268465638
Validation Accuracy: 87.90%
Epoch 5/20, Loss: 0.188535438247025
Validation Accuracy: 87.25%
Epoch 6/20, Loss: 0.17559221915404002
Validation Accuracy: 85.93%
Epoch 7/20, Loss: 0.15354684675484895
Validation Accuracy: 80.77%
Epoch 8/20, Loss: 0.19731664078682662
Validation Accuracy: 82.42%
Epoch 9/20, Loss: 0.16490333901221554
Validation Accuracy: 84.33%
Epoch 10/20, Loss: 0.16352894906078777
Validation Accuracy: 91.65%
Epoch 11/20, Loss: 0.12682882344101867
Validation Accuracy: 89.47%
Epoch 12/20, Loss: 0.17755370432883502
Validation Accuracy: 86.43%
Epoch 13/20, Loss: 0.15048630842442315
Validation Accuracy: 89.87%
Epoch 14/20, Loss: 0.13937090584511558
Validation Accuracy: 89.50%
Epoch 15/20, Loss: 0.13475705725451312
Validation Accuracy: 89.98%
Epoch 1

In [14]:
def evaluate_model(model, test_loader):
    """Run ``model`` over ``test_loader`` and print its accuracy.

    Args:
        model: Network to evaluate. It is switched to eval mode, and batches
            are moved to the device its parameters live on.
        test_loader: Iterable of ``(images, labels)`` batches.

    Returns:
        Tuple ``(all_predictions, all_labels)``: two flat lists with one entry
        per evaluated sample, in loader order. Both are empty when the loader
        yields no samples.
    """
    # Follow the model's own device instead of guessing from CUDA availability;
    # fall back to the old guess for parameter-less models.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.eval()
    correct = 0
    total = 0
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # Predicted class = index of the largest score along dim 1.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            all_predictions.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # An empty loader has no defined accuracy; report that instead of raising
    # ZeroDivisionError.
    if total:
        accuracy = 100 * correct / total
        print(f"Test Accuracy: {accuracy:.2f}%")
    else:
        print("Test Accuracy: n/a (loader yielded no samples)")
    return all_predictions, all_labels


In [15]:
# No separate test set is wired in yet: this loader re-uses the validation
# split, so the figure printed as "Test Accuracy" is validation accuracy.
# Replace `val_dataset` with the test dataset to evaluate the model.
test_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)
predictions, true_labels = evaluate_model(model=model, test_loader=test_loader)

Test Accuracy: 92.93%
