In [4]:
!pip install idx2numpy torchvision scikit-learn


Collecting idx2numpy
  Downloading idx2numpy-1.2.3.tar.gz (6.8 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: idx2numpy
  Building wheel for idx2numpy (setup.py) ... [?25ldone
[?25h  Created wheel for idx2numpy: filename=idx2numpy-1.2.3-py3-none-any.whl size=7904 sha256=1931ef3478e4deec598a2ac645aa4f9be21c9ca053bbcacac433c18c5ebd8924
  Stored in directory: /root/.cache/pip/wheels/e0/f4/e7/643fc5f932ec2ff92997f43f007660feb23f948aa8486f1107
Successfully built idx2numpy
Installing collected packages: idx2numpy
Successfully installed idx2numpy-1.2.3


In [None]:
import torch
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
import idx2numpy
import time
from sklearn.metrics import accuracy_score, f1_score

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Locations of the MNIST IDX files on the Kaggle dataset mount.
train_images_path = "/kaggle/input/mnist-dataset/train-images.idx3-ubyte"
train_labels_path = "/kaggle/input/mnist-dataset/train-labels.idx1-ubyte"
test_images_path = "/kaggle/input/mnist-dataset/t10k-images.idx3-ubyte"
test_labels_path = "/kaggle/input/mnist-dataset/t10k-labels.idx1-ubyte"

# Decode the IDX files into NumPy arrays.
train_images = idx2numpy.convert_from_file(train_images_path)
train_labels = idx2numpy.convert_from_file(train_labels_path)
test_images = idx2numpy.convert_from_file(test_images_path)
test_labels = idx2numpy.convert_from_file(test_labels_path)

print("Train images shape:", train_images.shape)  # (60000, 28, 28)

# NOTE: the arrays are deliberately left exactly as loaded (unsigned-byte
# pixels). Dividing by 255.0 here would turn them into float64 arrays, which
# ToPILImage either rejects or converts back to bytes depending on the
# torchvision release. ToTensor below already rescales byte images to [0, 1],
# so a manual normalisation step is both redundant and fragile.

# Per-sample preprocessing for VGG16 input
transform = transforms.Compose([
    transforms.ToPILImage(),        # 2-D byte array -> greyscale PIL image
    transforms.Resize((224, 224)),  # Resize to 224x224
    transforms.ToTensor()           # -> float tensor scaled to [0, 1]
])

# Custom Dataset for MNIST
class mnist_dataset(Dataset):
    """Map-style dataset pairing an indexable image collection with labels.

    Args:
        images: Indexable collection of images; ``len(images)`` defines the
            dataset size.
        labels: Indexable collection of integer class labels, aligned with
            ``images``.
        transform: Optional callable applied to each image on access.
    """

    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``.

        The label is returned as a plain Python ``int``. A NumPy ``uint8``
        scalar would be collated by the default DataLoader collate function
        into a ``uint8`` tensor, which ``CrossEntropyLoss`` rejects as a
        class-index target; Python ints collate to ``int64``.
        """
        image = self.images[idx]
        label = int(self.labels[idx])
        if self.transform is not None:
            image = self.transform(image)
        return image, label

# Prepare DataLoaders
train_dataset = mnist_dataset(train_images, train_labels, transform=transform)
test_dataset = mnist_dataset(test_images, test_labels, transform=transform)

# Only the training split is shuffled; the test split keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Load pre-trained VGG16 model
vgg16 = models.vgg16(weights=models.VGG16_Weights.DEFAULT)

# Modify VGG16 for single-channel input.
# A freshly constructed Conv2d would start from random weights and throw away
# the pretrained first-layer filters. Summing the pretrained kernels over the
# input-channel axis gives a 1-channel layer whose output equals what the
# original layer produces when the same greyscale image is copied into all
# three input channels.
rgb_conv = vgg16.features[0]
gray_conv = torch.nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1)
with torch.no_grad():
    gray_conv.weight.copy_(rgb_conv.weight.sum(dim=1, keepdim=True))
    gray_conv.bias.copy_(rgb_conv.bias)
vgg16.features[0] = gray_conv

# Replace the final classifier layer with a 10-class output head.
vgg16.classifier[6] = torch.nn.Linear(vgg16.classifier[6].in_features, 10)

vgg16.to(device)

# Define loss and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(vgg16.parameters(), lr=0.01, momentum=0.9)

# Train the model
num_epochs = 5
start_time = time.time()

for epoch in range(num_epochs):
    vgg16.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images = images.to(device)
        # CrossEntropyLoss needs class-index targets as int64 ("long").
        # Casting here keeps the loop working whatever integer dtype the
        # loader produced for the labels.
        labels = labels.to(device).long()

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass
        outputs = vgg16(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean per-batch loss for this epoch.
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader):.4f}")

training_time = time.time() - start_time
print(f"Training Time: {training_time:.2f} seconds")

# Score the trained network on the held-out test split.
vgg16.eval()  # switch layers to their inference behaviour
all_preds, all_labels = [], []

# Gradients are not needed for scoring, so autograd tracking is disabled.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = vgg16(images)
        # The predicted class is the index of the largest logit in each row.
        preds = outputs.argmax(dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Summary metrics over the whole test split.
accuracy = accuracy_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds, average="weighted")

print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")


Using device: cuda
Train images shape: (60000, 28, 28)


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:02<00:00, 241MB/s] 
