In [2]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import KFold
import random

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader,TensorDataset,random_split,SubsetRandomSampler, ConcatDataset
from torch.nn import functional as F
import torchvision
from torchvision import datasets,transforms
import torchvision.transforms as transforms


# ToTensor is the only preprocessing step applied to the images.
transform = transforms.Compose([transforms.ToTensor()])

# Fetch both official MNIST partitions into ./classifier_data (downloaded if missing).
train_dataset = datasets.MNIST('classifier_data', train=True, download=True)
test_dataset = datasets.MNIST('classifier_data', train=False, download=True)

# Attach the shared transform after construction so both partitions use the same object.
train_dataset.transform = transform
test_dataset.transform = transform

# Number of samples in the training partition.
m = len(train_dataset)

In [13]:
class ConvNet(nn.Module):
    """Small two-stage convolutional classifier producing 10 class logits.

    The layer sizes fix the expected input to single-channel 28x28 images:
    both convolutions preserve spatial size (5x5 kernel, padding 2) and each is
    followed by a 2x2 max-pool, so 28 -> 14 -> 7 and `fc1` receives 32*7*7 features.

    Args:
        h1: width of the hidden fully connected layer.
    """

    def __init__(self, h1=96):
        super(ConvNet, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2)

        # Channel-wise dropout: applied to the 4-D feature map coming out of the conv stack.
        self.drop1 = nn.Dropout2d(p=0.5)
        self.fc1 = nn.Linear(32*7*7, h1)
        # Element-wise dropout: the input here is the flat (batch, h1) activation of fc1.
        # Dropout2d on a 2-D tensor is deprecated (warns, slated to become an error);
        # nn.Dropout is the documented replacement that keeps the same behaviour.
        self.drop2 = nn.Dropout(p=0.1)
        self.fc2 = nn.Linear(h1, 10)

    def forward(self, x):
        """Map a batch of images to unnormalised class scores of shape (batch, 10)."""
        # conv -> 2x2 max-pool -> ReLU, twice.
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2))

        x = self.drop1(x)
        # Flatten everything except the batch dimension for the linear layers.
        x = x.view(x.size(0), -1)

        x = F.relu(self.fc1(x))
        x = self.drop2(x)
        # Raw logits are returned; no softmax is applied here.
        x = self.fc2(x)

        return x
        

In [14]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Fix the torch RNG seed.
torch.manual_seed(42)

# Loss shared by every fold.
criterion = nn.CrossEntropyLoss()

# Pool both MNIST partitions; the cross-validation folds are drawn from the combined set.
dataset = ConcatDataset([train_dataset, test_dataset])

# Training / cross-validation hyper-parameters.
k = 10
num_epochs = 10
batch_size = 128

# Shuffled k-fold index generator with a fixed random_state.
splits = KFold(n_splits=k, shuffle=True, random_state=42)

# Container for per-fold results.
foldperf = dict()

In [15]:
def train_epoch(model, device, dataloader, loss_fn, optimizer):
    """Run one optimisation pass over `dataloader`.

    The model is switched to training mode; for every batch the gradients are
    reset, the loss is back-propagated and the optimizer takes one step.

    Returns:
        (loss_sum, n_correct): the per-batch loss multiplied by the batch size
        and summed over all batches, and the number of samples whose highest
        scoring class equals the label. Neither value is normalised here.
        Weighting by batch size presumably assumes `loss_fn` returns a batch mean.
    """
    model.train()
    running_loss = 0.0
    n_correct = 0

    for batch_x, batch_y in dataloader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        logits = model(batch_x)
        batch_loss = loss_fn(logits, batch_y)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by its size so the caller can divide by the sample count.
        running_loss += batch_loss.item() * batch_x.size(0)
        # Index of the largest score along the class dimension.
        predicted = logits.detach().max(dim=1)[1]
        n_correct += (predicted == batch_y).sum().item()

    return running_loss, n_correct

def valid_epoch(model, device, dataloader, loss_fn):
    """Evaluate `model` on `dataloader` without updating it.

    The model is switched to eval mode and every batch is processed with
    gradient tracking disabled.

    Returns:
        (loss_sum, n_correct): the per-batch loss multiplied by the batch size
        and summed over all batches, and the number of samples whose highest
        scoring class equals the label. Neither value is normalised here.
    """
    model.eval()
    running_loss = 0.0
    n_correct = 0

    # Inference only: no graph is built, so backward() cannot be called and nothing is updated.
    with torch.no_grad():
        for batch_x, batch_y in dataloader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            logits = model(batch_x)
            batch_loss = loss_fn(logits, batch_y)

            running_loss += batch_loss.item() * batch_x.size(0)
            # Index of the largest score along the class dimension.
            predicted = logits.detach().max(dim=1)[1]
            n_correct += (predicted == batch_y).sum().item()

    return running_loss, n_correct


In [16]:
# This will take a long time to run: every fold trains a fresh model for num_epochs epochs.
# `history` is a flat log with one entry per (fold, epoch), in execution order.
history = {
    'train_loss': [], 
    'test_loss': [], 
    'train_acc': [], 
    'test_acc': []
}

# `foldperf` keeps the same metrics grouped per fold ('fold1', 'fold2', ...).
# Clear it first so re-running this cell does not keep results of a previous run.
foldperf.clear()

# For each fold 
for fold, (train_idx, val_idx) in enumerate(splits.split(np.arange(len(dataset)))): 
    print('Fold ', fold+1)

    # Pull from the indices randomly (like subsetting + shuffling)
    train_sampler = SubsetRandomSampler(train_idx)
    test_sampler = SubsetRandomSampler(val_idx)

    # Create data loaders: both read the pooled dataset, restricted to this fold's indices
    train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
    test_loader = DataLoader(dataset, batch_size=batch_size, sampler=test_sampler)

    # Fresh, untrained model for every fold so folds do not share weights
    model = ConvNet()
    model.to(device)

    # The optimizer is built from this fold's model parameters, so it is re-created with the model
    optimizer = optim.Adam(model.parameters(), lr=0.002)

    # Register the per-fold record before training so partial results survive an interrupt
    fold_history = {metric: [] for metric in history}
    foldperf['fold{}'.format(fold+1)] = fold_history

    # Train the refreshed model for num_epochs 
    for epoch in range(num_epochs): 
        train_loss, train_correct = train_epoch(model, device, train_loader, criterion, optimizer)
        test_loss, test_correct = valid_epoch(model, device, test_loader, criterion)

        # train_epoch/valid_epoch return sums; normalise by the number of samples in each split
        train_loss = train_loss / len(train_loader.sampler)
        train_acc = train_correct / len(train_loader.sampler) * 100
        test_loss = test_loss / len(test_loader.sampler)
        test_acc = test_correct / len(test_loader.sampler) * 100

        print(f"Epoch:{epoch+1}/{num_epochs} AVG Training Loss:{train_loss} AVG Test Loss:{test_loss} AVG Training Acc {train_acc} % AVG Test Acc {test_acc} %")
        
        # Flat log across all folds
        history['train_loss'].append(train_loss)
        history['test_loss'].append(test_loss)
        history['train_acc'].append(train_acc)
        history['test_acc'].append(test_acc)

        # Same values, grouped under the current fold
        fold_history['train_loss'].append(train_loss)
        fold_history['test_loss'].append(test_loss)
        fold_history['train_acc'].append(train_acc)
        fold_history['test_acc'].append(test_acc)



Fold  1




Epoch:1/10 AVG Training Loss:0.22317838957763853 AVG Test Loss:0.07188646753132344 AVG Training Acc 93.04920634920634 % AVG Test Acc 97.65714285714286 %
Epoch:2/10 AVG Training Loss:0.07348964653317891 AVG Test Loss:0.04854037914105824 AVG Training Acc 97.74920634920635 % AVG Test Acc 98.3 %


KeyboardInterrupt: 

In [None]:
# Overall train/test acc/loss is the average over all splits.
# `history` holds one entry per (fold, epoch), so averaging the raw lists would also
# average over epochs and mix early-epoch metrics into the cross-validation estimate.
# Instead, take the final-epoch value of every completed fold and average those.
def _final_epoch_mean(values, epochs_per_fold):
    """Mean of the last-epoch metric of each completed fold.

    `values` is a flat list laid out fold after fold with `epochs_per_fold`
    entries per fold. A trailing, unfinished fold (e.g. after an interrupt) is
    ignored. Returns nan when no fold has finished.
    """
    finals = values[epochs_per_fold - 1::epochs_per_fold]
    if len(finals) == 0:
        return np.nan
    return np.mean(finals)

avg_train_loss = _final_epoch_mean(history['train_loss'], num_epochs)
avg_test_loss = _final_epoch_mean(history['test_loss'], num_epochs)
avg_train_acc = _final_epoch_mean(history['train_acc'], num_epochs)
avg_test_acc = _final_epoch_mean(history['test_acc'], num_epochs)

print('Performance of {} fold cross validation'.format(k))
print("Average Training Loss: {:.4f} \t Average Test Loss: {:.4f} \t Average Training Acc: {:.3f} \t Average Test Acc: {:.3f}".format(avg_train_loss,avg_test_loss,avg_train_acc,avg_test_acc))  
