In [1]:
import numpy as np
from sklearn.model_selection import KFold

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

from torch.utils.data import Dataset, DataLoader,TensorDataset,random_split,SubsetRandomSampler, ConcatDataset

In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
k_folds = 5
num_epochs = 5
num_classes = 10
batch_size = 64
learning_rate = 0.001

In [6]:
train_data = torchvision.datasets.MNIST(root='./data/',
                                   train=True,
                                   transform=transforms.ToTensor(),
                                   download=True)
test_data = torchvision.datasets.MNIST(root='./data/',
                                   train=False,
                                   transform=transforms.ToTensor())

In [7]:
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)

In [8]:
class MNIST_layers(nn.Module):
    def __init__(self):
        super(MNIST_layers, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),  # 16@28X28
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 16@14X14
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),  # 32@14X14
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), # 32@7X7
        )
        self.fc = nn.Linear(32*7*7, 10)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.fc(out.reshape(out.size()[0], -1))
        return F.log_softmax(out, dim=1)

In [9]:
def train(fold, model, device, train_loader, optimizer, epoch):
    model.train()
    train_loss = 0.
    for idx, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        output = model(images)
        loss = F.nll_loss(output, labels)
        loss.backward()
        optimizer.step()
        train_loss = loss.item()
        
    print('Train Fold/Epoch: {}/{} \tLoss: {:.6f}'.format(
        fold+1,epoch+1, train_loss))

def test(fold, model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set for fold {}: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        fold,test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


        

In [None]:
history = {'train_loss': [], 'val_loss': []}

for fold, (train_idx, val_idx) in enumerate(kfold.split(np.arange(len(train_data)))):
    print('Fold {}'.format(fold + 1))
    
    train_sampler = SubsetRandomSampler(train_idx)
    val_sampler = SubsetRandomSampler(val_idx)
    train_loader = DataLoader(train_data, batch_size=batch_size, sampler=train_sampler)
    val_loader = DataLoader(train_data, batch_size=batch_size, sampler=val_sampler)
    
    model = MNIST_layers().to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    for epoch in range(num_epochs):
        train(fold, model, device, train_loader, optimizer, epoch)
    
    test(fold, model, device, val_loader)

Fold 1
Train Fold/Epoch: 1/1 	Loss: 0.020540
Train Fold/Epoch: 1/2 	Loss: 0.044292
Train Fold/Epoch: 1/3 	Loss: 0.006914
