In [3]:
# List the contents of the SVHN data directory (shell escape; output shown below).
!ls ../data/SVHN_dataset/

test_32x32.mat	train_32x32.mat


In [16]:
import torch
import torchvision
import numpy as np
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from scipy.io import loadmat
from matplotlib import pyplot as plt

In [6]:
# Read both SVHN .mat files into dictionaries in a single tuple assignment.
train, test = (
    loadmat('../data/SVHN_dataset/train_32x32.mat'),
    loadmat('../data/SVHN_dataset/test_32x32.mat'),
)

In [19]:
# Rewrite label 10 as 0, in place, on the loaded training labels
# (presumably the file stores digit zero as class 10 -- confirm against the dataset docs).
train['y'][train['y'] == 10] = 0

# Last expression of the cell: show the distinct labels that remain.
np.unique(train['y'])

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8)

In [209]:
class SVHNDataset(Dataset):
    """Map-style dataset backed by an SVHN ``*_32x32.mat`` file.

    The file is read once with ``scipy.io.loadmat``. Its ``'X'`` array is
    permuted with axes ``(3, 2, 0, 1)`` and stored in ``self.X`` (assuming the
    raw layout is H x W x C x N, this gives N x C x H x W -- confirm against
    the file). Its ``'y'`` array is flattened to a 1-D ``int64`` vector in
    ``self.y`` with every label 10 rewritten to 0.

    Args:
        mat_file: Path to the ``.mat`` file.
        transform: Optional callable applied to each image in ``__getitem__``.
        target_transform: Optional callable applied to each label.
    """

    def __init__(self, mat_file, transform=None, target_transform=None):
        data = loadmat(mat_file)
        self.X = np.transpose(data['X'], (3, 2, 0, 1))
        # reshape(-1) rather than squeeze(): a file holding a single sample
        # must still yield a 1-D, indexable label vector.
        self.y = data['y'].astype(np.int64).reshape(-1)
        self.y[self.y == 10] = 0

        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of samples (length of the first axis of ``self.X``)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The stored image is permuted with axes ``(1, 2, 0)`` (channel-last)
        before ``transform`` is applied, so the transform receives an
        H x W x C array. ``target_transform`` is applied to the label.
        """
        img, target = self.X[idx], self.y[idx]

        img = np.transpose(img, (1, 2, 0))
        if self.transform is not None:
            img = self.transform(img)

        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target

    @staticmethod
    def _to_displayable(img):
        """Convert one ``__getitem__`` image into a channel-last array for ``plt.imshow``.

        * Torch tensors are detached and converted to NumPy.
        * A channel-first array (first axis of size 1 or 3, last axis not) is
          permuted to channel-last.
        * Floating-point images are mapped with ``x / 2 + 0.5`` and clipped to
          ``[0, 1]``; this undoes a ``Normalize(0.5, 0.5)`` transform, which is
          what this notebook uses. Integer images (no transform) are returned
          unscaled.
        """
        if isinstance(img, torch.Tensor):
            img = img.detach().cpu().numpy()
        img = np.asarray(img)

        channel_first = (
            img.ndim == 3
            and img.shape[0] in (1, 3)
            and img.shape[-1] not in (1, 3)
        )
        if channel_first:
            img = np.transpose(img, (1, 2, 0))

        if np.issubdtype(img.dtype, np.floating):
            img = np.clip(img / 2 + 0.5, 0.0, 1.0)
        return img

    def view(self, idx):
        """Plot sample ``idx`` with its label as the title. Returns ``None``."""
        img, target = self[idx]
        plt.imshow(self._to_displayable(img))
        plt.title(f'True Label: {target}')
        return

In [210]:
# Preprocessing pipeline: ToTensor followed by per-channel Normalize with
# mean 0.5 and std 0.5 on each of the three channels.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

In [211]:
# Build one dataset per split; both share the same preprocessing pipeline.
train_dataset = SVHNDataset(
    mat_file='../data/SVHN_dataset/train_32x32.mat',
    transform=transform,
)
test_dataset = SVHNDataset(
    mat_file='../data/SVHN_dataset/test_32x32.mat',
    transform=transform,
)

In [212]:
batch_size = 32

# Training batches are reshuffled each epoch; test batches keep dataset order.
# Both loaders use two worker processes.
trainloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

testloader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

In [213]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Three-stage convolutional classifier for 3-channel images.

    Each stage is Conv2d -> BatchNorm2d -> ReLU -> 2x2 max-pool. The
    convolutions are padded to preserve spatial size, so each stage halves
    height and width. The final linear layer expects ``24 * 4 * 4`` features,
    i.e. a 32 x 32 input (32 -> 16 -> 8 -> 4).

    Args:
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Attribute names and in-block layer order are kept stable so
        # state_dict keys (e.g. ``conv_block1.0.weight``) do not change.
        self.conv_block1 = self._conv_block(3, 6, kernel_size=5, padding=2)
        self.conv_block2 = self._conv_block(6, 12, kernel_size=3, padding=1)
        self.conv_block3 = self._conv_block(12, 24, kernel_size=3, padding=1)

        self.fc = nn.Linear(24 * 4 * 4, out_features=num_classes)

    @staticmethod
    def _conv_block(in_channels, out_channels, kernel_size, padding):
        """Build one Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d(2, 2) stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=kernel_size, stride=1, padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)``.

        Raises:
            RuntimeError: If the flattened feature size does not match the
                linear layer (for example, the input is not 32 x 32).
        """
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        x = self.conv_block3(x)
        # Flatten everything except the batch axis. The previous
        # ``view(-1, 24 * 4 * 4)`` silently folded surplus features into the
        # batch dimension when the input size was wrong.
        x = x.flatten(1)
        x = self.fc(x)
        return x


net = Net()

In [245]:
def get_accuracy(model, dataloader, device='cpu'):
    """Compute top-1 classification accuracy of ``model`` over ``dataloader``.

    Args:
        model: Module mapping a batch of inputs to per-class scores of shape
            ``(batch, num_classes)``. It is expected to already be on ``device``.
        dataloader: Iterable yielding ``(inputs, classes)`` tensor pairs.
        device: Device that each batch is moved to before the forward pass.

    Returns:
        Fraction of correctly classified samples as a Python ``float``, or
        ``nan`` if ``dataloader`` yields no samples.

    The model is put in eval mode for the evaluation and restored to whatever
    mode it was in on entry. Gradients are not tracked.
    """
    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for inputs, classes in dataloader:
                inputs = inputs.to(device)
                classes = classes.to(device)
                preds = model(inputs).argmax(dim=1)
                correct += (preds == classes).sum().item()
                total += classes.numel()
    finally:
        # Restore the caller's mode instead of unconditionally forcing train().
        model.train(was_training)
    return correct / total if total else float('nan')

In [246]:
import torch.optim as optim

# Cross-entropy loss applied to the network's outputs, and an Adam optimizer
# over all of `net`'s parameters with learning rate 1e-3.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=net.parameters(), lr=1e-3)

In [247]:
for epoch in range(5):  # five full passes over the training loader

    running_loss = 0.0
    # `step` is the 1-based mini-batch index within the current epoch.
    for step, (inputs, labels) in enumerate(trainloader, start=1):
        # Clear gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update.
        loss = criterion(net(inputs), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if step % 2000 != 0:
            continue

        # Every 2000 mini-batches: report the mean loss over that window and
        # the current test accuracy, then start a fresh window.
        print('[%d, %5d] loss: %.3f' %
              (epoch + 1, step, running_loss / 2000))
        print(f"Test Accuracy : {get_accuracy(net, testloader)}")
        running_loss = 0.0

print('Finished Training')

[1,  2000] loss: 0.361
Test Accuracy : 0.8718884449907806
[2,  2000] loss: 0.347
Test Accuracy : 0.8843730792870313
[3,  2000] loss: 0.339
Test Accuracy : 0.8919022741241549
[4,  2000] loss: 0.330
Test Accuracy : 0.8857944068838353
[5,  2000] loss: 0.324
Test Accuracy : 0.8897510755992625
Finished Training


In [None]:
# Scratch cell cleared: the previous expression referenced a NumPy attribute
# that does not exist and would raise AttributeError under Restart & Run All.