In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import copy
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

import torchvision.models as models

from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()

In [2]:
class Net(nn.Module):
        def __init__(self):
                super(Net, self).__init__()
                self.conv1 = nn.Conv2d(1, 32, 5)
                self.conv2 = nn.Conv2d(32, 64, 5)

                self.fc1 = nn.Linear(4*4*64, 512)
                self.fc2 = nn.Linear(512, 256)
                self.fc3 = nn.Linear(256, 10)

                self.pool = nn.MaxPool2d(2, stride=2)

                self.drop1 = nn.Dropout(0.25) # Try 0.5
                self.drop2 = nn.Dropout(0.50)

        def forward(self, x):
                x = F.relu(self.conv1(x))
                x = self.pool(x)
                x = self.drop1(x)
                x = F.relu(self.conv2(x))
                x = self.pool(x)
                x = x.reshape(-1, 1024)
                x = F.relu(self.fc1(x))
                x = self.drop2(x)
                x = F.relu(self.fc2(x))
                x = self.fc3(x)
                return x

In [9]:
def train_model(model, criterion, optimixer, train_loader, val_loader, num_epochs):

    train_losses = []
    valid_losses = []
    best_model_loss = 10
    
    for epoch in range(num_epochs):

        # Training
        for i, (data, labels) in enumerate(train_loader):
      
            prediction = model.forward(data)

            train_loss = criterion(prediction, labels)

            train_loss.backward()

            optimixer.step()

            optimixer.zero_grad()
        print(
            f'\rEpoch {epoch+1}, batch {i+1}/{len(train_loader)} - Loss: {train_loss}',"\n"
        )

        train_losses.append(train_loss)
        writer.add_scalar("Loss/train", train_loss, epoch)

        # Validation
        for batch_nr, (data, labels) in enumerate(val_loader):
            prediction = model.forward(data)
            loss_val = criterion(prediction, labels)
            valid_losses.append(loss_val)
        print(f"loss validation: {loss_val}","\n")

        if valid_losses[-1] < best_model_loss:
            print(f"\t > Found a better model, {best_model_loss} -> {valid_losses[-1]}")
            best_model = copy.deepcopy(model)
            best_model_loss = valid_losses[-1]

        writer.add_scalar("Loss/validation", loss_val, epoch)

    print(f"\nBest model loss: {best_model_loss}")
    return best_model, train_losses, valid_losses

def get_accuracy(network, loader):
    
    with torch.no_grad():
        correct = 0
        total = 0
        y_pred = []
        y_true = []

        for x, (data, labels) in enumerate(loader):

            prediction = network.forward(data)

            for i in range(len(data)):

                y_true.append(labels[i].item())
                y_pred.append(torch.argmax(prediction[i]).item())
                if y_true[i] == y_pred[i]:
                    correct += 1        
    
            total += float(len(data))
    
        score = correct/total

        accuracy = score

        return accuracy

In [10]:
LEARNING_RATE = 0.001
EPOCHS = 10
BATCH_SIZE = 1000

transform = transforms.Compose([transforms.ToTensor()])

trainset = torchvision.datasets.MNIST(root='./data', train=True,download=True, transform=transform)
testset = torchvision.datasets.MNIST(root='./data', train=False,download=True, transform=transform)

validset, trainset = torch.utils.data.random_split(trainset, [20000, 40000])

trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,shuffle=True)
validloader = torch.utils.data.DataLoader(validset, batch_size=BATCH_SIZE,shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE,shuffle=False)

model = Net()

criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)

trained_model, train_loss, valid_loss = train_model(model, criterion, optimizer, trainloader, validloader, EPOCHS)

test_acc = get_accuracy(trained_model, testloader)
print(test_acc)
writer.flush()

Epoch 1, batch 40/40 - Loss: 0.3381681740283966 

loss validation: 0.2780453562736511 

	 > Found a better model, 10 -> 0.2780453562736511
Epoch 2, batch 40/40 - Loss: 0.15382182598114014 

loss validation: 0.1875322312116623 

	 > Found a better model, 0.2780453562736511 -> 0.1875322312116623
Epoch 3, batch 40/40 - Loss: 0.09274882078170776 

loss validation: 0.10112753510475159 

	 > Found a better model, 0.1875322312116623 -> 0.10112753510475159
Epoch 4, batch 40/40 - Loss: 0.08749064803123474 

loss validation: 0.10950310528278351 

Epoch 5, batch 40/40 - Loss: 0.06577182561159134 

loss validation: 0.07207788527011871 

	 > Found a better model, 0.10112753510475159 -> 0.07207788527011871
Epoch 6, batch 40/40 - Loss: 0.044530414044857025 

loss validation: 0.04476452246308327 

	 > Found a better model, 0.07207788527011871 -> 0.04476452246308327
Epoch 7, batch 40/40 - Loss: 0.04495922848582268 

loss validation: 0.052519895136356354 

Epoch 8, batch 40/40 - Loss: 0.0412859804928302

### Try network on SVHN

In [11]:
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)), # The 3 channels becomes between -1 and 1. Formula: (pixelvalue - mean)/std
                                transforms.Resize(28),
                                transforms.Grayscale(num_output_channels=1)])

dataset = torchvision.datasets.SVHN(root='./data',download=True,transform=transform)

testset, validset, trainset = torch.utils.data.random_split(dataset, [10000,12000,51257])

trainloader_svhn = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,shuffle=True)
validloader_svhn = torch.utils.data.DataLoader(validset, batch_size=BATCH_SIZE,shuffle=True)
testloader_svhn = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE,shuffle=False)

# Test the model
test_acc = get_accuracy(trained_model, testloader_svhn)
print(test_acc)
writer.flush()

Using downloaded and verified file: ./data\train_32x32.mat
0.183


### Transfer learning (fine-tune): MNIST -> SVHN

In [12]:
model_finetune = trained_model

LEARNING_RATE = 0.01
EPOCHS = 5

# Define our loss function
criterion = nn.CrossEntropyLoss()

# Define our optimixer
optimizer = torch.optim.Adam(model_finetune.parameters(), lr = LEARNING_RATE)

# Train the model
trained_model_finetuned, train_loss, valid_loss = train_model(model_finetune, criterion, optimizer, trainloader_svhn, validloader_svhn, EPOCHS)

# Test the model
test_acc = get_accuracy(trained_model_finetuned, testloader_svhn)
print(test_acc)
writer.flush()

Epoch 1, batch 52/52 - Loss: 1.1255439519882202 

loss validation: 1.105072259902954 

	 > Found a better model, 10 -> 1.105072259902954
Epoch 2, batch 52/52 - Loss: 0.8605552315711975 

loss validation: 0.912015974521637 

	 > Found a better model, 1.105072259902954 -> 0.912015974521637
Epoch 3, batch 52/52 - Loss: 0.8035498261451721 

loss validation: 0.7923974394798279 

	 > Found a better model, 0.912015974521637 -> 0.7923974394798279
Epoch 4, batch 52/52 - Loss: 0.7846595048904419 

loss validation: 0.7232319712638855 

	 > Found a better model, 0.7923974394798279 -> 0.7232319712638855
Epoch 5, batch 52/52 - Loss: 0.6200501918792725 

loss validation: 0.7335819005966187 


Best model loss: 0.7232319712638855
0.773


### Transfer learning (feature extraction): MNIST -> SVHN

In [15]:
model_featext = trained_model

LEARNING_RATE = 0.01
EPOCHS = 5

# Freeze all layers except the last few layers
for name, param in model_featext.named_parameters():
    if "fc3" in name:
        param.requires_grad = True
    else:
        param.requires_grad = False

# Define our loss function
criterion = nn.CrossEntropyLoss()

# Define our optimixer
optimizer = torch.optim.Adam(model_featext.parameters(), lr = LEARNING_RATE)

# Train the model
trained_model_featexted, train_loss, valid_loss = train_model(model_featext, criterion, optimizer, trainloader_svhn, validloader_svhn, EPOCHS)

# Test the model
test_acc = get_accuracy(trained_model_featexted, testloader_svhn)
print(test_acc)
writer.flush()

Epoch 1, batch 52/52 - Loss: 0.7887284755706787 

loss validation: 0.593209981918335 

	 > Found a better model, 10 -> 0.593209981918335
Epoch 2, batch 52/52 - Loss: 0.5977726578712463 

loss validation: 0.7530398368835449 

Epoch 3, batch 52/52 - Loss: 0.5757005214691162 

loss validation: 0.6883167624473572 

Epoch 4, batch 52/52 - Loss: 0.4649699032306671 

loss validation: 0.725853443145752 

Epoch 5, batch 52/52 - Loss: 0.6794815063476562 

loss validation: 0.6937739849090576 


Best model loss: 0.593209981918335
0.793
