In [8]:
from google.colab import files, drive
drive.mount("/content/drive", force_remount = True)

Mounted at /content/drive


In [9]:
# create a cnn model for 10 class digit recognition problem

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class MyCNNNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        
        # define a convolution layer
        self.conv1 = nn.Conv2d(1, 6, 5)
        
        # define a pooling layer
        self.pool = nn.MaxPool2d(2, 2)
        
        # define another convolution layer
        self.conv2 = nn.Conv2d(6, 16, 5)
        
        # define a dense fully connected feedforward subnetwork
        self.fc1 = nn.Linear(16*4*4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)        
        
    def forward(self, x):
        # x -> conv1 -> relu -> pooling -> conv2 -> relu -> pooling -> fully connected
        
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.log_softmax(self.fc3(x))
        
        return x
    
    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        
        for s in size:
            num_features *= s
            
        return num_features
        
net = MyCNNNetwork().to(device)
print(net)

MyCNNNetwork(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [10]:
# retrieve parameters in the network you defined

params = list(net.parameters())

for i in range(len(params)):
    print(params[i].size())

torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([16, 6, 5, 5])
torch.Size([16])
torch.Size([120, 256])
torch.Size([120])
torch.Size([84, 120])
torch.Size([84])
torch.Size([10, 84])
torch.Size([10])


In [11]:
# define a customized dataset in torch

import os
import glob
import numpy as np
from skimage import io

from torch.utils.data import Dataset, DataLoader

# override __init__, __len__, and __getitem__ methods

class MNISTDatasets(Dataset):
    def __init__(self, dir, transform = None):
        self.dir = dir
        self.transform = transform
        
    def __len__(self):
        files = glob.glob(self.dir + "/*.jpg")[:100]
        return len(files)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
            
        all_files = glob.glob(self.dir + "/*.jpg")[:100]
        img_fname = os.path.join(self.dir, all_files[idx])
        image = io.imread(img_fname)
        
        digit = int(self.dir.split("/")[-1].strip())
        label = np.array(digit)
        sample = {"image":image, "label":label}
        
        if self.transform:
            sample = self.transform(sample)
            
        return sample

In [12]:
# customerized transformation with several operations:
# Rescale, ToTensor

from skimage import transform
from torchvision import transforms, utils

class Rescale(object):
    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size
        
    def __call__(self, sample):
        image, label = sample["image"], sample["label"]
        h, w = image.shape[-2:]
        
        if isinstance(self.output_size, int):
            if h > w:
                new_h, new_w = self.output_size * h/w, self.output_size
            else:
                new_h, new_w = self.output_size, self.output_size * w/h
        else:
            new_h, new_w = self.output_size
            
        new_h, new_w = int(new_h), int(new_w)
        
        new_img = transform.resize(image, (new_h, new_w))
        
        return {"image":image, "label":label}
    
class ToTensor(object):
    def __call__(self, sample):
        image, label = sample["image"], sample["label"]
        image = image.reshape((1, image.shape[0], image.shape[1]))
        
        return {"image":torch.from_numpy(image), "label":torch.from_numpy(label)}

In [13]:
# create data loader for training and validation

from torch.utils.data import random_split

batch_size = 32

list_datasets = []

for i in range(10):
    cur_ds = MNISTDatasets(dir = "/content/drive/My Drive/MNIST/trainingset/" + str(i), transform = transforms.Compose([Rescale(28), ToTensor()]))
    list_datasets.append(cur_ds)
    
dataset = torch.utils.data.ConcatDataset(list_datasets)
print(len(dataset))

train_size = int(len(dataset) * 0.7)
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_dataloader = DataLoader(train_dataset, batch_size, shuffle = True, num_workers = 1)
val_dataloader = DataLoader(val_dataset, batch_size, shuffle = True, num_workers = 1)

1000


In [14]:
# Training

epochs = 5
learning_rate = 1e-3
optimizer = optim.Adam(net.parameters(), lr = learning_rate, weight_decay = 1e-5)
criterion = nn.CrossEntropyLoss()

for epoch in range(epochs):
    net.train()
    running_loss = 0.0
    
    for batch_idx, batch in enumerate(train_dataloader):
        inputs, targets = batch["image"].to(device, dtype = torch.float), batch["label"].to(device, dtype = torch.long)
        optimizer.zero_grad()
        predicted_outputs = net(inputs)
        loss = criterion(predicted_outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        
        if (batch_idx + 1) % 10 == 0:
            print("epoch: %d, batch: %d, training loss: %0.3f" % (epoch + 1, batch_idx + 1, running_loss/10))
            running_loss = 0.0
            
    # Validation
    
    net.eval()
    correct = [0.0] * 10
    total = [0.0] * 10
    
    with torch.no_grad():
        for batch_idx, batch in enumerate(val_dataloader):
            images, labels = batch["image"].to(device, dtype = torch.float), batch["label"].to(device, dtype = torch.long)
            predicted_outputs = net(images)
            _, predicted_labels = torch.max(predicted_outputs, 1)
            c = (predicted_labels == labels)
            
            for i in range(len(labels)):
                label = labels[i]
                correct[label] += c[i].item()
                total[label] += 1
                
    for i in range(10):
        print("\t Validation accuracy for digit %d: %0.2f" % (i, 100 * correct[i]/total[i]))



epoch: 1, batch: 10, training loss: 2.513
epoch: 1, batch: 20, training loss: 1.419
	 Validation accuracy for digit 0: 100.00
	 Validation accuracy for digit 1: 86.67
	 Validation accuracy for digit 2: 75.00
	 Validation accuracy for digit 3: 60.61
	 Validation accuracy for digit 4: 61.54
	 Validation accuracy for digit 5: 55.56
	 Validation accuracy for digit 6: 43.33
	 Validation accuracy for digit 7: 82.14
	 Validation accuracy for digit 8: 73.08
	 Validation accuracy for digit 9: 82.14
epoch: 2, batch: 10, training loss: 0.644
epoch: 2, batch: 20, training loss: 0.517
	 Validation accuracy for digit 0: 88.89
	 Validation accuracy for digit 1: 90.00
	 Validation accuracy for digit 2: 69.44
	 Validation accuracy for digit 3: 84.85
	 Validation accuracy for digit 4: 88.46
	 Validation accuracy for digit 5: 55.56
	 Validation accuracy for digit 6: 96.67
	 Validation accuracy for digit 7: 100.00
	 Validation accuracy for digit 8: 38.46
	 Validation accuracy for digit 9: 85.71
epoch: 3, 