# GoogLeNet with MNIST Dataset

`Author: YUAN Yanzhe`

- This notebook is a reproduction of the [GoogLeNet paper](https://www.cv-foundation.org/openaccess/content_cvpr_2015/html/Szegedy_Going_Deeper_With_2015_CVPR_paper.html).
  - If you want to do parameter fine-tuning, setting hyperparameters on the entrance of the model is recommended.
    - e.g. def \_\_init\_\_(param) 
- The code runs on Google Colab, GPU mode

一些细节：
- googlenet的结构，由5个block组成：
  - 首先是一个7\*7的conv block：conv增加feature_num和image size，pool改变image size
  - 然后是一个conv-conv-pool的block：conv增加feature_num，pool改变image size
  - 然后是一个incpt-incpt-pool的block
    - incpt是inception block，每个inception block由四条通路组成在输出处cat（feature_num维度）。
    - incpt的输出输入在image size上不变，改变的是通道数。
  - 然后是一个incpt-incpt-incpt-incpt-pool的结构
  - 最后是一个incpt-incpt-globalAvgPool的结构，最后的feature_num是1024
  - 最后最后是一个flatten+fc：1024-10
- nn.ReLU()是一个层，nn.functional.relu()是一个函数，具体怎么用看个人习惯，若要用层得在init中定义。
- 用(96,96)的image size来训练模型

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
os.chdir('/content/drive/MyDrive/Colab Notebooks/d2dl_pytorch')

In [3]:
# Import Packages
import torch
from torch import nn as nn
from torch import optim as optim
from torch.utils import data as Data

import torchvision
from torchvision import datasets
from torchvision import transforms

import numpy as np
import pandas as pd 
import time

import d2lzh_pytorch as d2dl

print(torch.__version__)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('device on:', device)

1.7.0+cu101
device on: cuda


In [4]:
# Hyperparameters
batch_size = 256
num_epochs = 5
learning_rate = 0.001

num_classes = 10

# Load Data
# non-default argument follows default argument, has to define non-default value first
def load_data_from_mnist(batch_size, resize=None, root=''):
    trans = []
    if resize:
        trans.append(transforms.Resize(resize))
    trans.append(transforms.ToTensor())
    transform = transforms.Compose(trans)

    train_data = torchvision.datasets.MNIST(root=root,train=True,transform=transform,download=False)
    test_data = torchvision.datasets.MNIST(root=root,train=False,transform=transform,download=False)
    train_iterator = Data.DataLoader(train_data,batch_size=batch_size,shuffle=True,num_workers=4)
    test_iterator = Data.DataLoader(test_data,batch_size=batch_size,shuffle=True,num_workers=4)

    return train_iterator, test_iterator

def load_data_fashion_mnist(batch_size, resize=None, root=''):
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())

    transform = torchvision.transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)

    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=4)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=4)

    return train_iter, test_iter

#train_iterator, test_iterator = load_data_fashion_mnist(batch_size,resize=96)
train_iterator, test_iterator = load_data_from_mnist(batch_size,resize=96)

# Define Model
class globalAvgPool(nn.Module):
    # the function of global average pooling is to reduce the image size to (1,1),
    # which is convenient to reduce dimension later
    def __init__(self):
        super(globalAvgPool,self).__init__()
    def forward(self, x):
        return nn.functional.avg_pool2d(x,x.size()[2:])

class inceptionBlock(nn.Module):
    # in the inception block, there are 4 paths which use different kind of conv designs
    # as the output of inception block the image size is remained and the feature_num is chages 
    def __init__(self, in_c, c_1, c_2, c_3, c_4):
        super(inceptionBlock,self).__init__()
        # the first path
        self.p_1_1 = nn.Conv2d(in_c,c_1,1)
        # the second path
        self.p_2_1 = nn.Conv2d(in_c,c_2[0],1)
        self.p_2_2 = nn.Conv2d(c_2[0],c_2[1],3,1,1)
        # the third path
        self.p_3_1 = nn.Conv2d(in_c,c_3[0],1)
        self.p_3_2 = nn.Conv2d(c_3[0],c_3[1],5,1,2)
        # the fourth path
        self.p_4_1 = nn.MaxPool2d(3,1,1)
        self.p_4_2 = nn.Conv2d(in_c,c_4,1)

    def forward(self, x):
        p_1 = nn.functional.relu(self.p_1_1(x))
        p_2 = nn.functional.relu(self.p_2_2(nn.functional.relu(self.p_2_1(x))))
        p_3 = nn.functional.relu(self.p_3_2(nn.functional.relu(self.p_3_1(x))))
        p_4 = nn.functional.relu(self.p_4_2(self.p_4_1(x)))
        return torch.cat((p_1,p_2,p_3,p_4),dim=1)

class googLeNet(nn.Module):
    # GoogLeNet contains 5 blocks, in which contains several inception blocks.
    def __init__(self):
        super(googLeNet,self).__init__()
        self.b_1 = nn.Sequential(
            nn.Conv2d(1,64,7,2,3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        self.b_2 = nn.Sequential(
            nn.Conv2d(64,64,1),
            nn.Conv2d(64,192,3,1,1),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        self.b_3 = nn.Sequential(
            inceptionBlock(192,64,(96,128),(16,32),32),
            inceptionBlock(256, 128, (128, 192), (32, 96), 64),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        self.b_4 = nn.Sequential(
            inceptionBlock(480, 192, (96, 208), (16, 48), 64),
            inceptionBlock(512, 160, (112, 224), (24, 64), 64),
            inceptionBlock(512, 128, (128, 256), (24, 64), 64),
            inceptionBlock(512, 112, (144, 288), (32, 64), 64),
            inceptionBlock(528, 256, (160, 320), (32, 128), 128),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        self.b_5 = nn.Sequential(
            inceptionBlock(832, 256, (160, 320), (32, 128), 128),
            inceptionBlock(832, 384, (192, 384), (48, 128), 128),
            globalAvgPool()
        )
        self.blocks = nn.Sequential(self.b_1,self.b_2,self.b_3,self.b_4,self.b_5)

        self.fc_layer = nn.Sequential(
            # output layer
            nn.Linear(1024,10),
        )
    def forward(self, x):
        y = self.blocks(x)
        y = self.fc_layer(y.view(x.shape[0],-1))  # faltten layer
        return y

net = googLeNet()
print(net)

loss_func = nn.CrossEntropyLoss()
optimizor = optim.Adam(net.parameters(), lr=learning_rate)



googLeNet(
  (b_1): Sequential(
    (0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (b_2): Sequential(
    (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (b_3): Sequential(
    (0): inceptionBlock(
      (p_1_1): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1))
      (p_2_1): Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1))
      (p_2_2): Conv2d(96, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (p_3_1): Conv2d(192, 16, kernel_size=(1, 1), stride=(1, 1))
      (p_3_2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (p_4_1): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
      (p_4_2): Conv2d(192, 32, kernel_size=(1, 1)

In [5]:
# Train Model
def evaluate_model(net, test_iterator, device):
    net = net.to(device)
    print('testing on:', device)
    with torch.no_grad():
        correct,num_exp = 0.0,0
        for X,y in test_iterator:
            if isinstance(net, nn.Module):
                net.eval()  # eval mode will shut off dropout function
                correct += (net(X.to(device)).argmax(1)==y.to(device)).float().sum().cpu().item()
                net.train()
            else: 
                print('is this your self-defined nn module?? we are not considering GPU if so')
                if('is_training' in net.__code__.co_varnames): 
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item() 
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item() 
            num_exp += y.size(0)
     
    return correct/num_exp*100

def train_model(num_epochs, train_iterator, test_iterator, loss_func, optimizor, net, device):
    net = net.to(device)
    print('training on:', device)
    for epoch in range(num_epochs):
        total_loss,total_batch,total_acc,total_num,start_time = 0.0,0,0.0,0,time.time()
        for X, y in train_iterator:
            X = X.to(device)
            y = y.to(device)

            output = net(X)
            loss = loss_func(output,y)
            optimizor.zero_grad()
            loss.backward()
            optimizor.step()
            
            total_loss += loss.cpu().item()
            total_batch += 1
            total_acc += (output.argmax(1)==y).sum().cpu().item()
            total_num += y.size(0)
        
        test_acc = evaluate_model(net, test_iterator, device)
        print('Epoch: {}, Average loss: {:.4f}, Average accuracy: {:.2f}%, Test Accuracy: {:.2f}%, time: {:.1f}sec' \
              .format(epoch+1, total_loss/total_batch, total_acc/total_num*100, test_acc, time.time()-start_time))

train_model(num_epochs,train_iterator,test_iterator,loss_func,optimizor,net,device)
        
# Prediction

training on: cuda
testing on: cuda
Epoch: 1, Average loss: 2.2984, Average accuracy: 12.28%, Test Accuracy: 20.80%, time: 43.3sec
testing on: cuda
Epoch: 2, Average loss: 1.1744, Average accuracy: 56.29%, Test Accuracy: 94.89%, time: 43.9sec
testing on: cuda
Epoch: 3, Average loss: 0.1318, Average accuracy: 96.28%, Test Accuracy: 97.36%, time: 44.7sec
testing on: cuda
Epoch: 4, Average loss: 0.0654, Average accuracy: 98.08%, Test Accuracy: 98.58%, time: 45.5sec
testing on: cuda
Epoch: 5, Average loss: 0.0464, Average accuracy: 98.63%, Test Accuracy: 98.10%, time: 45.8sec
