In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision import datasets
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from matplotlib_inline import backend_inline
# Ask the inline matplotlib backend to emit figures in 'svg' format.
backend_inline.set_matplotlib_formats('svg')

In [4]:
# Dataset

# Preprocessing: convert each image to a tensor, then normalize with
# mean 0.1307 and std 0.3081 (presumably the MNIST pixel statistics).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(0.1307, 0.3081),
])

# NOTE(review): absolute, machine-specific location; adjust when running elsewhere.
mnist_root = 'D:/Jupyter/DNN/dataset/mnist'

# Arguments shared by the training and test splits.
mnist_kwargs = dict(root=mnist_root, download=True, transform=transform)

# Download datasets
train_Data = datasets.MNIST(train=True, **mnist_kwargs)
test_Data = datasets.MNIST(train=False, **mnist_kwargs)

In [5]:
# Batch loaders: both splits use the same batch size; only training data is shuffled.
BATCH_SIZE = 64

train_loader = DataLoader(train_Data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_Data, batch_size=BATCH_SIZE, shuffle=False)

In [6]:
class Inception(nn.Module):
    """Four parallel convolution branches concatenated along the channel axis.

    Every branch keeps the spatial size of its input (1x1 kernels, or 3x3 / 5x5
    kernels with matching padding), so the outputs can be stacked on dim 1.
    The branches produce 16, 24, 24 and 24 channels, i.e. 88 channels in total.
    No activation is applied between or after the convolutions.

    Args:
        in_channels: number of channels of the input feature map.
    """

    def __init__(self, in_channels):
        super().__init__()

        # Branch 1: a single 1x1 convolution (a lone layer needs no nn.Sequential).
        self.branch1 = nn.Conv2d(in_channels, 16, kernel_size=1)

        # Branch 2: 1x1 convolution to 16 channels, followed by two 3x3 convolutions.
        branch2_layers = [
            nn.Conv2d(in_channels, 16, kernel_size=1),
            nn.Conv2d(16, 24, kernel_size=3, padding=1),
            nn.Conv2d(24, 24, kernel_size=3, padding=1),
        ]
        self.branch2 = nn.Sequential(*branch2_layers)

        # Branch 3: 1x1 convolution to 16 channels, followed by one 5x5 convolution.
        branch3_layers = [
            nn.Conv2d(in_channels, 16, kernel_size=1),
            nn.Conv2d(16, 24, kernel_size=5, padding=2),
        ]
        self.branch3 = nn.Sequential(*branch3_layers)

        # Branch 4: a single 1x1 convolution to 24 channels.
        self.branch4 = nn.Conv2d(in_channels, 24, kernel_size=1)

    def forward(self, x):
        """Apply all four branches to ``x`` and concatenate the results on dim 1."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([branch(x) for branch in branches], dim=1)

# Branches 2 and 3 begin with an additional 1x1 convolution that reduces the number of
# channels (e.g. 64 => 16) before the larger 3x3 / 5x5 kernels, lowering model complexity.

In [7]:
class GoogLeNet(nn.Module):
    """Small GoogLeNet-style classifier built from two conv/pool/Inception stages.

    The layers live in ``self.net`` (an ``nn.Sequential``). The final linear
    layer expects 1408 = 88 * 4 * 4 flattened features and produces 10 outputs.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 10 channels, 2x2 max-pool, then an Inception block (-> 88 channels).
        stage1 = [
            nn.Conv2d(1, 10, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            Inception(in_channels=10),  # small net within the big net
        ]

        # Stage 2: 88 (= 16 + 24 + 24 + 24) -> 20 channels, 2x2 max-pool,
        # then a second Inception block (-> 88 channels).
        stage2 = [
            nn.Conv2d(88, 20, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            Inception(in_channels=20),
        ]

        # Classifier head: flatten to 88 * 4 * 4 = 1408 features, then 10 outputs.
        head = [
            nn.Flatten(),
            nn.Linear(1408, 10),
        ]

        self.net = nn.Sequential(*stage1, *stage2, *head)

    def forward(self, x):
        """Forward propagation: run ``x`` through ``self.net`` and return the result."""
        return self.net(x)

In [9]:
# Examine Net Structure: push one random 1x1x28x28 input through a fresh
# network, layer by layer, printing the output shape after each layer.
X = torch.rand(1, 1, 28, 28)
for layer in GoogLeNet().net.children():
    X = layer(X)
    print(type(layer).__name__, 'output shape: \t', X.shape)

Conv2d output shape: 	 torch.Size([1, 10, 24, 24])
ReLU output shape: 	 torch.Size([1, 10, 24, 24])
MaxPool2d output shape: 	 torch.Size([1, 10, 12, 12])
Inception output shape: 	 torch.Size([1, 88, 12, 12])
Conv2d output shape: 	 torch.Size([1, 20, 8, 8])
ReLU output shape: 	 torch.Size([1, 20, 8, 8])
MaxPool2d output shape: 	 torch.Size([1, 20, 4, 4])
Inception output shape: 	 torch.Size([1, 88, 4, 4])
Flatten output shape: 	 torch.Size([1, 1408])
Linear output shape: 	 torch.Size([1, 10])


In [10]:
# Use the first GPU when CUDA is available; otherwise fall back to the CPU so
# this cell does not crash on machines without a GPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = GoogLeNet().to(device)
loss_fn = nn.CrossEntropyLoss()
learning_rate = 0.1
# Plain SGD over all model parameters.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate
)

In [None]:
# Training Network
epochs = 10
losses = []  # one entry per mini-batch, in iteration order

# Send each batch to whatever device the model lives on instead of
# hard-coding 'cuda:0'.
device = next(model.parameters()).device

model.train()  # make the training mode explicit
for epoch in range(epochs):
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        Pred = model(x)
        loss = loss_fn(Pred, y)
        losses.append(loss.item())
        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()
        optimizer.step()

# Per-batch training loss over the whole run.
Fig = plt.figure()
plt.plot(range(len(losses)), losses)
plt.xlabel('iteration (mini-batch)')
plt.ylabel('training loss')
plt.show()

In [None]:
# Evaluate classification accuracy on the test loader.
correct = 0  # number of correctly classified samples (Python int)
total = 0    # number of samples seen

# Send each batch to whatever device the model lives on instead of
# hard-coding 'cuda:0'.
device = next(model.parameters()).device

model.eval()  # make the evaluation mode explicit
with torch.no_grad():  # no gradients are needed for evaluation
    for x, y in test_loader:
        x, y = x.to(device), y.to(device)
        Pred = model(x)

        # The index of the largest score along dim 1 is the predicted class.
        _, predicted = torch.max(Pred, dim=1)
        # .item() keeps `correct` a plain int, so the message below prints a
        # number rather than a tensor repr.
        correct += (predicted == y).sum().item()
        total += y.size(0)

print(f"Test Accuracy：{100*correct/total}%")