<a href="https://colab.research.google.com/github/PhChLong/ML-DL/blob/main/GoogLeNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.utils.data import DataLoader, random_split
import torchvision
from torchvision.transforms import v2, ToTensor
from torchvision import datasets

In [2]:
# Seed PyTorch's global RNG so that anything drawing from it (e.g. weight
# initialisation, shuffling) starts from the same state on every
# "Restart Kernel -> Run All". GPU kernels may still be non-deterministic.
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

## Data

In [3]:
# Tunable constants for the data pipeline.
SPLIT_SEED = 42        # fixes which samples land in train vs. validation
TRAIN_FRACTION = 0.8   # share of the official training set used for fitting
BATCH_SIZE = 64

# Preprocessing applied to every image: upscale to 96x96, convert to a float
# tensor (ToTensor yields values in [0, 1]) and shift/scale the single
# grayscale channel to [-1, 1].
transform_img = v2.Compose(
    [v2.Resize((96, 96)),
    ToTensor(),
    v2.Normalize(
    mean=[0.5],
    std=[0.5])]
)
full_trainset = datasets.FashionMNIST(root = './data', train = True, download= True, transform = transform_img)
testset = datasets.FashionMNIST(root = './data', train = False, download= True, transform = transform_img)

# Hold out part of the training data for validation. A dedicated, seeded
# generator makes the split identical every time this cell is executed,
# independent of how much of the global RNG earlier cells have consumed.
train_size = int(TRAIN_FRACTION * len(full_trainset))
valid_size = len(full_trainset) - train_size
trainset, validset = random_split(
    full_trainset,
    [train_size, valid_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training loader shuffles; validation/test order is irrelevant.
train_loader = DataLoader(trainset, batch_size = BATCH_SIZE, shuffle = True)
valid_loader = DataLoader(validset, batch_size = BATCH_SIZE, shuffle = False)
test_loader = DataLoader(testset, batch_size = BATCH_SIZE, shuffle = False)

100%|██████████| 26.4M/26.4M [00:02<00:00, 12.1MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 205kB/s]
100%|██████████| 4.42M/4.42M [00:01<00:00, 3.35MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 13.6MB/s]


## GoogLeNet

### Inception Block

In [4]:
class InceptionBlock(nn.Module):
    """Four parallel convolutional branches whose outputs are concatenated.

    Every convolution is followed by batch normalisation and ReLU.

    Args:
        in_chan: number of input channels.
        out1: output channels of the 1x1 branch.
        out2: ``(reduce, out)`` channels of the 1x1 -> 3x3 branch.
        out3: ``(reduce, out)`` channels of the 1x1 -> 5x5 branch.
        out4: output channels of the 3x3 max-pool -> 1x1 branch.

    Attributes:
        out_channel: total channels of the concatenated output, i.e.
            ``out1 + out2[1] + out3[1] + out4``.
    """

    @staticmethod
    def _conv_bn_relu(n_in, n_out, **conv_kwargs):
        """Return the layers ``[Conv2d, BatchNorm2d, ReLU]`` for one conv unit."""
        return [nn.Conv2d(n_in, n_out, **conv_kwargs), nn.BatchNorm2d(n_out), nn.ReLU()]

    def __init__(self, in_chan, out1, out2, out3, out4):
        super().__init__()
        unit = self._conv_bn_relu

        # Branch 1: plain 1x1 convolution.
        self.b1 = nn.Sequential(*unit(in_chan, out1, kernel_size=1))

        # Branch 2: 1x1 channel reduction, then a padded 3x3 convolution.
        self.b2 = nn.Sequential(
            *unit(in_chan, out2[0], kernel_size=1),
            *unit(out2[0], out2[1], kernel_size=3, padding=1),
        )

        # Branch 3: 1x1 channel reduction, then a padded 5x5 convolution.
        self.b3 = nn.Sequential(
            *unit(in_chan, out3[0], kernel_size=1),
            *unit(out3[0], out3[1], kernel_size=5, padding=2),
        )

        # Branch 4: stride-1 max-pool (padding keeps the spatial size), then 1x1.
        self.b4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            *unit(in_chan, out4, kernel_size=1),
        )

        self.out_channel = sum((out1, out2[1], out3[1], out4))

    def forward(self, x):
        """Run all four branches on ``x`` and stack their outputs along dim 1."""
        branches = (self.b1, self.b2, self.b3, self.b4)
        return torch.cat([branch(x) for branch in branches], dim=1)

### Model

In [5]:
class GoogLeNet(nn.Module):
    """GoogLeNet-style image classifier built from ``InceptionBlock`` stages.

    The network is a convolutional stem (``b1``, ``b2``), three inception
    stages (``b3``, ``b4``, ``b5``) and a final linear layer that maps the
    1024 pooled features to ``classes`` logits.

    Args:
        classes: number of output logits produced by the final linear layer.
        in_channels: number of channels of the input images (1 = grayscale).
    """
    def __init__(self, classes = 10, in_channels = 1):
        super().__init__()
        # Stem, part 1: 7x7 stride-2 convolution and stride-2 max-pool -> 64 channels.
        self.b1 = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3),
            nn.ReLU(), nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        # Stem, part 2: 1x1 then 3x3 convolution and stride-2 max-pool -> 192 channels.
        self.b2 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=1),  nn.ReLU(),
            nn.Conv2d(64, 192, kernel_size=3, padding=1), nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        # Inception stage 3: channels go 192 -> 256 -> 480, then stride-2 max-pool.
        self.b3 = nn.Sequential(InceptionBlock(192, 64, (96, 128), (16, 32), 32),
                         InceptionBlock(256, 128, (128, 192), (32, 96), 64),
                         nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        # Inception stage 4: channels go 480 -> 512 -> 512 -> 512 -> 528 -> 832,
        # then stride-2 max-pool.
        self.b4 = nn.Sequential(InceptionBlock(480, 192, (96, 208), (16, 48), 64),
                         InceptionBlock(512, 160, (112, 224), (24, 64), 64),
                         InceptionBlock(512, 128, (128, 256), (24, 64), 64),
                         InceptionBlock(512, 112, (144, 288), (32, 64), 64),
                         InceptionBlock(528, 256, (160, 320), (32, 128), 128),
                         nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        # Inception stage 5: channels go 832 -> 832 -> 1024, then global average
        # pooling and flattening to a 1024-element feature vector per sample.
        self.b5 = nn.Sequential(InceptionBlock(832, 256, (160, 320), (32, 128), 128),
                         InceptionBlock(832, 384, (192, 384), (48, 128), 128),
                         nn.AdaptiveAvgPool2d((1,1)), nn.Flatten())
        # Full pipeline. The stage modules above are reused here (shared, not
        # copied); the classifier head honours the requested number of classes.
        self.net = nn.Sequential(
            self.b1, self.b2, self.b3, self.b4, self.b5, nn.Linear(1024, classes)
        )
    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        return self.net(x)

## Train

In [6]:
def train_dataloader_model(model, train_loader, valid_loader, optim, loss_fn, device = None, max_epoches = 100, diff = 1e-3, patience = 10):
    """Train ``model`` on mini-batches and validate after every epoch.

    Training ends at the first of these events:

    * ``max_epoches`` epochs have been run;
    * the mean training loss of two consecutive epochs differs by less than
      ``diff`` (reported as convergence; validation is skipped for that epoch);
    * the validation loss has not improved on its best value by more than
      ``diff`` for ``patience`` epochs (early stopping).

    Args:
        model: module to optimise; updated in place.
        train_loader: iterable of ``(inputs, targets)`` batches supporting ``len()``.
        valid_loader: iterable of ``(inputs, targets)`` batches supporting ``len()``.
        optim: optimizer exposing ``zero_grad()`` and ``step()``.
        loss_fn: callable ``loss_fn(predictions, targets)`` returning a scalar tensor.
        device: device the batches are moved to. When ``None`` the device of the
            model's first parameter is used, so batches always land where the
            model already is; a parameter-free model falls back to CUDA if
            available, else CPU.
        max_epoches: upper bound on the number of epochs.
        diff: minimum change in loss that counts as progress.
        patience: number of non-improving validation epochs tolerated.

    Returns:
        None. Per-epoch losses are printed. After at least one epoch the model
        is left in evaluation mode.
    """
    if device is None:
        # Follow the model rather than guessing: picking "cuda" merely because
        # it is available breaks when the model itself was left on the CPU.
        first_param = next(model.parameters(), None)
        if first_param is not None:
            device = first_param.device
        else:
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    train_losses = []
    valid_loss_min = float("inf")
    patience_counter = 0
    for epoch in range(max_epoches):
        # ---- training pass ----
        epoch_loss = 0
        model.train()
        for X_train, y_train in train_loader:
            X_train, y_train = X_train.to(device), y_train.to(device)

            y_train_pred = model(X_train)
            loss = loss_fn(y_train_pred, y_train)
            optim.zero_grad()
            loss.backward()
            optim.step()

            epoch_loss += loss.item()
        # Mean of the per-batch losses for this epoch.
        train_loss = epoch_loss/len(train_loader)
        train_losses.append(train_loss)

        model.eval()
        # Stop as soon as the training loss has (almost) stopped moving.
        if epoch > 0 and abs(train_losses[-2] - train_losses[-1]) < diff:
            print(f"epoch: {epoch}\t|| loss: {train_loss:.4f}")
            print("break due to model converges")
            return

        # ---- validation pass (no gradients needed) ----
        with torch.no_grad():
            epoch_valid_loss = 0
            for X_valid, y_valid in valid_loader:
                X_valid, y_valid = X_valid.to(device), y_valid.to(device)
                y_valid_pred = model(X_valid)
                c_loss = loss_fn(y_valid_pred, y_valid)
                epoch_valid_loss += c_loss.item()
            epoch_valid_loss /= len(valid_loader)

        # Early-stopping bookkeeping: only an improvement larger than `diff`
        # resets the patience counter.
        if epoch_valid_loss < valid_loss_min - diff:
            valid_loss_min = epoch_valid_loss
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print(f"epoch: {epoch} || train_loss: {train_loss:.4f} || valid_loss: {epoch_valid_loss:.4f}")
            print(f"Early stopping - no improvement for {patience} epochs")
            return
        print(f"epoch: {epoch}\t|| loss: {train_loss:.4f}\t || valid_loss: {epoch_valid_loss:.4f}")

In [7]:
# Build the network and move it to the selected device first, so that the
# optimizer below is created over parameters that already live there.
model = GoogLeNet()
model = model.to(device)

# Classification loss applied to the model's raw outputs, and an Adam
# optimizer over all model parameters with learning rate 1e-3.
loss_fn = nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(), lr=1e-3)

In [8]:
# Fit the model; progress is printed once per epoch. All stopping-related
# arguments (max_epoches, diff, patience) and the device keep their defaults.
train_dataloader_model(
    model=model,
    train_loader=train_loader,
    valid_loader=valid_loader,
    optim=optim,
    loss_fn=loss_fn,
)

epoch: 0	|| loss: 0.4640	 || valid_loss: 0.3926
epoch: 1	|| loss: 0.3058	 || valid_loss: 0.3373
epoch: 2	|| loss: 0.2651	 || valid_loss: 0.2681
epoch: 3	|| loss: 0.2296	 || valid_loss: 0.4940
epoch: 4	|| loss: 0.2135	 || valid_loss: 0.3094
epoch: 5	|| loss: 0.1908	 || valid_loss: 0.3371
epoch: 6	|| loss: 0.1792	 || valid_loss: 0.2292
epoch: 7	|| loss: 0.1646	 || valid_loss: 0.2218
epoch: 8	|| loss: 0.1484	 || valid_loss: 0.2263
epoch: 9	|| loss: 0.1336	 || valid_loss: 0.2458
epoch: 10	|| loss: 0.1184	 || valid_loss: 0.2304
epoch: 11	|| loss: 0.1081	 || valid_loss: 0.2234
epoch: 12	|| loss: 0.0933	 || valid_loss: 0.2526
epoch: 13	|| loss: 0.0859	 || valid_loss: 0.2374
epoch: 14	|| loss: 0.0717	 || valid_loss: 0.2508
epoch: 15	|| loss: 0.0615	 || valid_loss: 0.2552
epoch: 16	|| loss: 0.0566	 || valid_loss: 0.2615


KeyboardInterrupt: 

In [10]:
# Evaluate on the held-out test split: evaluation mode, no gradient tracking.
model.eval()
test_batch_losses = []
with torch.no_grad():
    for X_test, y_test in test_loader:
        X_test = X_test.to(device)
        y_test = y_test.to(device)
        y_test_pred = model(X_test)
        c_loss = loss_fn(y_test_pred, y_test)
        test_batch_losses.append(c_loss.item())

# Mean of the per-batch losses (every batch counts equally, whatever its size).
epoch_test_loss = sum(test_batch_losses) / len(test_loader)
print(epoch_test_loss)

0.29748661571722124
