# Global Average Pooling (GAP) Layer with a Simple CNN

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NAVIFOLIO/dl_intro/blob/main/notebooks/vgg_like_with_GAP_MNIST.ipynb)

Try the [Global Average Pooling layer (Min Lin et al., 2014)](https://arxiv.org/abs/1312.4400) in a simple CNN.

Global Average Pooling レイヤーを試そう

解説記事（日本語）：[Global Average Pooling(GAP)をCNNで使う](https://navifolio-jp.com/cnn-global-average-pooing)

## MNIST Dataset Loading and PyTorch Dataset creation

MNISTデータセットを読み込み、PyTorchのDatasetを作成します。

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from torchvision import transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader

# Preprocessing: resize so the shorter side is 32 px, then convert to a float
# tensor in [0, 1]. The train and test pipelines are currently identical.
transform_train = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
])
transform_test = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
])

# Download MNIST into ./input (if not already present) and wrap it as Datasets.
mnist_train = MNIST(root="./input", train=True, download=True, transform=transform_train,)
mnist_test = MNIST(root="./input", train=False, download=True, transform=transform_test,)

batch_size = 100
train_loader = DataLoader(dataset=mnist_train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=mnist_test, batch_size=batch_size, shuffle=False)

# Pull one batch from each loader so the reported shapes really come from the
# split they are labelled with (the test shape used to echo the train batch).
data, labels = next(iter(train_loader))
test_data, _ = next(iter(test_loader))
print(f'number of train data: {len(mnist_train)}, number of test data: {len(mnist_test)}')
print(f'train data shape: {data.size()}, test data shape: {test_data.size()}')

# Preview a random 5x5 grid of training digits with their labels.
n_view_image = 25
view_loader = DataLoader(dataset=mnist_train, batch_size=n_view_image, shuffle=True)
classes = np.array(["0","1","2","3","4","5","6","7","8","9"])
images, view_labels = next(iter(view_loader))

plt.figure(figsize=(10, 10))
for i in range(n_view_image):
    plt.subplot(5, 5, i+1)
    # Each image is (1, H, W); drop the channel axis so imshow gets a plain
    # 2-D array instead of depending on its handling of (H, W, 1) input.
    plt.imshow(images[i].squeeze(0).numpy())
    label = classes[view_labels[i].item()]
    plt.title(label)
    plt.tick_params(labelbottom=False, labelleft=False, bottom=False, left=False)
plt.show()

## Simple CNN model like VGG with GAP layer | GAPレイヤーを使用したモデルの作成

I created a very small VGG-like CNN and replaced the Max Pooling layer just before the classifier with a Global Average Pooling layer.
Unlike the plain VGG, the CNN model below includes batch normalization layers for training stability.

[VGG](https://arxiv.org/abs/1409.1556)を参考に、非常に小さなCNNモデルを作成します。分類器の前に使用される最大値Poolingの代わりに、Global Average Pooling Layerを採用します。
ただし、素朴なVGGとは異なり、下記のモデルでは学習の安定化のためにバッチ正規化を行なっています。

reference: [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556)

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class mini_vgg(nn.Module):
    """Small VGG-style CNN whose classifier is fed by Global Average Pooling.

    Three stages of two 3x3 convolutions each (64, 128 and 256 channels), with
    batch normalization after the second convolution of every stage and 2x2
    max pooling after the first two stages. The last feature map is reduced to
    one value per channel by global average pooling and classified by two
    fully connected layers.

    Args:
        init_weights: When True, every Conv2d and Linear weight is initialized
            with Kaiming-normal and every bias is set to zero.
    """

    def __init__(self, init_weights=True):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.conv6 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(256, 256, bias=True)
        self.fc2 = nn.Linear(256, 10, bias=True)
        self.norm1 = nn.BatchNorm2d(64)
        self.norm2 = nn.BatchNorm2d(128)
        self.norm3 = nn.BatchNorm2d(256)
        self.pool = nn.MaxPool2d(2, 2)
        # Output size 1 means "average over the whole feature map" (GAP).
        self.globalAvgPool = nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(p=0.5)

        if init_weights:
            for module in self.modules():
                if isinstance(module, (nn.Conv2d, nn.Linear)):
                    nn.init.kaiming_normal_(module.weight)
                    if module.bias is not None:
                        nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Compute class scores for a batch of single-channel images.

        Args:
            x: Tensor of shape (N, 1, H, W).

        Returns:
            Tensor of shape (N, 10) holding raw, unnormalized class logits.
        """
        x = F.relu(self.conv1(x))
        x = F.relu(self.norm1(self.conv2(x)))
        x = self.pool(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.norm2(self.conv4(x)))
        x = self.pool(x)
        x = F.relu(self.conv5(x))
        x = F.relu(self.norm3(self.conv6(x)))
        x = self.globalAvgPool(x)

        # (N, 256, 1, 1) -> (N, 256)
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        # No activation on the output layer: a softmax-based loss such as
        # CrossEntropyLoss expects raw logits, and a ReLU here would clamp
        # every negative class score to zero and block its gradient.
        return self.fc2(x)
    
# Instantiate the GAP-based model (weights are initialized in __init__).
net = mini_vgg()
# Move the model's parameters and buffers to the GPU; this requires a CUDA device.
net.cuda()

## Model training and recording of loss and accuracy

モデルの訓練と損失・正解率の記録を行います。

In [None]:
from torch import optim

# Per-epoch history of mean loss and accuracy for both splits.
record_loss_train = []
record_loss_test = []
record_accuracy_train = []
record_accuracy_test = []

loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters())

# Send batches to whichever device the model already lives on instead of
# hard-coding CUDA here a second time.
device = next(net.parameters()).device

for epoch in range(10):
    # ---- training pass ----
    net.train()
    loss_train = 0
    correct_train = 0
    total_train = 0
    print(f'Training of Epoch:{epoch} Start.')
    for x, labels in train_loader:
        x, labels = x.to(device), labels.to(device)
        y = net(x)
        loss = loss_func(y, labels)
        loss_train += loss.item()

        # Training accuracy is measured on the same forward pass, i.e. with
        # dropout active and batch-norm in training mode.
        correct_train += (y.argmax(1) == labels).sum().item()
        total_train += len(x)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Mean of the per-batch losses.
    loss_train /= len(train_loader)
    record_loss_train.append(loss_train)
    accuracy_train = correct_train / total_train
    record_accuracy_train.append(accuracy_train)

    # ---- evaluation pass ----
    net.eval()
    loss_test = 0
    correct_test = 0
    total_test = 0
    print(f'Evaluation of Epoch:{epoch} Start.')
    # No gradients are needed for evaluation; skipping autograd bookkeeping
    # saves memory and time without changing the computed values.
    with torch.no_grad():
        for x_test, labels_test in test_loader:
            x_test, labels_test = x_test.to(device), labels_test.to(device)
            y_test = net(x_test)
            loss = loss_func(y_test, labels_test)
            loss_test += loss.item()
            correct_test += (y_test.argmax(1) == labels_test).sum().item()
            total_test += len(x_test)

    loss_test /= len(test_loader)
    record_loss_test.append(loss_test)
    accuracy_test = correct_test / total_test
    record_accuracy_test.append(accuracy_test)

    print(f"Epoch: {epoch}, Loss_Train {loss_train}, Loss_Test: {loss_test}")
    print(f"Epoch: {epoch}, Acc_Train {accuracy_train*100}, Acc_Test: {accuracy_test*100}")

## Plotting loss and accuracy with pyplot

`matplotlib.pyplot`ライブラリで、エポックごとの損失と精度（正解率）の推移をグラフ化します。

In [None]:
# One figure per metric, each showing the train and test curves per epoch.
metric_histories = (
    ("Error", record_loss_train, record_loss_test),
    ("Accuracy", record_accuracy_train, record_accuracy_test),
)

for y_axis_name, train_history, test_history in metric_histories:
    plt.plot(range(len(train_history)), train_history, label="Train")
    plt.plot(range(len(test_history)), test_history, label="Test")
    plt.legend()

    plt.xlabel("Epochs")
    plt.ylabel(y_axis_name)
    plt.show()

## Calculating the percentage of correct answers on the test data

訓練済みモデルを使用し、テストデータに対しての正解率を百分率で計算し表示します。

In [None]:
# Accuracy of the trained model over every batch of test_loader.
correct = 0
total = 0
net.eval()

# Send batches to whichever device the model lives on.
device = next(net.parameters()).device

# Inference only: disable autograd so no computation graph is kept per batch.
with torch.no_grad():
    for x, t in test_loader:
        x, t = x.to(device), t.to(device)
        y = net(x)
        correct += (y.argmax(1) == t).sum().item()
        total += len(x)

print(f"Accuracy: {correct / total * 100}%")

## Model with no GAP

GAPを使用しないモデルとの比較を行いたい方のために、以下にGAPを使用しないVGGベースのモデルを掲載します。

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class mini_vgg_with_no_gap(nn.Module):
    """Small VGG-style CNN baseline that does not use Global Average Pooling.

    Same convolutional stack as the GAP variant (three stages of two 3x3
    convolutions with 64, 128 and 256 channels, batch normalization after the
    second convolution of each stage), but every stage ends in 2x2 max
    pooling. The final feature map is adaptively average-pooled to 4x4 and
    flattened to 256 * 4 * 4 = 4096 features for the classifier.

    Args:
        init_weights: When True, every Conv2d and Linear weight is initialized
            with Kaiming-normal and every bias is set to zero.
    """

    def __init__(self, init_weights=True):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.conv6 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(4096, 256, bias=True)
        self.fc2 = nn.Linear(256, 10, bias=True)
        self.norm1 = nn.BatchNorm2d(64)
        self.norm2 = nn.BatchNorm2d(128)
        self.norm3 = nn.BatchNorm2d(256)
        self.pool = nn.MaxPool2d(2, 2)
        # Fixes the spatial size at 4x4 so fc1 always receives 4096 features.
        self.avgPool = nn.AdaptiveAvgPool2d(4)
        self.dropout = nn.Dropout(p=0.5)

        if init_weights:
            for module in self.modules():
                if isinstance(module, (nn.Conv2d, nn.Linear)):
                    nn.init.kaiming_normal_(module.weight)
                    if module.bias is not None:
                        nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Compute class scores for a batch of single-channel images.

        Args:
            x: Tensor of shape (N, 1, H, W).

        Returns:
            Tensor of shape (N, 10) holding raw, unnormalized class logits.
        """
        x = F.relu(self.conv1(x))
        x = F.relu(self.norm1(self.conv2(x)))
        x = self.pool(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.norm2(self.conv4(x)))
        x = self.pool(x)
        x = F.relu(self.conv5(x))
        x = F.relu(self.norm3(self.conv6(x)))
        x = self.pool(x)
        x = self.avgPool(x)

        # (N, 256, 4, 4) -> (N, 4096)
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        # No activation on the output layer: a softmax-based loss such as
        # CrossEntropyLoss expects raw logits, and a ReLU here would clamp
        # every negative class score to zero and block its gradient.
        return self.fc2(x)