In [1]:
import numpy as np

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

from tqdm import tqdm
from torchsummary import summary

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
NUM_CLASSES = 10
BATCH_SIZE = 64        
# LR = 0.045      
LR = 0.001
EPOCHS = 30        

In [4]:
cifar_train = torchvision.datasets.CIFAR10(root='./train/',
                                           train=True,
                                           download=True
                                          )

cifar_test = torchvision.datasets.CIFAR10(root='./test/',
                                          train=False,
                                          download=True
                                         )

Files already downloaded and verified
Files already downloaded and verified


In [5]:
mean_R, mean_G, mean_B = np.mean(cifar_train.data, axis=(0, 1, 2)) / 255.
std_R, std_G, std_B = np.std(cifar_train.data, axis=(0, 1, 2)) / 255.

In [6]:
train_transform = transforms.Compose([transforms.ToTensor(),
                                      transforms.Resize((299, 299)),
                                      transforms.Normalize((mean_R, mean_G, mean_B), (std_R, std_G, std_B)),
                                      transforms.RandomHorizontalFlip()
                                     ])

test_transform = transforms.Compose([transforms.ToTensor(),
                                     transforms.Resize((299, 299)),
                                     transforms.Normalize((mean_R, mean_G, mean_B), (std_R, std_G, std_B)),
                                    ])

In [7]:
cifar_train.transform = train_transform
cifar_test.transform = test_transform

In [8]:
train_sub = torch.utils.data.Subset(cifar_train, range(5000))
subLoader = torch.utils.data.DataLoader(train_sub, batch_size=BATCH_SIZE)

trainLoader = torch.utils.data.DataLoader(cifar_train,
                                          batch_size=BATCH_SIZE,
                                          shuffle=True)

testLoader = torch.utils.data.DataLoader(cifar_test,
                                        batch_size=BATCH_SIZE,
                                        shuffle=True)

In [9]:
class conv_block(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, **kwargs):
        super(conv_block, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()

        nn.init.xavier_normal_(self.conv.weight)
    
    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        out = self.relu(x)
        return out

In [10]:
class inception_fig5(nn.Module):
    def __init__(self, in_channels, filter_channels):
        super(inception_fig5, self).__init__()
        channels_1, channels_2, channels_3, channels_4 = filter_channels 
        self.branch1 = nn.Sequential(conv_block(in_channels, channels_1[0], kernel_size=1, padding='same'),
                                   conv_block(channels_1[0], channels_1[1], kernel_size=3, padding='same'),
                                   conv_block(channels_1[1], channels_1[2], kernel_size=3, padding='same')
                                   )
        
        self.branch2 = nn.Sequential(conv_block(in_channels, channels_2[0], kernel_size=1, padding='same'),
                                   conv_block(channels_2[0], channels_2[1], kernel_size=3, padding='same')
                                  )
        
        self.branch3 = nn.Sequential(nn.MaxPool2d(kernel_size=3, padding=1, stride=1),
                                   conv_block(in_channels, channels_3, kernel_size=1, padding='same')
                                  )
        
        self.branch4 = conv_block(in_channels, channels_4, kernel_size=1, padding='same')
        
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_normal_(m.weight)

    def forward(self, x):
        out1 = self.branch1(x)
        out2 = self.branch2(x)
        out3 = self.branch3(x)
        out4 = self.branch4(x)
        out = torch.concat([out1, out2, out3, out4], dim=1)
        return out

In [11]:
class inception_fig6(nn.Module):
    def __init__(self, in_channels, filter_channels):
        super(inception_fig6, self).__init__()
        channels_1, channels_2, channels_3, channels_4 = filter_channels 
        self.branch1 = nn.Sequential(conv_block(in_channels, channels_1[0], kernel_size=1, padding='same'),
                                     conv_block(channels_1[0], channels_1[1], kernel_size=(1, 7), padding='same'),
                                     conv_block(channels_1[1], channels_1[2], kernel_size=(7, 1), padding='same'),
                                     conv_block(channels_1[2], channels_1[3], kernel_size=(1, 7), padding='same'),
                                     conv_block(channels_1[3], channels_1[4], kernel_size=(7, 1), padding='same')
                                    )
        
        self.branch2 = nn.Sequential(conv_block(in_channels, channels_2[0], kernel_size=1, padding='same'),
                                   conv_block(channels_2[0], channels_2[1], kernel_size=(1, 7), padding='same'),
                                   conv_block(channels_2[1], channels_2[2], kernel_size=(7, 1), padding='same')
                                  )
        
        self.branch3 = nn.Sequential(nn.MaxPool2d(kernel_size=3, padding=1, stride=1),
                                   conv_block(in_channels, channels_3, kernel_size=1, padding='same')
                                  )
        
        self.branch4 = conv_block(in_channels, channels_4, kernel_size=1, padding='same')
        
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_normal_(m.weight)

    def forward(self, x):
        out1 = self.branch1(x)
        out2 = self.branch2(x)
        out3 = self.branch3(x)
        out4 = self.branch4(x)
        out = torch.concat([out1, out2, out3, out4], dim=1)
        return out

In [12]:
class inception_fig7(nn.Module):
    def __init__(self, in_channels, filter_channels):
        super(inception_fig7, self).__init__()
        channels_12, channels_34, channels_5, channels_6 = filter_channels

        self.branch12 = nn.Sequential(conv_block(in_channels, channels_12[0], kernel_size=1, padding='same'),
                                      conv_block(channels_12[0], channels_12[1], kernel_size=3, padding='same')
                                     )
        self.layer1 = conv_block(channels_12[1], channels_12[2][0], kernel_size=(1, 3), padding='same')
        self.layer2 = conv_block(channels_12[1], channels_12[2][1], kernel_size=(3, 1), padding='same')

        self.branch34 = conv_block(in_channels, channels_34[0], kernel_size=1, padding='same')
        self.layer3 = conv_block(channels_34[0], channels_34[1][0], kernel_size=(1, 3), padding='same')
        self.layer4 = conv_block(channels_34[0], channels_34[1][0], kernel_size=(3, 1), padding='same')

        self.branch5 = nn.Sequential(nn.MaxPool2d(kernel_size=3, padding=1, stride=1),
                                     conv_block(in_channels, channels_5, kernel_size=1, padding='same')
                                    )
 
        self.branch6 = conv_block(in_channels, channels_6, kernel_size=1, padding='same')
        
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_normal_(m.weight)

    def forward(self, x):
        out12 = self.branch12(x)
        out1 = self.layer1(out12)
        out2 = self.layer2(out12)

        out34 = self.branch34(x)
        out3 = self.layer3(out34)
        out4 = self.layer4(out34)

        out5 = self.branch5(x)
        out6 = self.branch6(x)
        out = torch.concat([out1, out2, out3, out4, out5, out6], dim=1)
        return out

In [13]:
class inception_dim_red(nn.Module):
    def __init__(self, in_channels, filter_channels):
        super(inception_dim_red, self).__init__()
        channels_1, channels_2 = filter_channels
        self.branch1 = nn.Sequential(conv_block(in_channels, channels_1[0], kernel_size=1, padding='same'),
                                     conv_block(channels_1[0], channels_1[1], kernel_size=3, padding='same'),
                                     conv_block(channels_1[1], channels_1[2], kernel_size=3, stride=2)
                                    )

        self.branch2 = nn.Sequential(conv_block(in_channels, channels_2[0], kernel_size=1, padding='same'),
                                     conv_block(channels_2[0], channels_2[1], kernel_size=3, stride=2)
                                    )

        self.branch3 = nn.MaxPool2d(kernel_size=3, stride=2)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_normal_(m.weight)

    def forward(self, x):
        out1 = self.branch1(x)
        out2 = self.branch2(x)
        out3 = self.branch3(x)
        out = torch.concat([out1, out2, out3], dim=1)
        return out

In [14]:
class Inception_v2(nn.Module):
    def __init__(self):
        super(Inception_v2, self).__init__()

        self.conv1 = conv_block(3, 32, kernel_size=3, stride=2)
        self.conv2 = conv_block(32, 32, kernel_size=3)
        self.conv3 = conv_block(32, 64, kernel_size=3, padding='same')
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv4 = conv_block(64, 80, kernel_size=3)
        self.conv5 = conv_block(80, 192, kernel_size=3, stride=2)
        self.conv6 = conv_block(192, 288, kernel_size=3, padding='same')

        self.classifier = nn.Sequential(nn.AvgPool2d(kernel_size=(8, 8), stride=1),
                                        nn.Flatten(start_dim=1),
                                        nn.Dropout(p=0.5),
                                        nn.Linear(2048, NUM_CLASSES)
                                       )
        
        self.inception5_1 = inception_fig5(288, [[64, 96, 96], [48, 64], 64, 64])
        self.inception5_2 = inception_fig5(288, [[64, 96, 96], [48, 64], 64, 64])
        self.inception5_3 = inception_fig5(288, [[64, 96, 96], [48, 64], 64, 64])
        self.inecption5_red = inception_dim_red(288, [[64, 96, 96], [256, 384]])

        self.inception6_1 = inception_fig6(768, [[128, 128, 128, 128, 192], [128, 128, 192], 192, 192])
        self.inception6_2 = inception_fig6(768, [[160, 160, 160, 160, 192], [160, 160, 192], 192, 192])
        self.inception6_3 = inception_fig6(768, [[160, 160, 160, 160, 192], [160, 160, 192], 192, 192])
        self.inception6_4 = inception_fig6(768, [[192, 192, 192, 192, 192], [192, 192, 192], 192, 192])
        self.inception6_5 = inception_fig6(768, [[192, 192, 192, 192, 192], [192, 192, 192], 192, 192])
        self.inception6_red = inception_dim_red(768, [[128, 192, 192], [192, 320]])

        self.inception7_1 = inception_fig7(1280, [[448, 384, [384, 384]], [384, [384, 384]], 192, 320])
        self.inception7_2 = inception_fig7(2048, [[448, 384, [384, 384]], [384, [384, 384]], 192, 320])

        self.aux_classifier = nn.Sequential(nn.AvgPool2d(kernel_size=(5, 5), stride=3),
                                            nn.Conv2d(768, 128, kernel_size=(1, 1)),
                                            nn.Flatten(start_dim=1),
                                            nn.Linear(3200, 1024),
                                            nn.Dropout(0.4),
                                            nn.Linear(1024, NUM_CLASSES)
                                           )
        for layer in self.aux_classifier:
            if isinstance(layer, nn.Conv2d) or isinstance(layer, nn.Linear):
                nn.init.xavier_normal_(layer.weight)

    def forward(self, x, train):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.pool(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)

        x = self.inception5_1(x)
        x = self.inception5_2(x)
        x = self.inception5_3(x)
        x = self.inecption5_red(x)

        x = self.inception6_1(x)
        x = self.inception6_2(x)
        x = self.inception6_3(x)
        x = self.inception6_4(x)
        x_a = self.inception6_5(x)
        x = self.inception6_red(x_a)

        x = self.inception7_1(x)
        x = self.inception7_2(x)

        model_output = self.classifier(x)
        
        # Auxiliary Classifier
        if train:
            aux_output = self.aux_classifier(x_a)
            return model_output, aux_output

        else:
            return model_output

model = Inception_v2().to(device)

In [15]:
smoothing_param = 0.1
def smoothed_categorical_crossentropy(y_true, y_pred):
    smooth_positives = 1 - smoothing_param
    smooth_negatives = smoothing_param / NUM_CLASSES
    y_true = y_true * smooth_positives + smooth_negatives
    loss_fn = nn.CrossEntropyLoss()
    return loss_fn(y_true, y_pred)

In [16]:
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.94)

In [17]:
def train_loop(model, Loader):
    train_loss = 0
    train_acc = 0
    model.train()
    for data, target in tqdm(Loader):
        data, target = data.to(device), target.to(device)
        model_out, aux_out = model(data, train=True)
        main_loss = smoothed_categorical_crossentropy(model_out, target)
        aux_loss = smoothed_categorical_crossentropy(aux_out, target)
        total_loss = main_loss + aux_loss * 0.3

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        train_loss += total_loss.item() / len(Loader)
        train_acc += (torch.argmax(model_out, dim=1) == target).sum() / len(Loader.dataset)

    return train_loss, train_acc

In [18]:
def test_loop(model, Loader):
    test_loss = 0
    test_acc = 0
    model.eval()
    with torch.no_grad():
        for data, target in tqdm(Loader):
            data, target = data.to(device), target.to(device)
            model_out = model(data, train=False)
            loss = smoothed_categorical_crossentropy(model_out, target)

            test_loss += loss.item() / len(Loader)
            test_acc += (torch.argmax(model_out, dim=1) == target).sum() / len(Loader.dataset)

    return test_loss, test_acc

In [19]:
for epoch in range(1, EPOCHS+1):
    train_loss, train_acc = train_loop(model, subLoader)
    val_loss, val_acc = test_loop(model, testLoader)
    # lr_scheduler.step()

    print(f'\n[[ EPOCH  {epoch:2d} / {EPOCHS} ]]')
    print(f'Train Loss : {train_loss:.4f}, Train Accuracy : {train_acc*100:.2f} %')
    print(f'Valid Loss : {val_loss:.4f}, Valid Accuracy : {val_acc*100:.2f} %\n')

100%|██████████| 79/79 [01:34<00:00,  1.19s/it]
100%|██████████| 157/157 [01:01<00:00,  2.54it/s]



[[ EPOCH   1 / 30 ]]
Train Loss : 3.1704, Train Accuracy : 21.44 %
Valid Loss : 2.1232, Valid Accuracy : 27.88 %



100%|██████████| 79/79 [01:38<00:00,  1.25s/it]
100%|██████████| 157/157 [01:02<00:00,  2.49it/s]



[[ EPOCH   2 / 30 ]]
Train Loss : 2.4729, Train Accuracy : 27.18 %
Valid Loss : 1.7672, Valid Accuracy : 31.90 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:03<00:00,  2.49it/s]



[[ EPOCH   3 / 30 ]]
Train Loss : 2.2630, Train Accuracy : 32.48 %
Valid Loss : 1.7318, Valid Accuracy : 35.94 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:03<00:00,  2.49it/s]



[[ EPOCH   4 / 30 ]]
Train Loss : 2.1509, Train Accuracy : 35.46 %
Valid Loss : 1.6872, Valid Accuracy : 38.52 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:03<00:00,  2.49it/s]



[[ EPOCH   5 / 30 ]]
Train Loss : 2.0457, Train Accuracy : 38.94 %
Valid Loss : 1.7284, Valid Accuracy : 39.98 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:03<00:00,  2.49it/s]



[[ EPOCH   6 / 30 ]]
Train Loss : 1.9737, Train Accuracy : 41.36 %
Valid Loss : 1.6527, Valid Accuracy : 41.36 %



100%|██████████| 79/79 [01:39<00:00,  1.25s/it]
100%|██████████| 157/157 [01:02<00:00,  2.50it/s]



[[ EPOCH   7 / 30 ]]
Train Loss : 1.8964, Train Accuracy : 44.90 %
Valid Loss : 1.6174, Valid Accuracy : 43.10 %



100%|██████████| 79/79 [01:39<00:00,  1.25s/it]
100%|██████████| 157/157 [01:02<00:00,  2.50it/s]



[[ EPOCH   8 / 30 ]]
Train Loss : 1.8105, Train Accuracy : 47.56 %
Valid Loss : 1.8087, Valid Accuracy : 39.16 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:02<00:00,  2.50it/s]



[[ EPOCH   9 / 30 ]]
Train Loss : 1.7244, Train Accuracy : 50.10 %
Valid Loss : 1.3634, Valid Accuracy : 49.69 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:02<00:00,  2.49it/s]



[[ EPOCH  10 / 30 ]]
Train Loss : 1.6204, Train Accuracy : 53.32 %
Valid Loss : 1.4129, Valid Accuracy : 49.48 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:03<00:00,  2.48it/s]



[[ EPOCH  11 / 30 ]]
Train Loss : 1.5371, Train Accuracy : 56.26 %
Valid Loss : 1.3711, Valid Accuracy : 52.76 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:03<00:00,  2.49it/s]



[[ EPOCH  12 / 30 ]]
Train Loss : 1.4217, Train Accuracy : 60.20 %
Valid Loss : 1.4631, Valid Accuracy : 49.94 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:03<00:00,  2.48it/s]



[[ EPOCH  13 / 30 ]]
Train Loss : 1.3412, Train Accuracy : 61.90 %
Valid Loss : 1.5013, Valid Accuracy : 50.86 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:03<00:00,  2.48it/s]



[[ EPOCH  14 / 30 ]]
Train Loss : 1.2723, Train Accuracy : 64.58 %
Valid Loss : 1.5426, Valid Accuracy : 50.90 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:03<00:00,  2.48it/s]



[[ EPOCH  15 / 30 ]]
Train Loss : 1.1939, Train Accuracy : 66.52 %
Valid Loss : 1.5888, Valid Accuracy : 50.32 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:03<00:00,  2.48it/s]



[[ EPOCH  16 / 30 ]]
Train Loss : 1.1147, Train Accuracy : 69.16 %
Valid Loss : 1.4689, Valid Accuracy : 53.80 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:03<00:00,  2.49it/s]



[[ EPOCH  17 / 30 ]]
Train Loss : 1.0643, Train Accuracy : 71.26 %
Valid Loss : 1.4281, Valid Accuracy : 55.78 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:02<00:00,  2.49it/s]



[[ EPOCH  18 / 30 ]]
Train Loss : 0.9904, Train Accuracy : 73.48 %
Valid Loss : 1.3641, Valid Accuracy : 56.53 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:02<00:00,  2.49it/s]



[[ EPOCH  19 / 30 ]]
Train Loss : 0.9260, Train Accuracy : 74.82 %
Valid Loss : 1.2957, Valid Accuracy : 60.65 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:03<00:00,  2.49it/s]



[[ EPOCH  20 / 30 ]]
Train Loss : 0.8297, Train Accuracy : 77.90 %
Valid Loss : 1.3740, Valid Accuracy : 58.15 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:02<00:00,  2.49it/s]



[[ EPOCH  21 / 30 ]]
Train Loss : 0.7552, Train Accuracy : 79.24 %
Valid Loss : 1.3363, Valid Accuracy : 59.94 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:02<00:00,  2.50it/s]



[[ EPOCH  22 / 30 ]]
Train Loss : 0.7392, Train Accuracy : 80.30 %
Valid Loss : 1.4293, Valid Accuracy : 59.00 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:02<00:00,  2.50it/s]



[[ EPOCH  23 / 30 ]]
Train Loss : 0.6920, Train Accuracy : 81.40 %
Valid Loss : 1.3734, Valid Accuracy : 61.13 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:02<00:00,  2.50it/s]



[[ EPOCH  24 / 30 ]]
Train Loss : 0.6235, Train Accuracy : 83.46 %
Valid Loss : 1.5154, Valid Accuracy : 59.42 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:02<00:00,  2.50it/s]



[[ EPOCH  25 / 30 ]]
Train Loss : 0.5938, Train Accuracy : 83.72 %
Valid Loss : 1.4927, Valid Accuracy : 60.24 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:03<00:00,  2.49it/s]



[[ EPOCH  26 / 30 ]]
Train Loss : 0.4811, Train Accuracy : 87.24 %
Valid Loss : 1.6425, Valid Accuracy : 60.94 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:03<00:00,  2.49it/s]



[[ EPOCH  27 / 30 ]]
Train Loss : 0.4747, Train Accuracy : 87.30 %
Valid Loss : 1.5812, Valid Accuracy : 62.17 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:03<00:00,  2.49it/s]



[[ EPOCH  28 / 30 ]]
Train Loss : 0.4731, Train Accuracy : 87.26 %
Valid Loss : 1.7290, Valid Accuracy : 59.80 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:03<00:00,  2.49it/s]



[[ EPOCH  29 / 30 ]]
Train Loss : 0.4110, Train Accuracy : 89.02 %
Valid Loss : 1.9210, Valid Accuracy : 57.90 %



100%|██████████| 79/79 [01:39<00:00,  1.26s/it]
100%|██████████| 157/157 [01:02<00:00,  2.49it/s]


[[ EPOCH  30 / 30 ]]
Train Loss : 0.3484, Train Accuracy : 90.86 %
Valid Loss : 1.8468, Valid Accuracy : 59.48 %




