In [65]:
# Module Import
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets
from torch.utils.data import DataLoader

In [4]:
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else :
    DEVICE = torch.device('cpu')

print('Using PyTorch version:', torch.__version__, ' Device:', DEVICE)

Using PyTorch version: 1.8.1  Device: cuda


In [54]:
BATCH_SIZE = 128
EPOCHS = 90

## Dataset Loading (CalTech101)

In [71]:
transform = transforms.Compose([transforms.Resize((227,227)),
                    transforms.ToTensor()])

In [72]:
caltech_dataset = datasets.Caltech101(root = './data/Caltech101',
                                 download = True,
                                 transform = transform)

Files already downloaded and verified


#### Train:Validation:Test = 7:1:2

In [73]:
train_size = int(0.8 * len(caltech_dataset))
test_size = len(caltech_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(caltech_dataset, [train_size, test_size])

train_size = int(0.875 * train_size)
valid_size = len(train_dataset) - train_size
train_dataset, valid_dataset = torch.utils.data.random_split(train_dataset, [train_size, valid_size])

In [74]:
print('train dataset size : ', len(train_dataset), '(%.2f)' %(len(train_dataset)/len(caltech_dataset)))
print('validation dataset size : ', len(valid_dataset), '(%.2f)' %(len(valid_dataset)/len(caltech_dataset)))
print('test dataset size : ', len(test_dataset), '(%.2f)' %(len(test_dataset)/len(caltech_dataset)))

train dataset size :  6073 (0.70)
validation dataset size :  868 (0.10)
test dataset size :  1736 (0.20)


In [127]:
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=BATCH_SIZE)
val_loader = DataLoader(dataset=valid_dataset, shuffle=False, batch_size=BATCH_SIZE)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=BATCH_SIZE)

## Model Architecture

In [128]:
def conv_output_size(input_size, kernel_size, padding_size, stride):
    output = (input_size - kernel_size + 2 * padding_size) / stride + 1
    return output

def maxpool_output_size(input_size, pooling_size, stride):
    output = (input_size - pooling_size) / stride + 1
    return output

In [155]:
# 1st conv output size
maxpool_output_size(conv_output_size(227, 11, 0, 4),3,2)

27.0

In [130]:
# 2nd conv+maxpooing output size
maxpool_output_size(conv_output_size(27,3,1,1),3,2)

13.0

In [139]:
# 3rd conv output size
conv_output_size(13,3,1,1)

13.0

In [141]:
# 4th conv output size
conv_output_size(13,3,1,1)

13.0

In [160]:
# 5th conv+maxpooing output size
maxpool_output_size(conv_output_size(13,3,1,1),3,2)

6.0

In [162]:
class AlexNet(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        self.convLayer = nn.Sequential(
            # --- 1st Convolutional layer ---
            nn.Conv2d(in_channel = 3, 
                      out_channel = 96,
                      kernel_size = 11,
                      stride = 4),
            nn.ReLU(),
            nn.LocalResponseNorm(size = 5,
                                alpha = 0.0001,
                                beta = 0.75,
                                k = 2),
            nn.MaxPool2d(kernel_size = 3, stride = 2),
            
            # --- 2nd Convolutional layer ---
            nn.Conv2d(in_channel = 96,
                      out_channel = 256,
                      kernel_size = 5,
                      stride = 1,
                      padding = 1),
            nn.ReLU(),
            nn.LocalResponseNorm(size = 5,
                    alpha = 0.0001,
                    beta = 0.75,
                    k = 2),
            nn.MaxPool2d(kernel_size = 3, stride = 2),
            
            # --- 3rd Convolutional layer ---
            nn.Conv2d(in_channel = 256,
                     out_channel = 384,
                     kernel_size = 3,
                     stride = 1,
                     padding = 1),
            nn.ReLU(),
            
            # --- 4th Convolutional layer ---
            nn.Conv2d(in_channel = 384,
                     out_channel = 384,
                     kernel_size = 3,
                     stride = 1,
                     padding = 1),
            nn.ReLU(),
            
            # --- 5th Convolutional layer ---
            nn.Conv2d(in_channel = 384,
                     out_channel = 256,
                     kernel_size = 3,
                      stride = 1,
                      padding = 1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2),
        )
        
        self.fcLayer = nn.Sequential(
            # --- 1st Fully Connected layer ---
            nn.Linear(256*6*6, 4096), # channel * size, output_channel
            nn.ReLU(),
            nn.Dropout(p = 0.5),
            
            # --- 2nd Fully Connected layer ---
            nn.Linear(4096, 4096),
            nn.ReLu(),
            nn.Dropout(p = 0.5),
        )
    
    def forward(self, train):
        output = self.convLayer(train)
        output = self.fcLayer(output)
        
        return output