<a href="https://www.kaggle.com/code/ahmedwael2000/pytorch?scriptVersionId=139142743" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install torchsummary

In [None]:
import os 
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision
import torchvision.transforms as T
from torchvision import models
from torchsummary import summary
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

%matplotlib inline

In [None]:
# Root directory of the dataset.
data_path = '/kaggle/input/butterfly-image-classification'

# Paths of the 'test' and 'train' sub-directories under the dataset root.
test_path, train_path = (
    os.path.join(data_path, split_dir) for split_dir in ('test', 'train')
)

# Annotation tables read from the two CSV files under the dataset root.
df, test_df = (
    pd.read_csv(os.path.join(data_path, csv_name))
    for csv_name in ('Training_set.csv', 'Testing_set.csv')
)

In [None]:
# Re-type the label column as a pandas 'category' column
labels_as_category = df['label'].astype('category')
df['label'] = labels_as_category

# Keep the integer code of each category in a separate column
df['encoded_label'] = labels_as_category.cat.codes
df.head()

In [None]:
# Split the annotations into a training and a validation dataframe (85% / 15%).
# A fixed random_state makes the split reproducible between runs, and
# stratifying on the label keeps the class proportions the same in both parts.
train_df, cv_df = train_test_split(
    df,
    test_size=0.15,
    random_state=42,
    stratify=df['label'],
)

In [None]:
class CustomDataset(Dataset):
    """Image-classification dataset backed by an annotation dataframe.

    Column 0 of the dataframe is used as the image file name (joined onto
    ``data_path``) and column 2 as the integer class index. Each sample is
    returned as an ``(image, one_hot_label)`` pair.

    Args:
        csv_file: Annotation dataframe (an already-loaded ``DataFrame``, not a
            path). Must contain a ``label`` column.
        data_path: Directory that contains the image files.
        transforms: Callable applied to the loaded RGB PIL image. Defaults to
            ``T.ToTensor()`` when ``None``.
    """

    def __init__(self, csv_file, data_path, transforms=None):
        self.annotations = csv_file
        self.data_path = data_path
        # Compare against None explicitly: some callables (e.g. an empty
        # nn.Sequential) are falsy and would otherwise be silently replaced.
        self.transforms = transforms if transforms is not None else T.ToTensor()

        labels = self.annotations['label']
        if isinstance(labels.dtype, pd.CategoricalDtype):
            # A categorical column remembers every category even when this
            # particular split happens not to contain one of them, so the
            # one-hot width stays consistent across train/validation splits.
            self.num_classes = len(labels.cat.categories)
        else:
            self.num_classes = len(labels.unique())

    def __getitem__(self, i):
        """Return the ``(transformed image, one-hot float label)`` pair for row ``i``."""
        image_path = os.path.join(self.data_path, self.annotations.iloc[i, 0])
        # Force three channels so grayscale/RGBA files collate and normalize
        # like the rest, and close the file handle as soon as it is decoded.
        with Image.open(image_path) as raw_image:
            image = self.transforms(raw_image.convert('RGB'))

        class_index = torch.tensor(int(self.annotations.iloc[i, 2]), dtype=torch.long)
        label = F.one_hot(class_index, num_classes=self.num_classes)
        label = label.type(torch.float)

        return (image, label)

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.annotations)

In [None]:
# Deterministic preprocessing: resize to 224, take a 224x224 centre crop,
# then convert the image to a tensor.
stat_steps = [T.Resize(224), T.CenterCrop(224), T.ToTensor()]
transform_img = T.Compose(stat_steps)

# Training split wrapped in a loader that yields batches of 16 images.
train_dataset = CustomDataset(
    csv_file=train_df,
    data_path=train_path,
    transforms=transform_img,
)
loader = DataLoader(dataset=train_dataset, batch_size=16)

In [None]:
# calculating the mean and std of images for normalization
def batch_mean_and_sd(loader):
    """Compute the per-channel mean and standard deviation over a loader.

    The statistics are accumulated batch by batch as running first and second
    moments, so the whole dataset never has to be held in memory.

    Args:
        loader: Iterable yielding ``(images, _)`` pairs where ``images`` is a
            4-D tensor indexed as (batch, channel, height, width).

    Returns:
        Tuple ``(mean, std)`` of 1-D tensors with one entry per channel. Both
        are all zeros when the loader yields no batches.
    """
    cnt = 0
    # Start from zeros: torch.empty returns uninitialised memory, and a stray
    # NaN/inf in it would survive the `cnt * moment` term (0 * NaN is NaN).
    fst_moment = torch.zeros(3)
    snd_moment = torch.zeros(3)

    for images, _ in loader:
        b, _channels, h, w = images.shape
        nb_pixels = b * h * w
        sum_ = torch.sum(images, dim=[0, 2, 3])
        sum_of_square = torch.sum(images ** 2, dim=[0, 2, 3])
        # Weighted update of the running moments with this batch's pixels.
        fst_moment = (cnt * fst_moment + sum_) / (cnt + nb_pixels)
        snd_moment = (cnt * snd_moment + sum_of_square) / (cnt + nb_pixels)
        cnt += nb_pixels

    # Var[x] = E[x^2] - E[x]^2; rounding can push it marginally below zero,
    # which would turn the square root into NaN, so clamp first.
    variance = torch.clamp(snd_moment - fst_moment ** 2, min=0)
    mean, std = fst_moment, torch.sqrt(variance)
    return mean, std
  
# Measure the per-channel statistics on the training loader and show them.
channel_stats = batch_mean_and_sd(loader)
mean, std = channel_stats
print("mean and std: \n", *channel_stats)

# Alternatively, the values can be set by hand:
# mean = (0.485, 0.456, 0.406)
# std = (0.229, 0.224, 0.225)
# (these numbers are copied from the ImageNet dataset)

In [None]:
# Training pipeline: random augmentation followed by normalization.
# The Resize/CenterCrop pair mirrors the preprocessing under which `mean` and
# `std` were measured and guarantees every sample has the same 224x224 size.
augment = T.Compose([
    T.Resize(224),
    T.CenterCrop(224),
    T.RandomHorizontalFlip(p=0.5),
    # With probability 0.8 apply one of blur / elastic / sharpen, then jitter colours.
    T.RandomApply([
        T.RandomChoice([
            T.GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 3.0)),
            T.ElasticTransform(alpha=100.0),
            T.RandomAdjustSharpness(sharpness_factor=3.0)
        ], p=[0.4, 0.3, 0.3]),
        T.ColorJitter(brightness=.3, hue=.1, contrast=.2, saturation=0.2),
    ], p=0.8),
    T.RandomGrayscale(p=0.05),
    T.RandomVerticalFlip(p=0.05),
    T.RandomRotation(degrees=(-30.0, 30.0)),
    T.ToTensor(),
    T.Normalize(mean=mean, std=std)
])

# Validation pipeline: same geometry and normalization but no randomness, so
# validation results are deterministic and reflect un-augmented images.
eval_transform = T.Compose([
    T.Resize(224),
    T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize(mean=mean, std=std)
])

train_dataset = CustomDataset(train_df, train_path, augment)
cv_dataset = CustomDataset(cv_df, train_path, eval_transform)

In [None]:
# Run on the GPU when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Data Loader
# Batches of 16: the training loader shuffles, the validation loader does not.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=16)
cv_loader = DataLoader(dataset=cv_dataset, shuffle=False, batch_size=16)

# Models

- VGG19 
- ResNet18
- MobileNet
- GoogLeNet (InceptionNet)
- EfficientNet

## VGG19

In [None]:
class VGG19(nn.Module):
    """VGG-19 style classifier built as one flat ``nn.Sequential``.

    Sixteen 3x3 convolutions (each followed by ReLU) are arranged in five
    stages, every stage ending in a 2x2 max-pool; a three-layer fully
    connected head with dropout follows. The first linear layer takes 25088
    features, i.e. 512 channels on a 7x7 map, which is what a 224x224 input
    produces after the five poolings.

    Args:
        in_channels: Number of channels of the input images.
        num_classes: Size of the output layer.
    """

    # (output channels, number of conv+ReLU pairs) for each pooled stage.
    _STAGES = ((64, 2), (128, 2), (256, 4), (512, 4), (512, 4))

    def __init__(self, in_channels, num_classes):
        super().__init__()

        layers = []
        channels = in_channels
        for width, depth in self._STAGES:
            for _ in range(depth):
                layers.append(nn.Conv2d(channels, width, kernel_size=3, stride=1, padding=1))
                layers.append(nn.ReLU())
                channels = width
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))

        # Classifier head.
        layers.extend([
            nn.Flatten(),
            nn.Linear(25088, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        ])

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the whole stack and return the class scores."""
        scores = self.model(x)
        return scores

# Build the hand-written VGG19 with a 1000-way output on the selected device
# and print its layer summary for a 3x224x224 input.
vgg19 = VGG19(in_channels=3, num_classes=1000)
vgg19 = vgg19.to(device)
summary(vgg19, input_size=(3, 224, 224))

In [None]:
# Load VGG19 with its ImageNet weights. The `weights=` enum replaces the
# deprecated `pretrained=True` flag and selects the same checkpoint.
pretrained_vgg19 = models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1).to(device)
summary(pretrained_vgg19, (3, 224, 224))

In [None]:
# Print the name of every top-level child module of the pretrained model.
# (These pre-trained parts could be frozen; this cell does not freeze them.)
for name, layer in pretrained_vgg19.named_children():
    print(name)

# Number of target classes for the new head
num_classes = 75

# Rebuild the network as a flat Sequential: every child except the last,
# a Flatten, then the last child (the classifier) without its final layer.
*feature_stages, old_classifier = list(pretrained_vgg19.children())
pretrained_vgg19 = nn.Sequential(
    *feature_stages,
    nn.Flatten(),
    *old_classifier[:-1],
).to(device)

# Append a fresh linear layer that maps the 4096 features to the 75 classes
custom_linear_layer = nn.Linear(in_features=4096, out_features=num_classes).to(device)
pretrained_vgg19.add_module('custom_linear', custom_linear_layer)

summary(pretrained_vgg19, (3, 224, 224))

In [None]:
# Cross-entropy objective, and an Adam optimizer (lr 1e-4) over every
# parameter of the adapted VGG19.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=pretrained_vgg19.parameters(), lr=1e-4)

In [None]:
def train_model(train_loader, model, criterion, optimizer, device='cpu', epochs=10):
    """Train ``model`` in place and return it.

    Args:
        train_loader: Iterable yielding ``(images, target)`` batches.
        model: Network to optimise.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        device: Device on which the model and every batch are placed.
        epochs: Number of passes over ``train_loader``.

    Returns:
        The same ``model`` object, left in training mode on ``device``.

    The mean batch loss of the epoch is printed every fifth epoch and once
    more at the end; it is ``nan`` if no batch was ever processed.
    """
    # Make sure the weights live where the batches are sent. Module.to()
    # moves parameters in place, so the optimizer keeps valid references.
    model = model.to(device)
    model.train()

    epoch_loss = float('nan')
    for epoch in range(epochs):
        running_loss = 0.0
        n_batches = 0

        for images, target in train_loader:
            images = images.to(device)
            target = target.to(device)

            optimizer.zero_grad()

            loss = criterion(model(images), target)
            loss.backward()

            optimizer.step()

            # .item() detaches the value so no autograd graph is kept alive.
            running_loss += loss.item()
            n_batches += 1

        if n_batches:
            epoch_loss = running_loss / n_batches

        if epoch % 5 == 0:
            print(f'epoch {epoch}: loss: {epoch_loss:.4f}')

    print(f'training end, loss: {epoch_loss:.4f}')

    return model

In [None]:
# Fine-tune the adapted VGG19 for 20 epochs on the training loader.
new_vgg19 = train_model(
    train_loader=train_loader,
    model=pretrained_vgg19,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=20,
)

In [None]:
# Write the trained VGG19 weights (its state_dict) to disk
vgg19_weights = new_vgg19.state_dict()
torch.save(vgg19_weights, 'trained_vgg19.pth')

## ResNet18

In [None]:
class ResBlock(nn.Module):
    """Residual block of two 3x3 convolutions, each followed by batch norm.

    The input is added back onto the output of the second batch norm before
    the final ReLU. When the block changes the spatial stride or the channel
    count, the skip path is a strided 1x1 convolution plus batch norm;
    otherwise it is the identity.

    Args:
        in_n: Number of input channels.
        out_n: Number of output channels.
        stride: Stride of the first convolution (and of the skip projection).
    """

    def __init__(self, in_n, out_n, stride=1):
        super().__init__()

        self.conv1 = nn.Conv2d(in_n, out_n, kernel_size=3, stride=stride, padding=1, bias=False)
        self.conv2 = nn.Conv2d(out_n, out_n, kernel_size=3, padding=1, bias=False)
        self.relu = nn.ReLU()
        self.bn1 = nn.BatchNorm2d(out_n)
        self.bn2 = nn.BatchNorm2d(out_n)

        # A projection is only required when the main path alters the shape.
        shape_changes = stride != 1 or in_n != out_n
        self.shortcut = (
            nn.Sequential(
                nn.Conv2d(in_n, out_n, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_n),
            )
            if shape_changes
            else nn.Identity()
        )

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        skip = self.shortcut(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = out + skip

        return self.relu(out)
        
        
class ResNet18(nn.Module):
    """ResNet-18 style classifier assembled from eight ``ResBlock`` units.

    Layout: 7x7 stride-2 convolution + batch norm + ReLU, 3x3 stride-2
    max-pool, four pairs of residual blocks (64, 128, 256, 512 channels, the
    last three pairs starting with a stride-2 block), global average pooling
    and a linear classifier.

    Args:
        in_n: Number of channels of the input images.
        num_classes: Size of the output layer.
    """

    def __init__(self, in_n, num_classes):
        super().__init__()
        # Stem. The convolution is followed by batch norm and ReLU so the
        # network does not start with a purely linear conv -> pool stage;
        # the conv bias is dropped because batch norm supplies its own shift.
        self.conv1 = nn.Conv2d(in_n, 64, 7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.pool1 = nn.MaxPool2d(3, stride=2, padding=1)

        self.block1 = ResBlock(64, 64)
        self.block2 = ResBlock(64, 64)
        self.block3 = ResBlock(64, 128, stride=2)
        self.block4 = ResBlock(128, 128)
        self.block5 = ResBlock(128, 256, stride=2)
        self.block6 = ResBlock(256, 256)
        self.block7 = ResBlock(256, 512, stride=2)
        self.block8 = ResBlock(512, 512)

        # Adaptive pooling yields a 1x1 map for any input resolution; for a
        # 224x224 input it equals the former fixed 7x7 average pool.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        # Call the registered sub-modules directly instead of building a
        # throw-away nn.Sequential on every forward pass.
        x = self.pool1(self.relu(self.bn1(self.conv1(x))))

        residual_blocks = (
            self.block1, self.block2, self.block3, self.block4,
            self.block5, self.block6, self.block7, self.block8,
        )
        for block in residual_blocks:
            x = block(x)

        x = torch.flatten(self.avgpool(x), 1)
        return self.fc(x)

# Put the model on `device` before summarising it: torchsummary feeds a CUDA
# tensor whenever a GPU is available, so a model left on the CPU would fail
# with a device-mismatch error there.
resnet18 = ResNet18(3, 1000).to(device)
summary(resnet18, (3, 224, 224))

In [None]:
# Load ResNet18 with its ImageNet weights. The `weights=` enum replaces the
# deprecated `pretrained=True` flag and selects the same checkpoint. The model
# is moved to `device` because torchsummary feeds a CUDA tensor when a GPU is
# available.
pretrained_resnet18 = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1).to(device)
summary(pretrained_resnet18, (3, 224, 224))

In [None]:
# Number of target classes for the new head
num_classes = 75

# Replace the existing final fully connected layer with a new linear layer
# that keeps the same input width and outputs one score per class.
fc_inputs = pretrained_resnet18.fc.in_features
pretrained_resnet18.fc = nn.Linear(in_features=fc_inputs, out_features=num_classes)

In [None]:
# Move the whole model (including the newly attached `fc` layer) to the
# training device before the optimizer is created from its parameters.
pretrained_resnet18 = pretrained_resnet18.to(device)

# The optimizer must be built from `pretrained_resnet18`; the former name
# `pretrained_res` was never defined and raised a NameError.
optimizer = optim.Adam(pretrained_resnet18.parameters(), lr=1e-4)
new_resnet18 = train_model(train_loader, pretrained_resnet18, criterion, optimizer, device=device, epochs=20)

In [None]:
# Write the trained ResNet18 weights (its state_dict) to disk
resnet18_weights = new_resnet18.state_dict()
torch.save(resnet18_weights, 'trained_resnet18.pth')

## Inception Module (GoogLeNet)

In [None]:
class Conv_block(nn.Module):
    """Bias-free 2-D convolution followed by batch normalization and ReLU.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Convolution kernel size.
        padding: Padding added by the convolution.
        stride: Convolution stride.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, padding=0, stride=1):
        super().__init__()

        convolution = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            bias=False,
        )
        self.conv_block = nn.Sequential(
            convolution,
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        )

    def forward(self, x):
        """Apply conv -> batch norm -> ReLU to ``x``."""
        activated = self.conv_block(x)
        return activated

class Inception_block(nn.Module):
    """Four parallel branches whose outputs are concatenated along channels.

    Branches: a 1x1 convolution; a 1x1 reduction followed by a 3x3
    convolution; a 1x1 reduction followed by a 5x5 convolution; and a 3x3
    stride-1 max-pool followed by a 1x1 convolution. All branches keep the
    spatial size, so the result has
    ``out_1x1 + out_3x3 + out_5x5 + pool_1x1`` channels.

    Args:
        in_channels: Channels of the input feature map.
        out_1x1: Output channels of the 1x1 branch.
        red_3x3: Channels after the 1x1 reduction of the 3x3 branch.
        out_3x3: Output channels of the 3x3 branch.
        red_5x5: Channels after the 1x1 reduction of the 5x5 branch.
        out_5x5: Output channels of the 5x5 branch.
        pool_1x1: Output channels of the pooling branch.
    """

    def __init__(self, in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, pool_1x1):
        super().__init__()

        # 1x1 convolution only
        self.branch1 = Conv_block(in_channels, out_1x1, kernel_size=1)

        # 1x1 reduction, then a padded 3x3 convolution
        reduce_3x3 = Conv_block(in_channels, red_3x3, kernel_size=1)
        expand_3x3 = Conv_block(red_3x3, out_3x3, kernel_size=3, padding=1)
        self.branch2 = nn.Sequential(reduce_3x3, expand_3x3)

        # 1x1 reduction, then a padded 5x5 convolution
        reduce_5x5 = Conv_block(in_channels, red_5x5, kernel_size=1)
        expand_5x5 = Conv_block(red_5x5, out_5x5, kernel_size=5, padding=2)
        self.branch3 = nn.Sequential(reduce_5x5, expand_5x5)

        # size-preserving max-pool, then a 1x1 convolution
        pool = nn.MaxPool2d(kernel_size=3, padding=1, stride=1)
        project = Conv_block(in_channels, pool_1x1, kernel_size=1)
        self.branch4 = nn.Sequential(pool, project)

    def forward(self, x):
        """Run every branch on ``x`` and join the results on dim 1 (channels)."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        outputs = [branch(x) for branch in branches]
        return torch.cat(outputs, dim=1)
    

class Inception_Net(nn.Module):
    """GoogLeNet-style classifier built from ``Conv_block`` and ``Inception_block``.

    Layout: a 7x7 stride-2 conv block and a 3x3 conv block (each followed by a
    stride-2 max-pool), two inception blocks, max-pool, five inception blocks,
    max-pool, two inception blocks, then global average pooling, dropout and
    a linear layer over the final 1024 channels.

    Args:
        in_channels: Number of channels of the input images.
        num_classes: Size of the output layer.
    """

    # Each row: in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, pool_1x1
    _STAGE_3 = (
        (192, 64, 96, 128, 16, 32, 32),       # inception3a
        (256, 128, 128, 192, 32, 96, 64),     # inception3b
    )
    _STAGE_4 = (
        (480, 192, 96, 208, 16, 48, 64),      # inception4a
        (512, 160, 112, 224, 24, 64, 64),     # inception4b
        (512, 128, 128, 256, 24, 64, 64),     # inception4c
        (512, 112, 144, 288, 32, 64, 64),     # inception4d
        (528, 256, 160, 320, 32, 128, 128),   # inception4e
    )
    _STAGE_5 = (
        (832, 256, 160, 320, 32, 128, 128),   # inception5a
        (832, 384, 192, 384, 48, 128, 128),   # inception5b
    )

    def __init__(self, in_channels, num_classes):
        super().__init__()

        def halve():
            # 3x3 max-pool with stride 2, used between stages
            return nn.MaxPool2d(kernel_size=3, padding=1, stride=2)

        # Stem: 7x7 block and 3x3 block, each followed by a pool
        layers = [
            Conv_block(in_channels, 64, kernel_size=7, stride=2, padding=3),
            halve(),
            Conv_block(64, 192, kernel_size=3, stride=1, padding=1),
            halve(),
        ]

        layers.extend(Inception_block(*spec) for spec in self._STAGE_3)
        layers.append(halve())

        layers.extend(Inception_block(*spec) for spec in self._STAGE_4)
        layers.append(halve())

        layers.extend(Inception_block(*spec) for spec in self._STAGE_5)

        # Classifier head
        layers.extend([
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Dropout(p=0.4),
            nn.Flatten(),
            nn.Linear(in_features=1024, out_features=num_classes),
        ])

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the whole stack and return the class scores."""
        scores = self.model(x)
        return scores

In [None]:
# Put the model on `device` before summarising it: torchsummary feeds a CUDA
# tensor whenever a GPU is available, so a model left on the CPU would fail
# with a device-mismatch error there.
Net = Inception_Net(3, 1000).to(device)

summary(Net, (3, 224, 224))

In [None]:
# Load the pretrained GoogLeNet model with its ImageNet weights. The
# `weights=` enum replaces the deprecated `pretrained=True` flag and selects
# the same checkpoint. The model is moved to `device` because torchsummary
# feeds a CUDA tensor when a GPU is available.
pretrained_inception = models.googlenet(weights=models.GoogLeNet_Weights.IMAGENET1K_V1).to(device)
summary(pretrained_inception, (3, 224, 224))