In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
!pip install torchsummary

Collecting torchsummary
  Downloading torchsummary-1.5.1-py3-none-any.whl (2.8 kB)
Installing collected packages: torchsummary
Successfully installed torchsummary-1.5.1


In [2]:
import os 
import torch
import cv2 as cv
import torchvision
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from torchvision import models
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torchsummary import summary
import torchvision.transforms as T
from torchvision.io import read_image
from torch.utils.data import DataLoader, Dataset

In [3]:
# Root folder of the Kaggle data set; the image folders and the annotation
# CSV files are all resolved relative to it.
data_path = '/kaggle/input/butterfly-image-classification'

# Image directories of the two splits.
train_path, test_path = (
    os.path.join(data_path, split_dir) for split_dir in ('train', 'test')
)

# Annotation tables of the two splits (training table first, then testing).
train_df, test_df = (
    pd.read_csv(os.path.join(data_path, csv_name))
    for csv_name in ('Training_set.csv', 'Testing_set.csv')
)

In [4]:
# Store the textual class names as a pandas categorical so that every
# distinct name is backed by an integer code.
train_df['label'] = train_df.label.astype('category')

# Expose those integer codes in their own column; the readable name stays
# available in `label`.
train_df['encoded_label'] = train_df.label.cat.codes
train_df.head()

Unnamed: 0,filename,label,encoded_label
0,Image_1.jpg,SOUTHERN DOGFACE,66
1,Image_2.jpg,ADONIS,0
2,Image_3.jpg,BROWN SIPROETA,12
3,Image_4.jpg,MONARCH,44
4,Image_5.jpg,GREEN CELLED CATTLEHEART,33


In [5]:
class CustomDataset(Dataset):
    """Image-classification dataset backed by an annotation DataFrame.

    Every item is an ``(image, label)`` pair: the image tensor decoded from
    ``data_path/<file name>`` (optionally passed through ``transforms``) and a
    float one-hot vector of length ``num_classes``.

    Args:
        csv_file: Annotation table (an already loaded DataFrame, despite the
            name). The image file name is read from its first column; it must
            also provide a ``label`` column and an integer ``encoded_label``
            column.
        data_path: Directory that contains the image files.
        transforms: Optional callable applied to the decoded image tensor.
    """

    def __init__(self, csv_file, data_path, transforms=None):
        self.annotations = csv_file
        self.data_path = data_path
        self.transforms = transforms
        # Number of distinct class names present in the annotation table.
        self.num_classes = len(self.annotations.label.unique())

    def __getitem__(self, i):
        """Return the ``i``-th ``(image, one_hot_label)`` pair (``i`` is positional)."""
        image_path = os.path.join(self.data_path, self.annotations.iloc[i, 0])
        image = read_image(image_path)

        # Read the label by POSITION, exactly like the file name above.
        # Label-based access (`encoded_label[i]`) silently pairs an image with
        # another row's label, or raises KeyError, as soon as the frame does
        # not carry a default 0..n-1 index (e.g. after filtering or shuffling).
        class_index = int(self.annotations['encoded_label'].iloc[i])
        label = F.one_hot(
            torch.tensor(class_index, dtype=torch.long),
            num_classes=self.num_classes,
        )
        label = label.type(torch.float)

        if self.transforms:
            image = self.transforms(image)
        return (image, label)

    def __len__(self):
        """Number of annotated images."""
        return len(self.annotations)

In [6]:
def add_gaussian_noise(image):
    """Divide an image by 255 and add randomly parameterised Gaussian noise.

    For every call a noise mean is drawn uniformly from [-2, 2) and a noise
    standard deviation uniformly from [1, 10) using NumPy's global RNG; the
    per-pixel noise itself comes from ``torch.randn``. Both the image and the
    noise are divided by 255, so the noise parameters are expressed on the
    same scale as the incoming pixel values (assumed to be 0-255 intensities,
    e.g. the output of ``read_image`` -- TODO confirm for other callers).

    Args:
        image: Image tensor of any shape.

    Returns:
        Tensor of the same shape holding ``image / 255 + noise / 255``.
        The result is not clamped.
    """
    # Keep this draw order (mean, then std, then the noise field) so that
    # seeded runs reproduce the same values.
    noise_mean = np.random.uniform(low=-2.0, high=2.0)
    noise_std = np.random.uniform(low=1.0, high=10.0)
    gaussian = noise_mean + noise_std * torch.randn(image.size())
    return image / 255.0 + gaussian / 255.0

# Augmentation pipeline applied to every decoded image: random horizontal
# flip, random rotation within +/-30 degrees, a random 224x224 crop (the
# images must therefore be at least 224 pixels on each side) and finally
# rescaling to /255 with additive Gaussian noise.
augment = T.Compose([
    T.RandomHorizontalFlip(p=0.5),
    T.RandomRotation(degrees=(-30, 30)),
    T.RandomCrop(size=(224, 224)),
    add_gaussian_noise
])

dataset = CustomDataset(train_df, train_path, augment)

# 85 % of the annotated images go to training, the remainder to validation.
train_size = int(0.85 * len(dataset))
cv_size = len(dataset) - train_size

# Seed the split explicitly: without a fixed generator the membership of the
# two subsets changes on every run, so results cannot be reproduced or
# compared across sessions.
train_dataset, cv_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, cv_size],
    generator=torch.Generator().manual_seed(42),
)
# NOTE(review): both subsets wrap the same `dataset` object, so the validation
# images also pass through the random augmentation above.

In [7]:
# Mini-batch iterators over the two subsets: 16 samples per batch, training
# batches are reshuffled every epoch, validation batches keep their order.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=16)
cv_loader = DataLoader(dataset=cv_dataset, shuffle=False, batch_size=16)

# Run on the GPU when one is visible to PyTorch, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# models  

## VGG19, ResNet18, MobileNet, GoogLeNet (InceptionNet), EfficientNet


In [8]:
# VGG19: 
class VGG19(nn.Module):
    """Hand-written VGG-19 style network.

    The network consists of five convolutional stages (2, 2, 4, 4 and 4
    convolutions, every one a 3x3 kernel with stride 1 and padding 1 followed
    by ReLU), each closed by a 2x2 max-pool, and a three-layer fully
    connected classifier with dropout. The first fully connected layer
    expects ``512 * 7 * 7`` features, i.e. a 224x224 input image.

    Args:
        in_channels: Number of channels of the input image.
        num_classes: Number of outputs of the last linear layer.
        device: Device the whole layer stack is moved to on construction.
    """

    def __init__(self, in_channels, num_classes, device='cpu'):
        super().__init__()

        # Output width of every convolution, grouped per stage.
        stage_widths = (
            (64, 64),
            (128, 128),
            (256, 256, 256, 256),
            (512, 512, 512, 512),
            (512, 512, 512, 512),
        )

        # Layers are appended in network order so their positions inside the
        # Sequential (and hence the state_dict keys) match a literal listing.
        layers = []
        channels = in_channels
        for widths in stage_widths:
            for width in widths:
                layers.append(nn.Conv2d(channels, width, 3, stride=1, padding=1))
                layers.append(nn.ReLU())
                channels = width
            layers.append(nn.MaxPool2d(2, stride=2))

        # Classifier head.
        layers.extend([
            nn.Flatten(),
            nn.Linear(in_features=512*7*7, out_features=4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(in_features=4096, out_features=num_classes),
        ])

        self.model = nn.Sequential(*layers).to(device)

    def forward(self, x):
        """Run ``x`` through the full layer stack and return the class scores."""
        return self.model(x)
    
# Instantiate the hand-written network for 3-channel images with 1000 outputs
# and print its per-layer output shapes and parameter counts for a
# 3x224x224 input.
vgg19 = VGG19(in_channels=3, num_classes=1000, device=device)
summary(vgg19, input_size=(3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256,

In [13]:
# Fetch torchvision's VGG19 together with its pretrained weights, move it to
# the selected device and print its layer summary for a 3x224x224 input.
# NOTE(review): newer torchvision releases prefer the `weights=` argument over
# `pretrained=True` -- confirm the installed version before switching.
pretrained_vgg19 = models.vgg19(pretrained=True)
pretrained_vgg19 = pretrained_vgg19.to(device)
summary(pretrained_vgg19, input_size=(3, 224, 224))



----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256,

In [14]:
# List the top-level sub-modules of the pretrained network.
# NOTE: this loop only prints names. Nothing is frozen here, so every
# parameter of the network stays trainable.
for child_name, child_module in pretrained_vgg19.named_children():
    print(child_name)

# The new head has to score 75 classes.
num_classes = 75

# Rebuild the network as one flat Sequential: the convolutional feature
# extractor, the average pool, an explicit Flatten (needed because the
# original forward pass is no longer used) and every classifier layer except
# the final one.
pretrained_vgg19 = nn.Sequential(
    pretrained_vgg19.features,
    pretrained_vgg19.avgpool,
    nn.Flatten(),
    *pretrained_vgg19.classifier[:-1],
).to(device)

# Append a fresh linear layer that maps the 4096 classifier features to the
# 75 target classes.
custom_linear_layer = nn.Linear(in_features=4096, out_features=num_classes).to(device)
pretrained_vgg19.add_module('custom_linear', custom_linear_layer)

summary(pretrained_vgg19, (3, 224, 224))

features
avgpool
classifier
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
           

In [15]:
# Adam updates every parameter of the rebuilt network with a small learning
# rate.
optimizer = optim.Adam(params=pretrained_vgg19.parameters(), lr=1e-5)

# Cross-entropy computed on the raw scores of the last linear layer.
criterion = nn.CrossEntropyLoss()

In [16]:
def train_model(train_loader, model, criterion, optimizer, device='cpu', epochs=10):
    """Train ``model`` in place and return it.

    Args:
        train_loader: Iterable yielding ``(images, target)`` batches.
        model: Module to optimise; it is switched to training mode.
        criterion: Callable ``criterion(output, target)`` returning the loss.
        optimizer: Optimizer already bound to the parameters of ``model``.
        device: Device every batch is moved to before the forward pass.
        epochs: Number of passes over ``train_loader``.

    Returns:
        The same ``model`` object that was passed in (not a copy).

    Note:
        The reported loss is the loss of the LAST mini-batch processed, not an
        epoch average, so it can fluctuate noticeably between reports.
    """
    model.train()

    # Pre-bind the reported value: with `epochs=0` or an empty loader no batch
    # is ever processed and the report lines below would otherwise fail with
    # an unbound local variable.
    loss = float('nan')

    for epoch in range(epochs):
        for images, target in train_loader:
            images, target = images.to(device), target.to(device)

            # Clear gradients left over from the previous step before the new
            # forward/backward pass.
            optimizer.zero_grad()

            output = model(images)
            loss = criterion(output, target)
            loss.backward()

            optimizer.step()

        # Progress report every fifth epoch (0, 5, 10, ...).
        if epoch % 5 == 0:
            print(f'epoch {epoch}: loss: {loss}')

    print(f'training end, loss: {loss}')

    return model

In [17]:
# Fine-tune for 20 epochs. train_model returns the very module it was given,
# so `new_VGG19` and `pretrained_vgg19` refer to the same trained network.
new_VGG19 = train_model(
    train_loader=train_loader,
    model=pretrained_vgg19,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=20,
)

epoch 0: loss: 1.068445086479187
epoch 5: loss: 0.6951164603233337
epoch 10: loss: 0.015174373053014278
epoch 15: loss: 0.0018582374323159456
training end, loss: 0.09725723415613174


In [18]:
# Write the network's state_dict (parameters and buffers, not the module
# object itself) to the current working directory.
torch.save(obj=pretrained_vgg19.state_dict(), f='pretrained_vgg19.pth')

In [None]:
# ResNet18