# Aerial Cactus Identification - Modeling 

Tutorial Link -> https://www.kaggle.com/code/werooring/ch11-modeling

- Performance improvements
    - Apply a variety of image transformations -> define image transformers
    - Build a deeper CNN
    - Use a better optimizer instead of the standard one
    - Increase the number of epochs

**Fix Seed Value**

In [1]:
import torch # pytorch
import random
import numpy as np
import os

# Seed every random number generator used in this notebook so that repeated
# runs are comparable.
seed = 50
os.environ['PYTHONHASHSEED'] = str(seed)

# Python's `random`, NumPy and PyTorch (including the CUDA generators) all
# receive the same seed, in this order.
for seed_fn in (random.seed,
                np.random.seed,
                torch.manual_seed,
                torch.cuda.manual_seed,
                torch.cuda.manual_seed_all):
    seed_fn(seed)

# cuDNN flags: request deterministic behaviour, turn the benchmark mode off,
# and finally disable cuDNN altogether.
cudnn = torch.backends.cudnn
cudnn.deterministic = True
cudnn.benchmark = False
cudnn.enabled = False

In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [4]:
device

device(type='cuda')

In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/aerial-cactus-identification/sample_submission.csv
/kaggle/input/aerial-cactus-identification/train.zip
/kaggle/input/aerial-cactus-identification/test.zip
/kaggle/input/aerial-cactus-identification/train.csv


In [5]:
# Folder holding the competition files. The trailing slash matters because
# file names are appended to it by plain string concatenation.
data_path = '/kaggle/input/aerial-cactus-identification/'

labels = pd.read_csv(f'{data_path}train.csv')  # table describing the training images
submission = pd.read_csv(f'{data_path}sample_submission.csv')  # submission template

In [6]:
# unzip the zip file 

from zipfile import ZipFile

# Extract the training and test image archives into the current working
# directory (later cells read the images from 'train/' and 'test/').
for archive_name in ('train.zip', 'test.zip'):
    with ZipFile(data_path + archive_name) as zipper:
        zipper.extractall()

**Divide Train / Validation Data**

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the labelled rows for validation (train : valid = 9 : 1).
# Stratifying on 'has_cactus' keeps the class ratio the same in both splits,
# and the fixed random_state makes the split repeatable.
train, valid = train_test_split(
    labels,
    test_size=0.1,
    stratify=labels['has_cactus'],
    random_state=50,
)

**Define DataSet Class**

In [8]:
import cv2
from torch.utils.data import Dataset 

class ImageDataset(Dataset):
    """Dataset that loads the images listed in a DataFrame from disk.

    Each row of ``df`` describes one sample: column 0 holds the image file
    name and column 1 holds the target value.

    Args:
        df: Table of samples (training, validation or submission rows).
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to the RGB image array before
            it is returned.
    """

    def __init__(self, df, img_dir = './', transform = None):
        super().__init__()
        self.df = df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        Raises:
            FileNotFoundError: If the image file does not exist.
            OSError: If the file exists but OpenCV cannot decode it.
        """
        img_id = self.df.iloc[idx, 0]
        # os.path.join works whether or not img_dir ends with a separator.
        img_path = os.path.join(self.img_dir, img_id)
        if not os.path.isfile(img_path):
            raise FileNotFoundError(f'image file not found: {img_path}')

        image = cv2.imread(img_path)
        if image is None:
            # imread signals failure by returning None rather than raising;
            # stop here instead of failing later inside cvtColor.
            raise OSError(f'could not decode image file: {img_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR
        label = self.df.iloc[idx, 1]  # target value

        if self.transform is not None:
            # A transformer was supplied: apply it to the image.
            image = self.transform(image)

        return image, label

**Define Image Transformer**

- Why do we need to transform the images?
    - To create more training images from the existing ones -> data augmentation (데이터 증강)

In [10]:
from torchvision import transforms 

# Training pipeline: tensor conversion, padding, random augmentation, then
# per-channel normalisation.
transform_train = transforms.Compose([
    transforms.ToTensor(),  # convert the image array to a Tensor
    transforms.Pad(padding=32, padding_mode='symmetric'),  # mirror-pad the borders
    transforms.RandomHorizontalFlip(),  # flip left-right with the default probability
    transforms.RandomVerticalFlip(),  # flip top-bottom with the default probability
    transforms.RandomRotation(degrees=10),  # random rotation within -10..10 degrees
    transforms.Normalize(mean=(0.485, 0.456, 0.406),  # per-channel mean (R, G, B)
                         std=(0.229, 0.224, 0.225)),  # per-channel standard deviation (R, G, B)
])

# Validation/test pipeline: the same conversion, padding and normalisation,
# but without any random augmentation.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Pad(padding=32, padding_mode='symmetric'),
    transforms.Normalize(mean=(0.485, 0.456, 0.406),
                         std=(0.229, 0.224, 0.225)),
])

**Create Dataset and Data Loader**

In [12]:
# Both splits read images from the extracted 'train/' folder; only the
# training split uses the augmenting transform.
dataset_train = ImageDataset(train, 'train/', transform_train)
dataset_valid = ImageDataset(valid, 'train/', transform_test)

In [13]:
from torch.utils.data import DataLoader

# Mini-batches of 32 samples. Only the training loader shuffles; the
# validation loader keeps the rows in their original order.
loader_train = DataLoader(dataset_train, batch_size=32, shuffle=True)
loader_valid = DataLoader(dataset_valid, batch_size=32, shuffle=False)

## Create Model

- Improved CNN model
    - Deeper CNN
    - Apply batch normalization
        - Goal: stabilize training and speed up convergence
        - For each mini-batch, batch normalization computes the mean and variance of the inputs and uses them to normalize those inputs
        - After normalization, batch normalization applies two trainable parameters, $\gamma$ (scale) and $\beta$ (shift), which allow the network to restore its representational capacity if needed
    - Use Leaky ReLU as the activation function

In [16]:
import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    """Five-stage CNN that produces two class logits per image.

    Every stage is Conv2d -> BatchNorm2d -> LeakyReLU -> MaxPool2d, with the
    channel count going 3 -> 32 -> 64 -> 128 -> 256 -> 512. An average pool
    and two linear layers turn the resulting feature map into 2 outputs.

    ``fc1`` expects exactly 512 features per sample, i.e. a 1x1 feature map
    after ``avg_pool``. A 96x96 input satisfies this; an input that leaves a
    larger map makes ``forward`` raise a shape error.
    """

    def __init__(self):
        super().__init__()  # initialise nn.Module before registering layers

        # The stages are created in order layer1..layer5, followed by the
        # pooling layer and the two linear layers.
        self.layer1 = self._conv_block(3, 32)
        self.layer2 = self._conv_block(32, 64)
        self.layer3 = self._conv_block(64, 128)
        self.layer4 = self._conv_block(128, 256)
        self.layer5 = self._conv_block(256, 512)

        self.avg_pool = nn.AvgPool2d(kernel_size = 4)

        # NOTE: there is no activation between fc1 and fc2.
        self.fc1 = nn.Linear(in_features = 512 * 1 * 1, out_features = 64)
        self.fc2 = nn.Linear(in_features = 64, out_features = 2)

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Build one Conv2d -> BatchNorm2d -> LeakyReLU -> MaxPool2d stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels = in_channels, out_channels = out_channels,
                      kernel_size = 3, padding = 2),
            nn.BatchNorm2d(out_channels),  # batch normalization
            nn.LeakyReLU(),                # Leaky ReLU activation
            nn.MaxPool2d(kernel_size = 2),
        )

    # forward propagation
    def forward(self, x):
        """Return logits of shape (batch, 2) for a batch of images ``x``."""
        for stage in (self.layer1, self.layer2, self.layer3,
                      self.layer4, self.layer5):
            x = stage(x)
        x = self.avg_pool(x)
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 512), this can never move extra spatial positions into
        # the batch dimension; an unexpected feature-map size makes fc1 fail
        # with an explicit shape error instead.
        x = x.flatten(start_dim = 1)
        x = self.fc1(x)
        x = self.fc2(x)
        return x

In [17]:
# Create the network, then move its parameters to the selected device.
model = Model()
model = model.to(device)

## Train Model

**Set Loss Function & Optimizer**

In [18]:
# Loss function: cross-entropy, because this is a classification problem.
criterion = nn.CrossEntropyLoss()

In [19]:
# Optimizer: the algorithm that updates the model weights during training.
# Adamax, a variant of Adam from torch.optim, is used here with a small
# learning rate.
optimizer = torch.optim.Adamax(model.parameters(), lr=6e-5)

**Train Model**

In [20]:
epochs = 70 # number of passes over the training data

# Put the model in training mode explicitly. The evaluation cells call
# model.eval(), so without this a re-run of this cell would train with the
# BatchNorm layers still in evaluation mode.
model.train()

for epoch in range(epochs):
    epoch_loss = 0  # running sum of the batch losses for this epoch

    # The loop variable is named `targets` so that it does not overwrite the
    # `labels` DataFrame loaded from train.csv.
    for images, targets in loader_train: # repeat count = len(loader_train)
        images = images.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()  # clear gradients left over from the previous step

        outputs = model(images)  # forward pass

        loss = criterion(outputs, targets)

        epoch_loss += loss.item()
        loss.backward()  # back-propagate to compute the gradients

        optimizer.step()  # update the weights with the optimizer's rule

    # Report the mean batch loss of the epoch.
    print(f'epoch [{epoch+1}/{epochs}] - loss: {epoch_loss/len(loader_train):.4f}')

epoch [1/70] - loss: 0.1302
epoch [2/70] - loss: 0.0731
epoch [3/70] - loss: 0.0572
epoch [4/70] - loss: 0.0502
epoch [5/70] - loss: 0.0424
epoch [6/70] - loss: 0.0348
epoch [7/70] - loss: 0.0308
epoch [8/70] - loss: 0.0305
epoch [9/70] - loss: 0.0286
epoch [10/70] - loss: 0.0234
epoch [11/70] - loss: 0.0211
epoch [12/70] - loss: 0.0243
epoch [13/70] - loss: 0.0199
epoch [14/70] - loss: 0.0186
epoch [15/70] - loss: 0.0176
epoch [16/70] - loss: 0.0183
epoch [17/70] - loss: 0.0176
epoch [18/70] - loss: 0.0156
epoch [19/70] - loss: 0.0170
epoch [20/70] - loss: 0.0165
epoch [21/70] - loss: 0.0160
epoch [22/70] - loss: 0.0140
epoch [23/70] - loss: 0.0142
epoch [24/70] - loss: 0.0124
epoch [25/70] - loss: 0.0140
epoch [26/70] - loss: 0.0126
epoch [27/70] - loss: 0.0128
epoch [28/70] - loss: 0.0130
epoch [29/70] - loss: 0.0106
epoch [30/70] - loss: 0.0111
epoch [31/70] - loss: 0.0132
epoch [32/70] - loss: 0.0096
epoch [33/70] - loss: 0.0110
epoch [34/70] - loss: 0.0107
epoch [35/70] - loss: 0

## Performance Validation

In [21]:
from sklearn.metrics import roc_auc_score
import numpy as np

# Accumulators for the validation targets and the predicted probabilities.
true_list, preds_list = [], []

In [23]:
model.eval() # evaluation mode (affects layers such as BatchNorm)

# Start from empty lists every time this cell runs; otherwise a re-run would
# append a second copy of every prediction.
true_list = []
preds_list = []

with torch.no_grad(): # gradients are not needed during evaluation
    # `targets` (not `labels`) keeps the `labels` DataFrame from being overwritten.
    for images, targets in loader_valid:
        images = images.to(device)

        outputs = model(images)
        # Move the outputs back to the CPU: roc_auc_score comes from
        # scikit-learn and works on host data.
        preds = torch.softmax(outputs.cpu(), dim = 1)[:, 1] # predicted probability of class 1

        # roc_auc_score needs plain Python lists (or NumPy arrays), not tensors.
        # The targets are only read here, so they are never sent to the device.
        preds_list.extend(preds.tolist())
        true_list.extend(targets.tolist())

print(f'validation data ROC AUC: {roc_auc_score(true_list, preds_list):.4f}')

validation data ROC AUC: 0.9997


## Prediction and Submit Result

In [24]:
# Build the test dataset and loader from the sample-submission rows. The
# loader does not shuffle, so predictions stay in the same order as the rows.
dataset_test = ImageDataset(submission, 'test/', transform_test)
loader_test = DataLoader(dataset_test, batch_size=32, shuffle=False)

model.eval()

preds = []

with torch.no_grad():
    for batch, _ in loader_test:
        logits = model(batch.to(device))
        # Column 1 of the softmax output is kept as the prediction.
        cactus_prob = torch.softmax(logits.cpu(), dim=1)[:, 1]
        preds += cactus_prob.tolist()

In [25]:
# Store the predictions in the 'has_cactus' column of the submission table
# and write it to submission.csv without the DataFrame index column.
submission['has_cactus'] = preds
submission.to_csv('submission.csv', index = False)

In [26]:
import shutil

# Remove the extracted image folders, including everything inside them.
for extracted_dir in ('./train', './test'):
    shutil.rmtree(extracted_dir)