<a href="https://www.kaggle.com/code/cocoyachi/cactus-simplecnn-pytorch?scriptVersionId=123675728" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
import torch
import random
import numpy as np
import os

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
# Device type strings are lowercase: torch.device('CPU') raises a RuntimeError,
# so the fallback must be spelled 'cpu'.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

### Prepare Data
* Separate train and validation data
* Define dataset class
* Create Dataset
* Create Dataset loader

In [2]:
import pandas as pd

# Root folder of the competition data. The trailing slash matters because
# file names are appended to it by plain string concatenation.
data_path = "/kaggle/input/aerial-cactus-identification/"

# Read train.csv and sample_submission.csv from that folder
train_csv = f"{data_path}train.csv"
submission_csv = f"{data_path}sample_submission.csv"
labels = pd.read_csv(train_csv)
submission = pd.read_csv(submission_csv)

In [3]:
from zipfile import ZipFile

# Extract the train and test image archives (extractall() is called without a target path)
for archive_name in ('train.zip', 'test.zip'):
    with ZipFile(data_path + archive_name) as archive:
        archive.extractall()

##### Separate train and validation data

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the labelled rows for validation (train:valid = 9:1).
# Stratifying on the target keeps the has_cactus class ratio the same in both
# splits; the fixed random_state makes the split reproducible.
split_options = dict(
    test_size=0.1,
    stratify=labels['has_cactus'],
    random_state=50,
)
train, valid = train_test_split(labels, **split_options)

In [5]:
# Report how many rows ended up in each split
train_count = len(train)
valid_count = len(valid)
print("Number of train data:", train_count)
print("Number of valid data:", valid_count)

Number of train data: 15750
Number of valid data: 1750


##### Define dataset class

In [6]:
import cv2
from torch.utils.data import Dataset

class ImageDataset(Dataset):
    """Dataset that reads images from disk and pairs each with its label.

    Parameters
    ----------
    df : pandas.DataFrame
        Accessed positionally: column 0 holds the image file name and
        column 1 holds the target value.
    img_dir : str
        Directory that contains the image files. A trailing slash is optional.
    transform : callable, optional
        Applied to the RGB image array before it is returned.
    """

    def __init__(self, df, img_dir='./', transform=None):
        super().__init__()  # run the base Dataset constructor
        self.df = df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the dataframe."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        Raises
        ------
        FileNotFoundError
            If the image file is missing or cannot be decoded.
        """
        img_id = self.df.iloc[idx, 0]  # image file name
        label = self.df.iloc[idx, 1]   # target value
        # os.path.join works whether or not img_dir ends with a separator
        img_path = os.path.join(self.img_dir, img_id)

        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with a clear message rather than inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR

        if self.transform is not None:
            image = self.transform(image)
        return image, label

##### Create Dataset

In [7]:
from torchvision import transforms # Module for image transform

# Transform applied to every image by the datasets below: converts the image
# array into a torch tensor (channels-first, per torchvision's ToTensor docs).
transform = transforms.ToTensor()

In [8]:
# Both splits read their images from the same directory
train_img_dir = 'train/'
dataset_train = ImageDataset(train, train_img_dir, transform)
dataset_valid = ImageDataset(valid, train_img_dir, transform)

##### Create Dataset loader
- An object that fetches data in the specified batch size

In [9]:
from torch.utils.data import DataLoader

# Only the training batches are shuffled; validation keeps the dataset order.
batch_size = 32
loader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True)
loader_valid = DataLoader(dataset_valid, batch_size=batch_size, shuffle=False)

### Create CNN Model

In [10]:
import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    """Small CNN that maps a batch of 3x32x32 images to two class logits.

    Input tensors are laid out as (batch, channel, height, width).
    Spatial sizes follow N_out = floor((N_in + 2P - K) / S) + 1:

        conv1    (K=3, P=2, S=1) -> 32 x 34 x 34
        max_pool (K=2, S=2)      -> 32 x 17 x 17
        conv2    (K=3, P=2, S=1) -> 64 x 19 x 19
        max_pool (K=2, S=2)      -> 64 x 9 x 9
        avg_pool (K=2, S=2)      -> 64 x 4 x 4
        flatten                  -> 1024
        fc                       -> 2
    """

    def __init__(self):
        super().__init__()

        # First convolution layer: 3 input channels -> 32 feature maps
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32,
                               kernel_size=3, padding=2)

        # Second convolution layer: 32 -> 64 feature maps
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64,
                               kernel_size=3, padding=2)

        # Max pooling layer (shared by both convolution stages)
        self.max_pool = nn.MaxPool2d(kernel_size=2)

        # Average pooling layer
        self.avg_pool = nn.AvgPool2d(kernel_size=2)

        # Fully-connected layer: 64 channels x 4 x 4 spatial positions -> 2 logits
        self.fc = nn.Linear(in_features=64 * 4 * 4, out_features=2)

    def forward(self, x):
        """Run forward propagation and return logits of shape (batch, 2).

        Raises
        ------
        RuntimeError
            If the input spatial size does not reduce to 4x4 before the
            fully-connected layer (i.e. the images are not 32x32).
        """
        x = self.max_pool(F.relu(self.conv1(x)))
        x = self.max_pool(F.relu(self.conv2(x)))
        x = self.avg_pool(x)
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 64 * 4 * 4), this never folds surplus spatial elements into
        # the batch dimension; a wrong input size fails loudly in self.fc.
        x = x.flatten(start_dim=1)
        x = self.fc(x)
        return x

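# $N_{out}=\lfloor\frac{N_{in}+2P-K}{S}\rfloor+1$

Here $N_{in}$ is the input size, $P$ the padding, $K$ the kernel size and $S$ the stride. The shapes below are (batch, channels, height, width) for a batch of 32 images.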

Image : (32, 3, 32, 32)  
Conv1 : (32, 32, 34, 34)  
Maxpool1 : (32, 32, 17, 17)  
Conv2 : (32, 64, 19, 19)  
Maxpool2 : (32, 64, 9, 9)  
Avgpool : (32, 64, 4, 4)  
Flatten : (32, 1024)  
Fully-connected : (32, 2)

In [11]:
# Build the network, then move its parameters onto the selected device
model = Model()
model = model.to(device)

In [12]:
# Display the model's layer summary as the cell output
model

Model(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (max_pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (avg_pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (fc): Linear(in_features=1024, out_features=2, bias=True)
)

### Train Model

##### Loss function and optimizer

In [13]:
# Loss: cross-entropy computed on the model's two output logits
criterion = nn.CrossEntropyLoss()

# Optimizer: SGD over all model parameters
learning_rate = 0.01
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

#### Train the model
1. Fetch a batch of data from the data loader
2. Move the images and labels to the device
3. Reset the gradients held by the optimizer
4. Pass the images through the network (forward propagation) to get the outputs
5. Calculate the loss by comparing the predicted and actual values
6. Perform backpropagation on the loss
7. Update the weights using the gradients obtained from backpropagation
8. Repeat steps 1–7 for every batch (iteration)
9. Repeat steps 1–8 for every epoch

In [14]:
epochs = 10

for epoch in range(epochs):
    # Put the network in training mode at the start of every epoch, so the
    # cell still trains correctly when re-run after model.eval() was called.
    model.train()
    epoch_loss = 0  # running sum of the per-batch losses

    # NOTE: the loop variable `labels` rebinds the module-level name that
    # previously held the label DataFrame read from train.csv.
    for images, labels in loader_train:
        images = images.to(device)
        labels = labels.to(device)

        # reset the gradients accumulated in the optimizer
        optimizer.zero_grad()
        # forward propagation
        outputs = model(images)
        # calculate loss
        loss = criterion(outputs, labels)
        # add the loss of the current batch
        epoch_loss += loss.item()
        # run backpropagation
        loss.backward()
        # update weights
        optimizer.step()

    # Print the mean training loss per batch for this epoch
    print(f"epoch [{epoch+1}/{epochs}] - loss: {epoch_loss/len(loader_train):.4f}")

epoch [1/10] - loss: 0.5246
epoch [2/10] - loss: 0.3603
epoch [3/10] - loss: 0.2461
epoch [4/10] - loss: 0.2035
epoch [5/10] - loss: 0.1813
epoch [6/10] - loss: 0.1646
epoch [7/10] - loss: 0.1506
epoch [8/10] - loss: 0.1440
epoch [9/10] - loss: 0.1336
epoch [10/10] - loss: 0.1268


### Performance Validation

In [15]:
from sklearn.metrics import roc_auc_score
# Empty accumulators for the true targets and the predicted probabilities
true_list, preds_list = [], []

In [16]:
model.eval()  # set the model to evaluation mode

# Start from empty lists on every run so that re-executing this cell
# (e.g. after retraining) does not accumulate predictions from earlier runs.
true_list = []
preds_list = []

with torch.no_grad():  # gradients are not needed for evaluation
    for images, labels in loader_valid:
        images = images.to(device)
        labels = labels.to(device)

        # forward propagation
        outputs = model(images)
        # softmax over the two logits; column 1 is the probability of class 1
        preds = torch.softmax(outputs.cpu(), dim=1)[:, 1]
        true = labels.cpu()
        # store plain Python numbers rather than 0-d tensors
        preds_list.extend(preds.tolist())
        true_list.extend(true.tolist())

# ROC AUC score
print(f"ROC AUC of validation data : {roc_auc_score(true_list, preds_list):.4f}")

ROC AUC of validation data : 0.9907


### Predict and submit

In [17]:
# Build the test dataset from the submission dataframe, then batch it without shuffling
test_batch_size = 32
dataset_test = ImageDataset(submission, 'test/', transform)
loader_test = DataLoader(dataset_test, batch_size=test_batch_size, shuffle=False)

In [18]:
model.eval()  # set the model to evaluation mode

preds = []  # predicted probability of class 1, one entry per test image

with torch.no_grad():  # gradients are not needed for inference
    # The second element of each batch is ignored
    for images, _ in loader_test:
        images = images.to(device)

        # forward propagation
        outputs = model(images)
        # softmax over the two logits; column 1 is the probability of class 1
        preds_part = torch.softmax(outputs.cpu(), dim=1)[:, 1].tolist()
        preds.extend(preds_part)

In [19]:
# Preview the first five predicted probabilities
preds[:5]

[0.8638256788253784,
 0.9987354874610901,
 0.01867538131773472,
 0.016370542347431183,
 0.5339356064796448]

##### result

In [20]:
# Store the predicted probabilities in the target column and write the CSV without the index
output_path = 'submission.csv'
submission['has_cactus'] = preds
submission.to_csv(output_path, index=False)

### Remove unzipped files

In [21]:
import shutil

# Delete the train and test image directories
for extracted_dir in ('./train', './test'):
    shutil.rmtree(extracted_dir)