In [1]:
import torch
import random
import numpy as np
import os

# Use the GPU when one is available, otherwise fall back to the CPU.
# The device type string must be lowercase: torch.device('CPU') raises a
# RuntimeError, which would break this cell on any machine without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

# Prepare Data
* Separate train and validation data
* Define dataset class
* Image conversion and data augmentation
* Create Dataset and Dataloader

In [2]:
import pandas as pd

# Root folder of the competition files. The trailing slash is intentional:
# paths are built from it by plain string concatenation.
data_path = "/kaggle/input/aerial-cactus-identification/"

# Training labels (image id in the first column, `has_cactus` target).
labels = pd.read_csv(f"{data_path}train.csv")
# Submission template; later used as the list of test images and as the
# frame the predictions are written into.
submission = pd.read_csv(f"{data_path}sample_submission.csv")

In [3]:
from zipfile import ZipFile

# Unzip the train and test image archives (in that order) into the current
# working directory.
for archive_name in ('train.zip', 'test.zip'):
    with ZipFile(data_path + archive_name) as zipper:
        zipper.extractall()

##### Separate train and validation sets

In [4]:
from sklearn.model_selection import train_test_split

# Separate train and validation sets.
split_options = dict(
    test_size=0.1,                  # train:valid = 9:1
    stratify=labels['has_cactus'],  # keep the target class ratio in both splits
                                    # (the author observed roughly 3:1 in the data)
    random_state=50,                # fixed seed so the split is reproducible
)
train, valid = train_test_split(labels, **split_options)

In [5]:
# Report how many rows ended up in each split.
for split_name, split_df in (("train", train), ("valid", valid)):
    print(f"Number of {split_name} data: {len(split_df)}")

Number of train data: 15750
Number of valid data: 1750


##### Define dataset class

In [6]:
import cv2
from torch.utils.data import Dataset

class ImageDataset(Dataset):
    """Dataset that reads images listed in a DataFrame from a directory.

    Args:
        df: DataFrame whose first column holds the image file name and whose
            second column holds the label (target value).
        img_dir: Directory that contains the image files. A trailing slash is
            optional because the path is built with ``os.path.join``.
        transform: Optional callable applied to the RGB image array before it
            is returned.
    """

    def __init__(self, df, img_dir='./', transform=None):
        super().__init__()  # call the inherited Dataset constructor
        # Keep the passed arguments for use in __len__ / __getitem__.
        self.df = df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of samples (rows of the DataFrame)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be
                decoded by OpenCV.
        """
        img_id = self.df.iloc[idx, 0]  # image ID (file name)
        img_path = os.path.join(self.img_dir, img_id)  # image file path
        image = cv2.imread(img_path)  # read image file (BGR channel order)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than inside cvtColor.
        if image is None:
            raise FileNotFoundError(f"Could not read image file: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # convert BGR -> RGB
        label = self.df.iloc[idx, 1]  # image label (target value)

        if self.transform is not None:
            image = self.transform(image)  # apply the transform when one was given
        return image, label

##### Image conversion and data augmentation

![dataaugmentation](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/gluon/transforms/output_41_0.png)  
(from MXNet documentation)  
  
##### Define image conversion 
Keep the augmentations mild: images transformed too far from the originals make the model's predictions unreliable.

In [7]:
from torchvision import transforms

# Per-channel mean / std used for normalisation (these values match the
# commonly used ImageNet channel statistics).
norm_mean = (0.485, 0.456, 0.406)
norm_std = (0.229, 0.224, 0.225)

# Random augmentations, applied to training images only.
augmentations = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(10),
]

# Converter for train data:
# to tensor -> symmetric 32-pixel padding -> random augmentation -> normalise
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Pad(32, padding_mode='symmetric'),
    *augmentations,
    transforms.Normalize(norm_mean, norm_std),
])

# Converter for validation and test data: same pipeline without the random
# augmentations, so evaluation is deterministic.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Pad(32, padding_mode='symmetric'),
    transforms.Normalize(norm_mean, norm_std),
])

##### Create Dataset & Dataloader

In [8]:
# Train and validation rows both point at images in the train/ folder;
# only the training split uses the augmenting transform.
train_image_dir = 'train/'
dataset_train = ImageDataset(train, train_image_dir, transform_train)
dataset_valid = ImageDataset(valid, train_image_dir, transform_test)

In [9]:
from torch.utils.data import DataLoader

# Mini-batches of 32 images; only the training loader shuffles its samples.
batch_size = 32
loader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True)
loader_valid = DataLoader(dataset_valid, batch_size=batch_size, shuffle=False)

# Create Model  
### $N_{out}=\lfloor\frac{N_{in}+2P-K}{S}\rfloor+1$

In [10]:
import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    """Five-stage convolutional classifier with two output logits.

    Every stage is ``Conv2d(kernel_size=3, padding=2) -> BatchNorm2d ->
    LeakyReLU -> MaxPool2d(2)``, with channels growing 3 -> 32 -> 64 -> 128 ->
    256 -> 512. The final feature map is averaged down to 1x1 per channel and
    passed through two linear layers (512 -> 64 -> 2).

    Global pooling is adaptive, so the network accepts any input size that
    survives the five stages. For the 96x96 inputs used in this notebook the
    last feature map is 4x4, where adaptive pooling to 1x1 is the same mean
    that ``AvgPool2d(kernel_size=4)`` computes.
    """

    def __init__(self):
        super().__init__()
        # Attribute names and the layer order inside each Sequential are kept,
        # so state_dict keys (e.g. "layer1.0.weight") are unchanged.
        self.layer1 = self._conv_block(3, 32)
        self.layer2 = self._conv_block(32, 64)
        self.layer3 = self._conv_block(64, 128)
        self.layer4 = self._conv_block(128, 256)
        self.layer5 = self._conv_block(256, 512)

        # Adaptive pooling always yields a 1x1 map per channel, whatever the
        # spatial size coming out of layer5.
        self.avg_pool = nn.AdaptiveAvgPool2d(output_size=1)
        # fully-connected
        # NOTE(review): there is no non-linearity between fc1 and fc2, so the
        # two layers compose to a single affine map; kept as in the original.
        self.fc1 = nn.Linear(in_features=512, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=2)

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Build one Conv -> BatchNorm -> LeakyReLU -> MaxPool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=3, padding=2),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

    def forward(self, x):
        """Map a batch of images ``(N, 3, H, W)`` to logits of shape ``(N, 2)``."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.avg_pool(x)
        # Flatten everything except the batch dimension. Unlike view(-1, 512),
        # this can never fold spatial positions into the batch dimension.
        x = x.flatten(start_dim=1)
        x = self.fc1(x)
        x = self.fc2(x)
        return x

In [11]:
# Build the network, then move its parameters and buffers to the selected device.
model = Model()
model = model.to(device)

# Model Train
* Setting loss function & optimizer

In [12]:
# Loss function: cross-entropy between the model's two output logits and the
# integer class labels.
criterion = nn.CrossEntropyLoss()

In [13]:
# Optimizer: Adamax over all model parameters with a small, fixed learning rate.
learning_rate = 6e-5
optimizer = torch.optim.Adamax(params=model.parameters(), lr=learning_rate)

In [14]:
epochs = 70

for epoch in range(epochs):
    # Make sure BatchNorm layers use batch statistics and update their running
    # averages. Without this, re-running the cell after an evaluation cell
    # (which calls model.eval()) would silently train in eval mode.
    model.train()
    epoch_loss = 0.0  # sum of mini-batch losses for this epoch

    # The loop variables are deliberately not called `labels`: that name
    # belongs to the labels DataFrame loaded earlier and must stay intact so
    # earlier cells can be re-run.
    for batch_images, batch_labels in loader_train:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()                     # clear gradients from the previous step
        outputs = model(batch_images)             # forward pass -> logits
        loss = criterion(outputs, batch_labels)
        epoch_loss += loss.item()
        loss.backward()                           # back-propagate
        optimizer.step()                          # update the weights

    # Mean mini-batch loss over the epoch.
    print(f"epoch [{epoch+1}/{epochs}] - loss: {epoch_loss/len(loader_train):.4f}")

epoch [1/70] - loss: 0.1309
epoch [2/70] - loss: 0.0644
epoch [3/70] - loss: 0.0504
epoch [4/70] - loss: 0.0445
epoch [5/70] - loss: 0.0401
epoch [6/70] - loss: 0.0339
epoch [7/70] - loss: 0.0308
epoch [8/70] - loss: 0.0278
epoch [9/70] - loss: 0.0266
epoch [10/70] - loss: 0.0249
epoch [11/70] - loss: 0.0236
epoch [12/70] - loss: 0.0234
epoch [13/70] - loss: 0.0219
epoch [14/70] - loss: 0.0202
epoch [15/70] - loss: 0.0206
epoch [16/70] - loss: 0.0179
epoch [17/70] - loss: 0.0191
epoch [18/70] - loss: 0.0145
epoch [19/70] - loss: 0.0170
epoch [20/70] - loss: 0.0138
epoch [21/70] - loss: 0.0159
epoch [22/70] - loss: 0.0149
epoch [23/70] - loss: 0.0150
epoch [24/70] - loss: 0.0135
epoch [25/70] - loss: 0.0146
epoch [26/70] - loss: 0.0123
epoch [27/70] - loss: 0.0128
epoch [28/70] - loss: 0.0117
epoch [29/70] - loss: 0.0102
epoch [30/70] - loss: 0.0106
epoch [31/70] - loss: 0.0094
epoch [32/70] - loss: 0.0097
epoch [33/70] - loss: 0.0090
epoch [34/70] - loss: 0.0099
epoch [35/70] - loss: 0

# Performance Validation

In [15]:
from sklearn.metrics import roc_auc_score

true_list = []   # ground-truth labels of the validation set
preds_list = []  # predicted probability of class 1 for each validation image

model.eval()  # evaluation mode: BatchNorm uses its running statistics

with torch.no_grad():  # no gradients are needed for evaluation
    # The loop variables are deliberately not called `labels`, so the labels
    # DataFrame loaded earlier is not overwritten.
    for batch_images, batch_labels in loader_valid:
        batch_images = batch_images.to(device)

        outputs = model(batch_images)
        # Probability of the positive class (column 1 of the softmax output).
        positive_probs = torch.softmax(outputs.cpu(), dim=1)[:, 1]
        # Store plain Python numbers rather than 0-dim tensors, matching how
        # the test predictions are collected.
        preds_list.extend(positive_probs.tolist())
        # Labels are only needed on the host, so they are never moved to the device.
        true_list.extend(batch_labels.tolist())

print(f"ROC AUC of validation : {roc_auc_score(true_list, preds_list):.4f}")

ROC AUC of validation : 0.9998


# Predict and Submit

In [16]:
# The submission template lists the test image ids, so it doubles as the
# DataFrame behind the test dataset (its second column is ignored below).
dataset_test = ImageDataset(submission, 'test/', transform_test)
loader_test = DataLoader(dataset_test, batch_size=32, shuffle=False)

model.eval()

# Predicted probability of class 1, in the same order as the submission rows.
preds = []

with torch.no_grad():
    for batch, _ in loader_test:
        logits = model(batch.to(device))
        positive_probs = torch.softmax(logits.cpu(), dim=1)[:, 1]
        preds += positive_probs.tolist()

In [17]:
# Write the predicted probabilities into the target column, then save the
# frame without its index.
submission['has_cactus'] = preds
submission.to_csv(path_or_buf='submission.csv', index=False)

## Delete unzipped files

In [18]:
import shutil

# Remove the train/ and test/ image folders from the working directory.
for extracted_dir in ('./train', './test'):
    shutil.rmtree(extracted_dir)