In [1]:
import numpy as np
import pandas as pd
import torch
import os
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import datasets, transforms
from torchvision import transforms
from PIL import Image

import time

from torch.nn import Conv2d, functional as F, Linear, MaxPool2d, Module
from torch import nn, optim
import torch

import gc
from inspect import getsourcefile
from os.path import abspath
from os import listdir
from json import dumps
import sys
import warnings
warnings.filterwarnings("ignore")

import random
import cv2
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torch
from torch.autograd import Variable
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout
from torch.optim import Adam, SGD

In [2]:
datadir = "../input/cats-dogs/"
datanames = []
for dirname, _, filenames in os.walk('/kaggle/input/cats-dogs/'):
    for filename in filenames:
        temp = filename.split('.')
        if temp[1] == "jpg":
            datanames.append(temp[0])


In [3]:
def get_train_transforms():
    return A.Compose(
        [
            A.RandomSizedBBoxSafeCrop(width=299, height=299, erosion_rate=0.1, p=0.25),
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit= 0.2, 
                                     val_shift_limit=0.2, p=0.9),
                A.RandomBrightnessContrast(brightness_limit=0.2, 
                                           contrast_limit=0.2, p=0.9),
            ],p=0.9),
            #A.ToGray(p=0.01),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.Rotate(limit=45, p=1),
            A.ChannelShuffle(p=0.25),
            A.FancyPCA(),
            A.GaussNoise(p=0.25),
            A.Blur(blur_limit=4,p=0.25),
            A.Cutout(num_holes=8, max_h_size=8, max_w_size=8, fill_value=0, p=0.5),
            A.Resize(width=299, height=299, p=1.0),
            ToTensorV2(p=1.0),
        ], 
        p=1.0, 
        bbox_params = A.BboxParams(
            format='pascal_voc',
            min_area=0,
            min_visibility=0,
            label_fields=['field_id']
            )
    )
def get_test_transforms():
    return A.Compose(
        [
            A.Resize(width=299, height=299, p=1.0),
            ToTensorV2(p=1.0),
        ], 
        p=1.0, 
        bbox_params=A.BboxParams(
            format='pascal_voc',
            min_area=0, 
            min_visibility=0,
            label_fields=['field_id']
        )
    )


In [4]:
class Custom_Dataset(Dataset):

    def __init__(self, folder_path, datadir, transforms=None):
        self.folder_path = folder_path
        self.datadir = datadir
        self.data_len = len(self.datadir)
        self.transforms = transforms
        
    def __getitem__(self, index):
        single_image_path = self.datadir[index]
        img = cv2.imread(self.folder_path+ '/' + single_image_path + ".jpg")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        #img = Image.open(self.folder_path+ '/' + single_image_path + ".jpg").convert('RGB')
        target =  np.loadtxt(self.folder_path+ '/' + single_image_path + ".txt")
        if self.transforms is not None:
            #img = self.transforms(img)
            tr = self.transforms(image=img, bboxes=[target[1:]], field_id=['1, 2, 3, 4'])
            img, bbox = tr["image"], tr["bboxes"]
        img = img.type(torch.float)/255
        bbox = torch.tensor(bbox[0])
        return (img, np.int_(target[0])-1, bbox)

    def __len__(self):  
        return self.data_len


In [5]:
def load_split_train_test(datadir, datanames, valid_size = .1, batch_size = 128):
    num_train = len(datanames)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))
    np.random.shuffle(indices)
    train_idx, test_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    test_sampler = SubsetRandomSampler(test_idx)
    custom_dataset_train = Custom_Dataset(datadir, datanames, transforms=get_train_transforms())
    loader_train = torch.utils.data.DataLoader(dataset=custom_dataset_train, batch_size=batch_size,
                                                       shuffle=False, sampler=train_sampler)
    custom_dataset_test = Custom_Dataset(datadir, datanames, transforms=get_test_transforms())
    loader_test = torch.utils.data.DataLoader(dataset=custom_dataset_test, batch_size=batch_size,
                                                      shuffle=False, sampler=test_sampler)
    return loader_train, loader_test

In [6]:
class conv_block(nn.Module):
    def __init__(self, in_ch, out_ch, kernel=3, stride=1, padding = 1, pool = False):  # ch_in, ch_out, kernel, stride, padding, groups
        super(conv_block, self).__init__()
        self.conv = nn.Conv2d(in_ch, out_ch, kernel, stride, padding)
        self.bn = nn.BatchNorm2d(out_ch)
        self.act = nn.SiLU()
        self.Maxpool = nn.MaxPool2d(4)
        self.pool = pool
        
    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.act(x)
        if self.pool: x = self.Maxpool(x)
        return x

class Net(nn.Module):
    def __init__(self, in_channels, num_classes = 2):
        super().__init__()
        self.conv1 = conv_block(in_channels, 64)
        self.res1 = nn.Sequential(conv_block(64, 64), conv_block(64, 64))
        self.conv2 = conv_block(64, 128, pool=True) #128 x 32 x 32
        self.res2 = nn.Sequential(conv_block(128, 128), conv_block(128, 128), conv_block(128, 128))
        self.conv3 = conv_block(128, 512, pool=True) #256 x 8 x 8
        self.res3 = nn.Sequential(conv_block(512, 512), conv_block(512, 512))
        self.conv4 = conv_block(512, 1024, pool=True) #512 x 2 x 2
        self.res4 = nn.Sequential(conv_block(1024, 1024), conv_block(1024, 1024, pool = True))
        self.classifier = nn.Sequential(nn.MaxPool2d(4), #1024 x 1 x 1
                                        nn.Flatten(), 
                                        nn.Dropout(0.2),
                                        nn.Linear(1024, 256),
                                        nn.ReLU(),
                                        nn.Linear(256, 1))
        self.bbox = nn.Sequential(nn.MaxPool2d(4), #1024 x 1 x 1
                                        nn.Flatten(), 
                                        nn.Dropout(0.2),
                                        nn.Linear(1024, 512),
                                        nn.ReLU(),
                                        nn.Linear(512, 4))


    def forward(self, xb):
        out = self.conv1(xb)
        out = self.res1(out) + out
        out = self.conv2(out)
        out = self.res2(out) + out
        out = self.conv3(out)
        out = self.res3(out) + out
        out = self.conv4(out)
        out = self.res4(out) + out
        cl = F.sigmoid(self.classifier(out))
        bb = F.sigmoid(self.bbox(out))
        return cl, bb

In [7]:
def IoU(boxA, boxB):
    iou = 0
    for i in range(boxA.shape[0]):
        xA = max(boxA[i,0], boxB[i,0])
        yA = max(boxA[i,1], boxB[i,1])
        xB = min(boxA[i,2], boxB[i,2])
        yB = min(boxA[i,3], boxB[i,3])
    
        interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    
        boxAArea = (boxA[i,2] - boxA[i,0] + 1) * (boxA[i,3] - boxA[i,1] + 1)
        boxBArea = (boxB[i,2] - boxB[i,0] + 1) * (boxB[i,3] - boxB[i,1] + 1)
    
        iou += float(interArea) / float(boxAArea + boxBArea - interArea)
        
    return iou

In [8]:
def train_metrics(model, optimizer, scheduler, train_dl, epoch):
    steps = 0
    total = 0
    sum_loss = 0
    for x, y_class, y_bb in train_dl:
        batch = y_class.shape[0]
        steps += 1
        optimizer.zero_grad()
        x, y_class, y_bb = x.to(device), y_class.to(device).float(), (y_bb.to(device)/299)
        out_class, out_bb = model(x)
        loss_class = nn.BCELoss()(out_class, y_class.unsqueeze(1))
        loss_bb = F.smooth_l1_loss(out_bb.float(), y_bb.float(), reduction="sum")
        loss = loss_class + 5*loss_bb
        loss.backward()
        optimizer.step()
        total += batch
        sum_loss += loss.item()
        #if epoch<23:
        scheduler.step()
        #elif epoch >30:
            
    return steps, total, sum_loss, model

In [9]:
def val_metrics(model, valid_dl):
    start_time = time.time()
    model.eval()
    total = 0
    sum_loss = 0
    correct = 0 
    Iou_loss_cum = 0
    for x, y_class, y_bb in valid_dl:
        batch = y_class.shape[0]
        x, y_class, y_bb = x.to(device), y_class.to(device).float(), (y_bb.to(device)/299)
        out_class, out_bb = model(x)
        loss_class = nn.BCELoss()(out_class, y_class.unsqueeze(1))
        loss_bb = F.smooth_l1_loss(out_bb.float(), y_bb.float(), reduction="sum")
        IoU_loss = IoU(out_bb.cpu().detach().numpy(), y_bb.cpu().detach().numpy())
        Iou_loss_cum += IoU_loss
        loss = loss_class + 5*loss_bb
        correct += (out_class.cpu().detach().numpy().round() == y_class.cpu().detach().numpy().reshape(-1,1)).sum()
        sum_loss += loss.item()
        total += batch
        
    val_time = (time.time() - start_time)
    model.train()
    return sum_loss/total, correct/total, Iou_loss_cum/total, model, val_time

In [10]:
def fit_model(model, optimizer, scheduler, train_dl, val_dl, epochs=10):
    best_loss_val = 1
    for epoch in range(epochs):
        #if epoch > 50:
        steps, total, sum_loss, model = train_metrics(model, optimizer, scheduler, train_dl, epoch)
        gc.collect()
        torch.cuda.empty_cache()
        sum_loss_val, correct, mIoU, model, val_time = val_metrics(model, val_dl)
        if epoch>50 and sum_loss_val<=best_loss_val:
            best_loss_val = sum_loss_val
            torch.save(model.state_dict(), f'epoch:{epoch}_sum_loss_val:{sum_loss_val}.pt')
        gc.collect()
        torch.cuda.empty_cache()
        print("train_loss %.3f val_loss %.3f val_acc %.3f val_mIoU %.3f val_time %.3f" % (sum_loss/total, sum_loss_val, correct, mIoU, val_time))
    return model, sum_loss/total

In [11]:
epochs = 100
lr = 0.0005
batch_size = 16
train_dl, val_dl = load_split_train_test(datadir, datanames, .1138, batch_size)
device = torch.device("cuda" if torch.cuda.is_available() 
                             else "cpu")

model = Net(3,2)
model.to(device);
optimizer = optim.Adam(model.parameters(), lr=lr)
#scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[4,8,12,16,25,40], gamma=0.6)
#scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.0001, max_lr=0.005,step_size_up=9,mode="exp_range",gamma=0.9, cycle_momentum=False,scale_mode =  "cycle")
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, lr, epochs=epochs,
                                                steps_per_epoch=len(train_dl))
model, loss = fit_model(model, optimizer, scheduler, train_dl, val_dl, epochs)
print(loss)

torch.save(model, 'aerialmodel.pth')

train_loss 0.426 val_loss 0.347 val_acc 0.673 val_mIoU 0.702 val_time 9.396
train_loss 0.366 val_loss 0.308 val_acc 0.675 val_mIoU 0.715 val_time 5.547
train_loss 0.334 val_loss 0.398 val_acc 0.670 val_mIoU 0.689 val_time 5.779
train_loss 0.302 val_loss 0.312 val_acc 0.694 val_mIoU 0.723 val_time 5.624
train_loss 0.280 val_loss 0.249 val_acc 0.706 val_mIoU 0.750 val_time 5.645
train_loss 0.254 val_loss 0.251 val_acc 0.714 val_mIoU 0.750 val_time 5.980
train_loss 0.225 val_loss 0.242 val_acc 0.743 val_mIoU 0.754 val_time 5.725
train_loss 0.206 val_loss 0.180 val_acc 0.691 val_mIoU 0.785 val_time 5.800
train_loss 0.190 val_loss 0.158 val_acc 0.673 val_mIoU 0.800 val_time 6.052
train_loss 0.183 val_loss 0.257 val_acc 0.675 val_mIoU 0.754 val_time 5.727
train_loss 0.169 val_loss 0.187 val_acc 0.756 val_mIoU 0.778 val_time 5.705
train_loss 0.158 val_loss 0.132 val_acc 0.784 val_mIoU 0.822 val_time 5.862
train_loss 0.157 val_loss 0.149 val_acc 0.808 val_mIoU 0.798 val_time 5.724
train_loss 0