# Directory

```bash
├── project
│   ├── models
│   ├── script.ipynb
├── test_dataset
│   ├── BC
│   ├── LT
│   ├── test_data.csv
└── train_dataset
│   ├── BC
│   ├── LT
``` 

# OS

- Ubuntu
- Intel i9 10900
- NVIDIA GeForce RTX 3080

# Import Packages

In [4]:
!pip install parmap
!pip install timm

Collecting parmap
  Downloading parmap-1.5.3-py2.py3-none-any.whl (12 kB)
Installing collected packages: parmap
Successfully installed parmap-1.5.3


In [7]:
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch.nn as nn
import torch.nn.functional as F
from torch.cuda.amp import autocast
from torchvision import models

import albumentations as albu
from albumentations.pytorch import ToTensorV2
from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
from torch.utils.data.sampler import SequentialSampler, RandomSampler
import cv2
from sklearn.model_selection import StratifiedKFold
from itertools import permutations
from PIL import Image
from sklearn.model_selection import GroupKFold
import glob
from tqdm import tqdm
import random

import timm

import matplotlib.pyplot as plt
import parmap
import sklearn

In [10]:
print('numpy verison :', np.__version__)
print('pandas version :', pd.__version__)
print('opencv version :', cv2.__version__)
print('torch version :', torch.__version__)
print('sklearn verison :', sklearn.__version__)
print('albumentations version :', albu.__version__)

numpy verison : 1.19.5
pandas version : 1.1.5
opencv version : 4.5.2
torch version : 1.8.1+cu111
sklearn verison : 0.24.2
albumentations version : 1.0.0


# Image Resize

In [11]:
def read_crop(path):
    data = Image.open(path)
        
    data = data - np.min(data)
    data = data / np.max(data)
    data = (data * 255).astype(np.uint8)
        
    return data

In [12]:
def resize(img, size):
    im = Image.fromarray(img)
    im = im.resize((size, size))
    
    return im

In [13]:
def save_image(path):
    size = 512
    save_dir = os.path.join(path, "resized"+str(size))
    os.makedirs(save_dir, exist_ok=True)
    f = []
    for (_, _ , filenames) in os.walk(path):
        f.extend(filenames)
        
    for filename in f:
        img = read_crop(os.path.join(path, filename))
        img = resize(img, size)
        img.save(os.path.join(save_dir, filename))

In [15]:
path_list = glob.glob("../train_dataset/BC/*")
path_list.extend(glob.glob("../train_dataset/LT/*"))
path_list.extend(glob.glob("../test_dataset/BC/*"))
path_list.extend(glob.glob("../test_dataset/LT/*"))

In [16]:
result = parmap.map(save_image, path_list, pm_pbar=True, pm_processes=4)

  0%|          | 0/25 [00:00<?, ?it/s]

In [17]:
def seed_everything(seed):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Data Loader

## Preprocessing

1) Permutation

- 각 식물 개체마다 순열로 정렬시켜 데이터 생성

2) 보조 변수 생성

- 모형 성능 향상을 위해 보조 변수 생성
- 2가지의 보조변수 활용
- 식물 개체의 종류(aux_input1 - CropDataset: LT / BC)
- After와 Before 이미지 용량 차이(aux_input2 - CropDataset: numeric)
- Delta에 영향을 주는 종속변수 생성(label1 - CropDataset) \
  After Date > Before Date이면 1 \
  After Date ≤ Before Date이면 0



## Data Augmentation

- Before와 After 이미지에 같은 종류의 Augmentation을 실시

In [18]:
class CropDataset(Dataset):
    def __init__(self, df, image_size, mode, f):
        super().__init__()
        self.df = df
        assert mode in ["train", "valid"] 
        self.mode = mode
        self.image_size = image_size
        
        if self.mode == 'train':
            self.df = self.df.sample(frac=f, random_state=random_seed).reset_index(drop=True)
            self.transform = albu.Compose([albu.Resize(self.image_size, self.image_size),
                                            albu.RandomResizedCrop(height=self.image_size, width=self.image_size, 
                                                    scale=(0.25,1.0), ratio=(0.75, 1.3333333333333333), 
                                                    interpolation=1, p=1.0),
                                            albu.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, 
                                                                  rotate_limit=30, interpolation=1, border_mode=0, value=0, p=0.25),
                                            albu.HorizontalFlip(p=0.5),
                                            albu.VerticalFlip(p=0.5),
                                            albu.OneOf([
                                                albu.MotionBlur(p=.2),
                                                albu.MedianBlur(blur_limit=3, p=0.1),
                                                albu.Blur(blur_limit=3, p=0.1),
                                            ], p=0.25),
                                            albu.OneOf([
                                                albu.CLAHE(clip_limit=2),
                                                albu.IAASharpen(),
                                                albu.IAAEmboss(),
                                                albu.RandomBrightnessContrast(),            
                                            ], p=0.25),
                                            albu.Cutout(num_holes=8, max_h_size=32, max_w_size=32, fill_value=0, p=0.25),
                                            albu.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
                                            ToTensorV2(),
                                        ], additional_targets={'image1': 'image'})
        else:
            self.transform = albu.Compose([
                albu.Resize(self.image_size, self.image_size),
                albu.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
                ToTensorV2(),
            ], additional_targets={'image1': 'image'})

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        before_img_path = self.df.loc[index, 'before_file_path']
        after_img_path = self.df.loc[index, 'after_file_path']

        before_img = cv2.imread(before_img_path)
        after_img = cv2.imread(after_img_path)
        
        label1 = torch.FloatTensor([self.df.loc[index, "label"]])
        label2 = torch.FloatTensor([self.df.loc[index, "delta"]])
        aux_input1 = torch.FloatTensor([self.df.loc[index, "cls"]])

        file_size1 = os.path.getsize(before_img_path)
        file_size2 = os.path.getsize(after_img_path)
        aux_input2 = torch.FloatTensor([file_size2/10000 - file_size1/10000])
        
        transformed = self.transform(image=before_img, image1=after_img)
        
        before_img = transformed['image'] 
        after_img = transformed['image1']
               
        return before_img, after_img, aux_input1, aux_input2, label1, label2 

# Modeling

- CNN2RNN 모형
- 내부 CNN모형: ResNet50, ResNet101
- 내부 RNN모형: Bidirectional RNN
- fully connected layer의 input: RNN의 feature + 보조 변수(class, diff_size)

In [6]:
class CNN2RNN_Network(nn.Module):
    def __init__(self, model, hidden_dim, img_size):
        super(CNN2RNN_Network, self).__init__()
        self.input_size = 2048
            
        if model == "resnet50":
            model = models.resnet50(pretrained = True)
        elif model == "resnet101":
            model = models.resnet101(pretrained = True)
        modules = list(model.children())[:-2]
        self.feature_extract_model = nn.Sequential(*modules)
        self.img_size = img_size
        self.seq_length = 2

        self.RNN = nn.RNN(input_size = self.input_size, hidden_size=hidden_dim, num_layers=2, bidirectional=True)
        self.fc1 = nn.Linear(hidden_dim*2 + 2, 1)
        self.fc2 = nn.Linear(hidden_dim*2 + 3, 1)
        
    def forward(self,x, aux_input1, aux_input2):
        x = x.view(-1,3,self.img_size, self.img_size)
        x = self.feature_extract_model(x)
        x = x.mean(dim=(-2,-1))
        x = x.view(2,-1,self.input_size)
        x,_ = self.RNN(x) 

        x = torch.cat([x[-1],aux_input1, aux_input2], dim=1)
        prob = self.fc1(x)
        x = torch.cat([x, prob], dim=1)
        delta = self.fc2(x) 
        
        return prob, delta

In [7]:
def train_step(batch_item, epoch, batch, training, scheduler):
    before_img = batch_item[0].to(device)
    after_img = batch_item[1].to(device)
    aux_input1 = batch_item[2].to(device)
    aux_input2 = batch_item[3].to(device)
    label1 = batch_item[4].to(device)
    label2 = batch_item[5].to(device)
    
    img = torch.stack([before_img, after_img])
    
    total = 0
    correct = 0
    if training is True:
        scheduler.step()
        model.train()
        optimizer.zero_grad()

        with torch.cuda.amp.autocast():
            output = model(img, aux_input1, aux_input2)
            loss1 = criterion1(output[0], label1)
            loss2 = criterion2(output[1], label2)
            loss = loss1 + loss2
            predicted = torch.sigmoid(output[0].data) > 0.5
            predicted = predicted.float()
            total += label1.size(0)
            correct += (predicted == label1).sum().item()
        loss.backward()
        optimizer.step()
        return loss.item(), loss2.item(), correct
    else:
        model.eval()
        with torch.no_grad():
            output = model(img, aux_input1, aux_input2)
            loss1 = criterion1(output[0], label1)
            loss2 = criterion2(output[1], label2)
            loss = loss1 + loss2
            predicted = torch.sigmoid(output[0].data) > 0.5
            predicted = predicted.float()
            total += label1.size(0)
            correct += (predicted == label1).sum().item()
            
        return loss.item(), loss2.item(), correct

In [8]:
p = 5
folds = [0,1,2,3,4]
nets = ["resnet101", "resnet50"]

for net in nets:
    random_seed = 123
    seed_everything(random_seed)
    print("=================",net,"=================")
    tr_path = "../train_dataset/"
    tr_file = "train_data.csv"

    pms = []
    cls = []
    splits = ["BC", "LT"]

    for split in splits:
        path = "../train_dataset/{}".format(split)

        folders = glob.glob(os.path.join(path, "*"))

        dirs = [os.path.join(f, "resized512") for f in folders]

        for d in dirs:
            permute = permutations(glob.glob(os.path.join(d, "*.png")),2)
            pm = list(permute)
            pms.extend(pm)
            cls.extend([int(split == "BC")] * len(pm))
    
    tr_data = pd.DataFrame(pms)
    tr_data.columns = ["before_file_path", "after_file_path"]
    tr_data = tr_data[["before_file_path", "after_file_path"]]
    tr_data['cls'] = cls

    tt_before = tr_data['before_file_path'].apply(lambda x: str(x).split("DAT")[1])
    tt_before = tt_before.apply(lambda x: int(str(x).split(".")[0]))
    tt_after = tr_data['after_file_path'].apply(lambda x: str(x).split("DAT")[1])
    tt_after = tt_after.apply(lambda x: int(str(x).split(".")[0]))

    tr_data['label'] = np.int_(tt_before - tt_after < 0)
    tr_data['before'] = tt_before
    tr_data['after'] = tt_after
    tr_data['delta'] = tt_after - tt_before
    
    tr_data['folder']= tr_data['before_file_path'].apply(lambda x: x.split("DAT")[0])
    
    tr_data = tr_data.sample(frac=1, random_state=random_seed).reset_index(drop=True)

    gkf = StratifiedKFold(n_splits=5, random_state=random_seed, shuffle = True)
    tr_data['fold'] = -1
    for fold, (train_idx, val_idx) in enumerate(gkf.split(tr_data, tr_data.label)):
        tr_data.loc[val_idx, 'fold'] = fold

    for fold in folds:
        valid_df = tr_data[tr_data['fold'] == fold]
        train_df = tr_data[tr_data['fold'] != fold]

        image_size = 448
        batch_size = 16

        train_df = train_df.sample(frac=1, random_state=random_seed).reset_index(drop=True)
        valid_df = valid_df.sample(frac=1, random_state=random_seed).reset_index(drop=True)

        train_dataset = CropDataset(
            df=train_df, image_size=image_size, mode='train', f=0.7)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, 
                                  sampler=RandomSampler(train_dataset), num_workers = 4, drop_last=True)
        valid_dataset = CropDataset(
            df=valid_df, image_size=image_size, mode='valid', f=1)
        valid_loader = DataLoader(valid_dataset, batch_size=batch_size, 
                                  sampler=RandomSampler(valid_dataset), num_workers = 4, drop_last=True)

        device = torch.device("cuda:0")
        model = CNN2RNN_Network(net,512,image_size)
        model = model.to(device)
        model_ft = model.feature_extract_model
        ct = 0
        for child in model_ft.children():
            ct += 1
            if ct < 8:
                for param in child.parameters():
                    param.requires_grad = False


        epochs = 30
        aux_epochs = 50
        learning_rate = 0.001
        save_path = './models/resnet_bestmodel_fold{}_{}.pt'.format(fold, net)
        
        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=learning_rate)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, aux_epochs)
        criterion1 = nn.BCEWithLogitsLoss()
        criterion2 = nn.L1Loss()

        loss_plot, val_loss_plot = [], []
        patience = 0
        for epoch in range(epochs):
            total_loss, total_loss2, total_val_loss, total_val_loss2 = 0, 0, 0, 0
            total_correct, total_val_correct = 0, 0

            len_train = len(train_loader)
            tqdm_dataset = tqdm(enumerate(train_loader), total=len_train)
            training = True
            for batch, batch_item in tqdm_dataset:
                batch_loss, batch_loss2, correct = train_step(batch_item, epoch, batch, training, scheduler)
                total_loss += batch_loss
                total_loss2 += batch_loss2
                total_correct += correct

                tqdm_dataset.set_postfix({
                    'Epoch': epoch + 1,
                    'LR' : '{}'.format(round(optimizer.param_groups[0]['lr'],6)),
                    'Total Loss' : '{}'.format(round(total_loss/(batch+1),4)),
                    'Total Loss2' : '{}'.format(round(total_loss2/(batch+1),4)),
                    'Mean Accuracy' : '{}'.format(round(total_correct/((batch+1)*batch_size),4))
                })

            loss_plot.append(total_loss/(batch+1))

            len_val = len(valid_loader)
            tqdm_dataset = tqdm(enumerate(valid_loader), total=len_val)
            training = False
            for batch, batch_item in tqdm_dataset:
                batch_loss, batch_loss2, correct = train_step(batch_item, epoch, batch, training, scheduler)
                total_val_loss += batch_loss
                total_val_correct += correct
                total_val_loss2 += batch_loss2
                tqdm_dataset.set_postfix({
                    'Epoch': epoch + 1,
                    'Total Val Loss' : '{}'.format(round(total_val_loss/(batch+1),4)),
                    'Total Val Loss2' : '{}'.format(round(total_val_loss2/(batch+1),4)),
                    'Mean Val Accuracy' : '{}'.format(round(total_val_correct/((batch+1)*batch_size),4))
                })
            val_loss_plot.append(total_val_loss/(batch+1))

            if np.min(val_loss_plot) == val_loss_plot[-1]:
                patience = 0
                torch.save(model, save_path)
            if np.min(val_loss_plot) < val_loss_plot[-1]:
                patience += 1

            if patience >= p:
                break



100%|██████████| 1022/1022 [03:15<00:00,  5.22it/s, Epoch=1, LR=0.000594, Total Loss=2.898, Total Loss2=2.8037, Mean Accuracy=0.9595] 
100%|██████████| 365/365 [01:37<00:00,  3.74it/s, Epoch=1, Total Val Loss=2.146, Total Val Loss2=2.0613, Mean Val Accuracy=0.9635] 
100%|██████████| 1022/1022 [03:16<00:00,  5.20it/s, Epoch=2, LR=3.5e-05, Total Loss=2.0416, Total Loss2=1.9828, Mean Accuracy=0.9757] 
100%|██████████| 365/365 [01:37<00:00,  3.74it/s, Epoch=2, Total Val Loss=1.6938, Total Val Loss2=1.6521, Mean Val Accuracy=0.983] 
100%|██████████| 1022/1022 [03:16<00:00,  5.21it/s, Epoch=3, LR=0.000232, Total Loss=1.8433, Total Loss2=1.789, Mean Accuracy=0.9769] 
100%|██████████| 365/365 [01:37<00:00,  3.74it/s, Epoch=3, Total Val Loss=1.4004, Total Val Loss2=1.3659, Mean Val Accuracy=0.9854]
100%|██████████| 1022/1022 [03:15<00:00,  5.22it/s, Epoch=4, LR=0.000864, Total Loss=1.6391, Total Loss2=1.5937, Mean Accuracy=0.9812]
100%|██████████| 365/365 [01:37<00:00,  3.75it/s, Epoch=4, Total



100%|██████████| 1022/1022 [02:45<00:00,  6.16it/s, Epoch=1, LR=0.000594, Total Loss=2.9461, Total Loss2=2.8484, Mean Accuracy=0.9598]
100%|██████████| 365/365 [01:01<00:00,  5.91it/s, Epoch=1, Total Val Loss=1.9148, Total Val Loss2=1.8524, Mean Val Accuracy=0.9777]
100%|██████████| 1022/1022 [02:45<00:00,  6.16it/s, Epoch=2, LR=3.5e-05, Total Loss=2.0773, Total Loss2=2.0141, Mean Accuracy=0.9746] 
100%|██████████| 365/365 [01:00<00:00,  6.01it/s, Epoch=2, Total Val Loss=1.8208, Total Val Loss2=1.7617, Mean Val Accuracy=0.9779]
100%|██████████| 1022/1022 [02:36<00:00,  6.54it/s, Epoch=3, LR=0.000232, Total Loss=1.8939, Total Loss2=1.8406, Mean Accuracy=0.9794]
100%|██████████| 365/365 [01:00<00:00,  6.07it/s, Epoch=3, Total Val Loss=1.4961, Total Val Loss2=1.45, Mean Val Accuracy=0.9851]  
100%|██████████| 1022/1022 [02:36<00:00,  6.53it/s, Epoch=4, LR=0.000864, Total Loss=1.6901, Total Loss2=1.6404, Mean Accuracy=0.9811]
100%|██████████| 365/365 [01:00<00:00,  6.05it/s, Epoch=4, Total

In [9]:
class CropTestDataset(Dataset):
    def __init__(self, df, image_size, mode):
        super().__init__()
        self.df = df
        assert mode in ["train", "valid", "test"] 
        self.mode = mode
        self.image_size = image_size
        
        self.transform = albu.Compose([
            albu.Resize(self.image_size, self.image_size),
            albu.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
            ToTensorV2(),
        ], additional_targets={'image1': 'image'})

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        before_img_path = self.df.loc[index, 'before_file_path']
        after_img_path = self.df.loc[index, 'after_file_path']

        before_img = cv2.imread(before_img_path)
        after_img = cv2.imread(after_img_path)
        
        aux_input1 = torch.FloatTensor([self.df.loc[index, "cls"]])

        file_size1 = os.path.getsize(before_img_path)
        file_size2 = os.path.getsize(after_img_path)
        aux_input2 = torch.FloatTensor([file_size2/10000 - file_size1/10000])
        
        transformed = self.transform(image=before_img, image1=after_img)
        
        before_img = transformed['image'] 
        after_img = transformed['image1']

        indice = torch.FloatTensor([self.df.loc[index, 'idx']])
        
        return before_img, after_img, aux_input1, aux_input2, indice

In [14]:
te_path = "../test_dataset/"
te_file = "test_data.csv"
te_data = pd.read_csv(os.path.join(te_path,te_file))
te_data["before_file_path"] = te_data["before_file_path"].apply(lambda x: str(x) + ".png")
te_data["before_file_path"] = te_data["before_file_path"].apply(lambda x: os.path.join("../test_dataset",
                                                               str(x).split("_")[1],
                                                               str(x).split("_")[2],
                                                               "resized512",
                                                               str(x)))

te_data["after_file_path"] = te_data["after_file_path"].apply(lambda x: str(x)+".png")
te_data["after_file_path"] = te_data["after_file_path"].apply(lambda x: os.path.join("../test_dataset",
                                                               str(x).split("_")[1],
                                                               str(x).split("_")[2],
                                                                "resized512",
                                                               str(x)))
te_data['cls']= np.int_(te_data['before_file_path'].apply(lambda x: x.split("/")[2]) == "BC")

image_size = 448
batch_size = 2

test_dataset = CropTestDataset(df=te_data, image_size=image_size, mode='test')
test_loader = DataLoader(test_dataset, batch_size=batch_size, drop_last=False, num_workers=4)

In [15]:
model_path = "./models/"
device = torch.device("cuda:0")
folds = [0,1,2,3,4]

mean_pred = []

for net in nets:
    outs = []
    idxs = []
    for fold in folds:
        model = torch.load(model_path + 'resnet_bestmodel_fold{}_{}.pt'.format(str(fold),net))
        inf = []
        idx = []
        len_test = len(test_loader)
        tqdm_dataset = tqdm(test_loader, total=len_test)
        for before_img, after_img, aux_input1, aux_input2, indice in tqdm_dataset:
            before_img = before_img.to(device)
            after_img = after_img.to(device)
            aux_input1 = aux_input1.to(device)
            aux_input2 = aux_input2.to(device)
            img = torch.stack([before_img, after_img])
            model.eval()
            with torch.no_grad():
                output = model(img, aux_input1, aux_input2)
                _, delta = output
                delta = delta.detach().cpu().numpy()
            inf.append(delta)
            idx.append(indice)
        outs.append(np.concatenate(inf))
        idxs.append(np.concatenate(idx))
    out_pred = np.array(outs).mean(axis=0)
    idx = np.array(idxs).mean(axis=0)
    mean_pred.append(out_pred)

100%|██████████| 1980/1980 [01:31<00:00, 21.55it/s]
100%|██████████| 1980/1980 [01:29<00:00, 22.05it/s]
100%|██████████| 1980/1980 [01:30<00:00, 21.95it/s]
100%|██████████| 1980/1980 [01:30<00:00, 21.85it/s]
100%|██████████| 1980/1980 [01:30<00:00, 21.91it/s]
100%|██████████| 1980/1980 [00:54<00:00, 36.25it/s]
100%|██████████| 1980/1980 [00:53<00:00, 37.03it/s]
100%|██████████| 1980/1980 [00:53<00:00, 37.33it/s]
100%|██████████| 1980/1980 [00:53<00:00, 37.23it/s]
100%|██████████| 1980/1980 [00:53<00:00, 37.33it/s]


In [16]:
sub = pd.read_csv("../sample_submission.csv")
sub['time_delta'] = np.mean(mean_pred, axis= 0)

In [17]:
sub.to_csv("sample_submission.csv", index=False)