In [1]:
from utils.prepare_data import get_loaders
from utils.models import CassavaNet, get_params
from utils.settings import seed_everything
from utils.dataset import CassavaDataset
from utils.transforms import get_train_transforms, get_valid_transforms, get_inference_transforms
from utils.predict import inference_one_epoch, tta_predict

from sklearn.metrics import accuracy_score
from catboost import CatBoostClassifier, Pool


from tqdm.auto import tqdm 
from pathlib import Path
import os
import numpy as np 
import pandas as pd
import torch
import catalyst
import multiprocessing
import collections
from scipy.stats import gmean, hmean

In [2]:
# --- Experiment configuration ------------------------------------------------
model_name = 'tf_efficientnet_b2_ns'
fold = 1
ROOT = Path.cwd() / 'cassava-leaf-disease-classification'
OUTPUT_ROOT = ROOT / 'out'
SEED = 2021
seed_everything(SEED)
# Keep two cores free, but never drop below zero: DataLoader raises on a
# negative num_workers, and 0 simply means "load batches in the main process".
NUM_CORES = max(multiprocessing.cpu_count() - 2, 0)
BS = 16
img_size = 528

# --- Held-out rows of the selected fold --------------------------------------
train = pd.read_csv(ROOT / 'train_cv7_add.csv')
valid_fold = train[train[f"fold_{fold}"] == 'test']

# Two datasets over the same rows: valid_ds goes through the validation
# transforms, infer_ds through the inference transforms (fed to tta_predict
# further down).
valid_ds = CassavaDataset(valid_fold, ROOT / 'train/', transforms=get_valid_transforms(img_size=img_size))
infer_ds = CassavaDataset(valid_fold, ROOT / 'train/', transforms=get_inference_transforms(img_size=img_size))
valid_loader = torch.utils.data.DataLoader(valid_ds, batch_size=BS, num_workers=NUM_CORES, shuffle=False, pin_memory=False)
infer_loader = torch.utils.data.DataLoader(infer_ds, batch_size=BS, num_workers=NUM_CORES, shuffle=False, pin_memory=False)

# --- Model: restore the best checkpoint saved for this fold ------------------
device = catalyst.utils.get_device()
model = CassavaNet(5, model_name).to(device)  # 5 is presumably the class count -- confirm against CassavaNet
logdir = f"{OUTPUT_ROOT}/.logs_{model_name}_{fold}_stage_2_1/checkpoints/"
model_dict = torch.load(f'{logdir}/best.pth', map_location=device)['model_state_dict']
# Left as the final expression so the key-matching report is shown as cell output.
model.load_state_dict(model_dict)

SEED: 2021


<All keys matched successfully>

In [3]:
# from albumentations import (
#     HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90, 
#     Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
#     IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, 
#     IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout,
#     ShiftScaleRotate, CenterCrop, Resize, SmallestMaxSize,
#     RandomSunFlare, RandomShadow, RandomResizedCrop, CoarseDropout, RandomGridShuffle
# )
# from albumentations.pytorch import ToTensorV2

# mean=[0.485, 0.456, 0.406], 
# std=[0.229, 0.224, 0.225]
# n_tta = 4
# # def get_inference_transforms(img_size = 528):
# #     return Compose([
# #             SmallestMaxSize(img_size,interpolation=2, p=1.),
# #             CenterCrop(img_size, img_size, p=1.),
        
# #             OneOf(
# #                 [
# #                 #Transpose(p=1),
# #                 HorizontalFlip(p=0.25),
# #                 #VerticalFlip(p=0.5),
# #                 ShiftScaleRotate(p=0.25),
# #                 HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.25),
# #                 #RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=1),
# #                 CoarseDropout(p=0.25),
# #                 #Cutout(p=1),
# #                 #RandomGridShuffle(grid=(3, 3), p=1)
# #                 ], p=1),
        
# #             Normalize(mean=mean, std=std, max_pixel_value=255.0, p=1.0),
# #             ToTensorV2(p=1.0),
# #         ], p=1.)

# train = pd.read_csv(ROOT / 'train_cv7_add.csv')
# valid_fold = train[train[f"fold_{fold}"] == 'test']
# valid_ds = CassavaDataset(valid_fold, ROOT / 'train/', transforms=get_valid_transforms(img_size = img_size))
# infer_ds = CassavaDataset(valid_fold, ROOT / 'train/', transforms=get_inference_transforms(img_size = img_size))
# valid_loader = torch.utils.data.DataLoader(valid_ds, batch_size=BS, num_workers=NUM_CORES, shuffle=False, pin_memory=False,) 
# infer_loader = torch.utils.data.DataLoader(infer_ds, batch_size=BS, num_workers=NUM_CORES, shuffle=False, pin_memory=False,) 



# device = catalyst.utils.get_device()
# model = CassavaNet(5, model_name).to(device)
# logdir = f"{OUTPUT_ROOT}/.logs_{model_name}_{fold}_stage_2_1/checkpoints/"
# model_dict = torch.load(f'{logdir}/best.pth', map_location=device)['model_state_dict']
# model.load_state_dict(model_dict)

In [4]:
# def inference_one_epoch(model, data_loader, device):
#     model.eval()

#     image_preds_all = []
#     labels = []
    
#     pbar = tqdm(enumerate(data_loader), total=len(data_loader))
#     for step, (imgs) in pbar:

#         img = imgs[0].to(device).float()

#         labels_batch = imgs[1]

#         image_preds = model(img)   #output = model(input)
#         image_preds_all += [torch.softmax(image_preds, 1).detach().cpu().numpy()]
#         labels += [labels_batch]
        
#     image_preds_all = np.concatenate(image_preds_all, axis=0)
#     labels = np.concatenate(labels, axis=0)

#     return image_preds_all, labels

In [5]:
# def tta_predict(model, infer_loader, valid_loader, device, func='gmean'):
    
#     tta_preds = []
#     model.eval()

#     with torch.no_grad():

#         no_tta_preds, no_tta_labels = inference_one_epoch(model, valid_loader, device)

#         for _ in range(n_tta):
#             tta, _ = inference_one_epoch(model, infer_loader, device)
#             tta_preds += [tta]
#             tta_preds += [no_tta_preds]

#     if func == 'gmean':
#         tta_preds = gmean(tta_preds, axis=0) 
#     else:
#         tta_preds = np.mean(tta_preds, axis=0) 
#     return tta_preds

In [30]:
# Aggregate test-time-augmentation predictions for the validation rows.
# The leading 4 is passed positionally as in the original call; it is
# presumably the number of TTA rounds -- confirm against utils.predict.
tta_preds = tta_predict(
    4,
    model,
    infer_loader,
    valid_loader,
    device,
    func='gmean',
)

HBox(children=(FloatProgress(value=0.0, max=236.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=236.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=236.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=236.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=236.0), HTML(value='')))




In [31]:
# One gradient-free pass over valid_loader (the dataset built with the
# validation transforms) to obtain baseline predictions and the labels.
with torch.no_grad():
    baseline_out = inference_one_epoch(model, valid_loader, device)
no_tta_preds, no_tta_labels = baseline_out

HBox(children=(FloatProgress(value=0.0, max=236.0), HTML(value='')))




In [32]:
# Accuracy of the TTA-aggregated predictions on the validation rows.
accuracy_score(y_true=no_tta_labels, y_pred=np.argmax(tta_preds, axis=1))

0.8987509965453095

In [33]:
# Blend the TTA and the plain predictions with an element-wise geometric mean.
tta_preds_ = gmean(np.stack((tta_preds, no_tta_preds)), axis=0)

In [34]:
# Baseline accuracy: predictions from the single pass over valid_loader.
accuracy_score(y_true=no_tta_labels, y_pred=np.argmax(no_tta_preds, axis=1))

0.894764815306936

In [35]:
# Accuracy of the gmean blend of TTA and plain predictions.
accuracy_score(y_true=no_tta_labels, y_pred=np.argmax(tta_preds_, axis=1))

0.8982195057135265

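Accuracy by aggregation function (the leading integer is presumably the number of TTA rounds; confirm):

gmean

- 11: 0.8963592878022855
- 9: 0.8958277969705023

np.mean

- 0.8968907786340685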

In [12]:
# Sanity check: one row per validation sample, one column per class score.
tuple(tta_preds.shape)

(3763, 5)

In [13]:

# Rebuild the fold split, this time also keeping the rows marked 'train'
# (cat_fold), which later provide the features for the CatBoost model.
fold = 1
train = pd.read_csv(ROOT / 'train_cv7_add.csv')
fold_marker = train[f"fold_{fold}"]
valid_fold = train[fold_marker == 'test']
cat_fold = train[fold_marker == 'train']

# cat_ds and infer_ds use the inference transforms; valid_ds uses the
# validation transforms.
cat_ds = CassavaDataset(cat_fold, ROOT / 'train/', transforms=get_inference_transforms(img_size=img_size))
valid_ds = CassavaDataset(valid_fold, ROOT / 'train/', transforms=get_valid_transforms(img_size=img_size))
infer_ds = CassavaDataset(valid_fold, ROOT / 'train/', transforms=get_inference_transforms(img_size=img_size))

# All three loaders share the same settings; order is preserved (no shuffling)
# so predictions line up with the dataframe rows.
loader_kwargs = dict(batch_size=BS, num_workers=NUM_CORES, shuffle=False, pin_memory=False)
valid_loader = torch.utils.data.DataLoader(valid_ds, **loader_kwargs)
infer_loader = torch.utils.data.DataLoader(infer_ds, **loader_kwargs)
cat_loader = torch.utils.data.DataLoader(cat_ds, **loader_kwargs)


In [14]:
# val_preds = []
# tta_preds = []

# model.eval()

# with torch.no_grad():
#     for _ in tqdm(range(n_tta)):
#         tta, _ = inference_one_epoch(model, cat_loader, device)
#         tta_preds += [tta]

# tta_preds = gmean(tta_preds, axis=0)

In [15]:
# Reload the best checkpoint for the current fold so this cell does not depend
# on whichever model object happens to be alive in the kernel.
device = catalyst.utils.get_device()
model = CassavaNet(5, model_name).to(device)
logdir = f"{OUTPUT_ROOT}/.logs_{model_name}_{fold}_stage_2_1/checkpoints/"
model_dict = torch.load(f'{logdir}/best.pth', map_location=device)['model_state_dict']
model.load_state_dict(model_dict)

tta_preds = []
n_tta = 13
model.eval()

# TTA predictions for the training split: n_tta passes over cat_loader (whose
# dataset uses the inference transforms), combined below with a geometric mean.
with torch.no_grad():
    for _ in tqdm(range(n_tta)):
        tta, _labels = inference_one_epoch(model, cat_loader, device)
        tta_preds.append(tta)

tta_preds = gmean(tta_preds, axis=0)

# Baseline WITHOUT TTA. The original ran this pass through cat_loader, i.e.
# through the inference transforms again, so the "no TTA" numbers were just one
# more TTA pass. Use the validation transforms instead, mirroring the
# valid_ds / infer_ds split used for the held-out rows.
# NOTE(review): assumes get_inference_transforms is the augmenting pipeline and
# get_valid_transforms the plain one -- confirm in utils.transforms.
cat_plain_ds = CassavaDataset(cat_fold, ROOT / 'train/', transforms=get_valid_transforms(img_size=img_size))
cat_plain_loader = torch.utils.data.DataLoader(cat_plain_ds, batch_size=BS, num_workers=NUM_CORES, shuffle=False, pin_memory=False)

# Same rows and shuffle=False, so the labels returned here line up with tta_preds.
with torch.no_grad():
    no_tta_preds, tta_labels = inference_one_epoch(model, cat_plain_loader, device)


HBox(children=(FloatProgress(value=0.0, max=13.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1411.0), HTML(value='')))





KeyboardInterrupt: 

In [None]:
# Training-split accuracy: first the TTA-aggregated predictions, then the
# predictions stored in no_tta_preds.
for split_preds in (tta_preds, no_tta_preds):
    print(accuracy_score(tta_labels, np.argmax(split_preds, 1)))

In [None]:
# Predictions and labels for the held-out rows (valid_loader); these serve as
# the evaluation data for the CatBoost model below.
with torch.no_grad():
    valid_out = inference_one_epoch(model, valid_loader, device)
no_tta_preds_val, no_tta_labels_val = valid_out

In [None]:
# Stack a CatBoost classifier on top of the network's per-class scores:
# fit on the training-split TTA predictions, evaluate on the held-out rows.
train_data, train_labels = tta_preds, tta_labels
test_data = Pool(no_tta_preds_val, no_tta_labels_val)

cat_params = dict(
    iterations=2000,
    depth=2,
    learning_rate=0.01,
    loss_function='MultiClassOneVsAll',  # alternatives noted by the author: 'MultiClass', 'CrossEntropy'
    early_stopping_rounds=30,
)
cat_model = CatBoostClassifier(**cat_params)

# NOTE(review): the eval_set that drives early stopping is the same data that
# is scored below, so the reported accuracy is likely optimistic.
cat_model.fit(
    train_data,
    train_labels,
    eval_set=(no_tta_preds_val, no_tta_labels_val),
    verbose=False,
    plot=True,
)

# Hard labels and class probabilities from the fitted model.
preds_class = cat_model.predict(test_data)
preds_proba = cat_model.predict_proba(test_data)

# Stacked model first, then the raw network argmax for comparison.
print(accuracy_score(no_tta_labels_val, preds_class))
print(accuracy_score(no_tta_labels_val, np.argmax(no_tta_preds_val, 1)))

In [None]:
# Accuracy of the CatBoost-stacked predictions on the held-out rows.
accuracy_score(y_true=no_tta_labels_val, y_pred=preds_class)