# Import

In [None]:
import torch
from torch import nn
import torch.nn.functional as F
import torchvision
import math
import matplotlib.pyplot as plt
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset
import pandas as pd
import numpy as np
from  PIL import Image
import os
import random
from tqdm import tqdm
import timm

import sklearn
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, cross_validate
from sklearn.metrics import classification_report, confusion_matrix

# Training ML model

In [None]:
# Fit the stacking meta-classifier. Every column of all_pred.csv except the
# last is used as a feature; the last column is the target label.
df_all = pd.read_csv('../input/my-csv/all_pred.csv')
x = df_all.iloc[:, :-1]
y = df_all.iloc[:, -1]
# `multi_class` is deprecated in recent scikit-learn releases and 'auto' is
# already the default, so it is simply not passed.
lr_model = LogisticRegression(solver='lbfgs', max_iter=200)
# Fit on bare arrays: the frames handed to `predict` later do not carry the
# CSV's column names, so features are matched purely by position. Fitting
# without names avoids scikit-learn's feature-name mismatch warning.
lr_model.fit(x.to_numpy(), y.to_numpy())
print('lr model training done')

# Configuration

In [None]:
# Number of output classes every model head is resized to.
OUT_FEATURES = 5

# Dataset locations and the output file.
INPUT_PATH = '../input'
TRAIN_CSV_PATH = '../input/cassava-leaf-disease-classification/train.csv'
TRAIN_IMAGE_PATH = '../input/cassava-leaf-disease-classification/train_images/'
TEST_IMAGE_PATH = '../input/cassava-leaf-disease-classification/test_images/'
SUBMISSION_PATH = 'submission.csv'

# One torch.device per visible CUDA GPU (empty when no GPU is available).
DEVICES = [
    torch.device('cuda', gpu_index)
    for gpu_index in range(torch.cuda.device_count())
]

# Generate Test Aug

In [None]:
def get_test_aug(image_size):
    """Build the albumentations pipeline applied to each test image.

    Args:
        image_size: Side length, in pixels, of the square output image.

    Returns:
        An ``A.Compose`` that picks exactly one of resize / centre crop /
        random resized crop, randomly transposes and flips (p=0.5 each),
        resizes to ``image_size`` x ``image_size``, normalises with the
        mean/std below and converts the result to a tensor.
    """
    side = image_size

    # Exactly one of these is applied per call (OneOf with p=1.0).
    pick_geometry = A.OneOf(
        [
            A.Resize(side, side, p=1.0),
            A.CenterCrop(side, side, p=1.0),
            A.RandomResizedCrop(side, side, p=1.0),
        ],
        p=1.0,
    )

    steps = [
        pick_geometry,
        A.Transpose(p=0.5),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        # Final resize so the output is always side x side.
        A.Resize(side, side),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        ),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps, p=1.0)

# Seed Everything

In [None]:
def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Sets ``PYTHONHASHSEED``, seeds Python's ``random``, NumPy and PyTorch
    (CPU and CUDA), and asks cuDNN for deterministic kernels.

    Args:
        seed: Integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True

# Inference Function

In [None]:
def _strip_data_parallel_prefix(state_dict, prefix='module.'):
    """Return a copy of ``state_dict`` with a leading ``prefix`` removed from each key."""
    return {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }


def inference(model, model_param_path, test_augs, df_path, tta):
    """Run test-time-augmented inference on every file in ``TEST_IMAGE_PATH``.

    Args:
        model: Network to evaluate; its weights are loaded from
            ``model_param_path``.
        model_param_path: Path to a saved ``state_dict``. Keys may carry the
            ``module.`` prefix added by ``nn.DataParallel``; keys without it
            are loaded unchanged.
        test_augs: Callable used as ``test_augs(image=array)['image']`` that
            returns the tensor for one image (the batch dimension is added
            here).
        df_path: Unused; kept so existing calls keep working.
        tta: Number of augmented forward passes averaged per image.

    Returns:
        A DataFrame with one row per image, ordered by file name. Column 0
        holds the file name; the remaining columns hold the mean model
        output, L2-normalised per image.

    Raises:
        ValueError: If ``tta`` is smaller than 1 (the average would be NaN).
    """
    if tta < 1:
        raise ValueError(f'tta must be at least 1, got {tta}')

    device = DEVICES[0]

    # Load onto the CPU first; load_state_dict copies into the model anyway,
    # and this does not depend on the GPU the checkpoint was saved from.
    state_dict = torch.load(model_param_path, map_location='cpu')
    model.load_state_dict(_strip_data_parallel_prefix(state_dict))
    model.eval()
    model = nn.DataParallel(model).to(device)

    # Sorted so that every call returns its rows in the same order; the
    # results of several calls are later combined.
    image_names = sorted(os.listdir(TEST_IMAGE_PATH))

    mean_outputs = []
    with torch.no_grad():
        for image_name in image_names:
            # Decode once per image instead of once per TTA pass, and close
            # the file handle right away. convert('RGB') leaves RGB images
            # unchanged and guards against grayscale/RGBA inputs.
            with Image.open(os.path.join(TEST_IMAGE_PATH, image_name)) as image_file:
                pixels = np.array(image_file.convert('RGB'))

            total = torch.zeros(OUT_FEATURES, device=device)
            for _ in range(tta):
                augmented = test_augs(image=pixels)['image']
                # as_tensor avoids the copy-construct warning that
                # torch.tensor() raises when handed an existing tensor.
                batch = torch.as_tensor(augmented, dtype=torch.float).unsqueeze(0).to(device)
                total += model(batch).view(total.shape)
            mean_outputs.append(total / tta)

    predictions = torch.stack(mean_outputs, dim=0).cpu()
    # Scale each image's output vector to unit L2 norm.
    normalized = F.normalize(predictions, p=2, dim=1)

    name_df = pd.DataFrame(np.array(image_names))
    pred_df = pd.DataFrame(normalized.numpy())
    result = pd.concat([name_df, pred_df], axis=1, ignore_index=True)
    torch.cuda.empty_cache()
    return result

# model_1 EfficientNetB4 Noisy-Student

In [None]:
# Settings for model_1: RNG seed, square input size in pixels, number of
# test-time-augmentation passes, and the checkpoint to load.
SEED = 42
IMAGE_SIZE = 512
TTA = 8
B4NS_PATH = '../input/ensemble-1023/1022_b4ns.pth'

In [None]:
# Build the architecture without pretrained weights, then swap the
# classifier for a linear layer with OUT_FEATURES outputs.
model_name_1 = "tf_efficientnet_b4_ns"
my_model_1 = timm.create_model(model_name_1, pretrained=False)
my_model_1.classifier = nn.Linear(
    in_features=my_model_1.classifier.in_features,
    out_features=OUT_FEATURES,
)

In [None]:
# Reseed before building the random augmentations, then run model_1 over
# the test images.
seed_everything(SEED)
test_augs = get_test_aug(IMAGE_SIZE)
df_1 = inference(
    model=my_model_1,
    model_param_path=B4NS_PATH,
    test_augs=test_augs,
    df_path='b4ns.csv',
    tta=TTA,
)

# model_2 ResNeXt50_32x4d

In [None]:
# Settings for model_2: RNG seed, square input size in pixels, number of
# test-time-augmentation passes, and the checkpoint to load.
SEED = 42
IMAGE_SIZE = 512
TTA = 1
RES50_PATH = '../input/ensemble-1023/1022_res50.pth'

In [None]:
# Build the architecture without pretrained weights, then swap the final
# fully connected layer for one with OUT_FEATURES outputs.
model_name2 = "resnext50_32x4d"
my_model_2 = timm.create_model(model_name2, pretrained=False)
my_model_2.fc = nn.Linear(
    in_features=my_model_2.fc.in_features,
    out_features=OUT_FEATURES,
)


In [None]:
# Reseed before building the random augmentations, then run model_2 over
# the test images.
seed_everything(SEED)
test_augs = get_test_aug(IMAGE_SIZE)
df_2 = inference(
    model=my_model_2,
    model_param_path=RES50_PATH,
    test_augs=test_augs,
    df_path='res50.csv',
    tta=TTA,
)

# model_3 EfficientNetB5 Noisy-Student

In [None]:
# Settings for model_3: RNG seed, square input size in pixels, number of
# test-time-augmentation passes, and the checkpoint to load.
SEED = 43
IMAGE_SIZE = 384
TTA = 8
B5NS_PATH = '../input/ensemble-1026/1026_b5ns.pth'

In [None]:
# Build the architecture without pretrained weights, then swap the
# classifier for a linear layer with OUT_FEATURES outputs.
model_name3 = "tf_efficientnet_b5_ns"
my_model_3 = timm.create_model(model_name3, pretrained=False)
my_model_3.classifier = nn.Linear(
    in_features=my_model_3.classifier.in_features,
    out_features=OUT_FEATURES,
)

In [None]:
# Reseed before building the random augmentations, then run model_3 over
# the test images.
seed_everything(SEED)
test_augs = get_test_aug(IMAGE_SIZE)
df_3 = inference(
    model=my_model_3,
    model_param_path=B5NS_PATH,
    test_augs=test_augs,
    df_path='b5ns.csv',
    tta=TTA,
)

# model_4 ResNeXt50d_32x4d

In [None]:
# Settings for model_4: RNG seed, square input size in pixels, number of
# test-time-augmentation passes, and the checkpoint to load.
SEED = 43
IMAGE_SIZE = 512
TTA = 1
RES50D_PATH = '../input/ensemble-1026/1026_res50d.pth'

In [None]:
# Build the architecture without pretrained weights, then swap the final
# fully connected layer for one with OUT_FEATURES outputs.
model_name4 = "resnext50d_32x4d"
my_model_4 = timm.create_model(model_name4, pretrained=False)
my_model_4.fc = nn.Linear(
    in_features=my_model_4.fc.in_features,
    out_features=OUT_FEATURES,
)

In [None]:
# Reseed before building the random augmentations, then run model_4 over
# the test images.
seed_everything(SEED)
test_augs = get_test_aug(IMAGE_SIZE)
df_4 = inference(
    model=my_model_4,
    model_param_path=RES50D_PATH,
    test_augs=test_augs,
    df_path='res50d.csv',
    tta=TTA,
)

# model_5 SEResNeXt50_32x4d

In [None]:
# SERES50D_PATH = '../input/1029-ensemble/1029_resnext50.pth'
# SEED = 23
# IMAGE_SIZE = 512
# TTA = 1

In [None]:
# model_name5 = "seresnext50_32x4d"
# my_model_5 = timm.create_model(model_name5, pretrained=False)
# my_model_5.fc = nn.Linear(my_model_5.fc.in_features, OUT_FEATURES)

In [None]:
# seed_everything(SEED)
# test_augs = get_test_aug(image_size=IMAGE_SIZE)
# inference(my_model_5, SERES50D_PATH, test_augs, 'seres50.csv', TTA)

# Ensemble

In [None]:
# Combine the four per-model prediction frames and let the fitted
# meta-classifier pick the final label for each test image.
#
# Each frame holds the image name in column 0 followed by that model's
# outputs. The frames are joined on the image name rather than on row
# position, so a difference in row order between them cannot pair one
# image's features with another image's name.
model_frames = [frame.set_index(0) for frame in (df_1, df_2, df_3, df_4)]
df_all = pd.concat(model_frames, axis=1, join='inner')
if len(df_all) != len(df_1):
    # An inner join drops images missing from any frame; fail loudly instead
    # of writing an incomplete submission.
    raise ValueError('per-model prediction frames do not cover the same images')

df_image = pd.Series(df_all.index, name='image_id')
x = df_all
# Features are matched to the training columns by position, so pass a bare
# array rather than a frame whose column labels repeat for every model.
preds = lr_model.predict(x.to_numpy())
df_preds = pd.DataFrame(preds)
df_submission = pd.DataFrame({'image_id': df_all.index, 'label': preds})
df_submission.to_csv(SUBMISSION_PATH, index=False)