# **Cassava Ensemble prediction**
2021/01/21 written by T.Yonezu

In [1]:
if False:
    !pip install "../input/efficientnet-pytorch-07/efficientnet_pytorch-0.7.0"

In [16]:
%load_ext autoreload
%autoreload 2

import torch
from torch.utils.data import DataLoader, Dataset

import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
import glob 
import os
from tqdm import tqdm

from cassava_dataset import *

import warnings
warnings.simplefilter('ignore')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
# Root folder holding the competition data and the saved models,
# located two directory levels above the notebook's working directory.
INPUT_DIR = os.path.join(os.pardir, os.pardir, "input")

In [18]:
# Collect every image to be scored and build the two structures used by the
# later cells:
#   x         -- DataFrame with columns image_path, image_id (file name), label
#   test_dict -- {image_path: label} mapping handed to CassavaDataset
# Labels are unknown at prediction time, so they are filled with NaN.
# NOTE(review): this globs `train_images` although the result is used as the
# prediction set for submission.csv -- confirm whether `test_images` is meant.
image_paths = sorted(  # sorted: glob order is arbitrary, keep runs reproducible
    glob.glob(os.path.join(INPUT_DIR, 'cassava-leaf-disease-classification', 'train_images', "*.jpg"))
)
if not image_paths:
    # Fail with a clear message instead of an obscure KeyError further down.
    raise FileNotFoundError(
        "No *.jpg files found under "
        + os.path.join(INPUT_DIR, 'cassava-leaf-disease-classification', 'train_images')
    )

x = pd.DataFrame({"image_path": image_paths})
# The image id is the bare file name (last path component).
x["image_id"] = x["image_path"].map(os.path.basename)
x["label"] = np.nan

test_dict = dict(zip(x["image_path"], x["label"]))

In [19]:
# Read the catalogue of trained checkpoints and order it so that the rows with
# the highest `public_score` come first (ascending sort, then reversed).
# Only the four columns consumed by the inference loop are kept; that loop
# unpacks each row positionally, so the column list must stay at these four.
models_info = (
    pd.read_csv(os.path.join(INPUT_DIR, "cassava-models", "models-info.csv"))
    .sort_values("public_score")
    .iloc[::-1]
    [["file_name", "nn_name", "image_width", "image_height"]]
)

# Every listed model takes part in the ensemble.
use_models = models_info

print("##### use_models #####")
display(use_models)

##### use_models #####


Unnamed: 0,file_name,nn_name,image_width,image_height
0,ResNeXt50_30x4d_cassava(512x512)_EqualizedLabe...,resnext50_32x4d,512,512
1,ResNet50_cassava(800x600)_EqualizedLabel_finet...,resnet50,800,600
3,ResNet50_cassava(512x512)_EqualizedLabel_finet...,resnet50,512,512
2,VGG11_cassava(512x512)_EqualizedLabel_finetune...,vgg11,512,512
8,ResNet50_cassava_EqualizedLabel_finetuned_10Ep...,resnet50,224,224
4,ResNet50_cassava(512x512)_EqualizedLabel_finet...,resnet50,512,512
9,VGG11_cassava_EqualizedLabel_finetuned_50Epoch...,vgg11,224,224
7,ResNet50_32x4d_cassava_EqualizedLabel_finetune...,resnext50_32x4d,224,224
5,EfficientNet-b3_cassava_EqualizedLabel_finetun...,efficientnet-b3,224,224
6,EfficientNet-b7_cassava_EqualizedLabel_finetun...,efficientnet-b7,224,224


In [20]:
# Folder that contains the saved model weight files listed in models-info.csv.
MODEL_DIR = os.path.join(
    INPUT_DIR,
    "cassava-models",
)

In [21]:
from torch import nn
from efficientnet_pytorch import EfficientNet
import torchvision.models as models

NUM_CLASSES = 5  # size of the replacement classification head


def build_model(nn_name):
    """Create the network named ``nn_name`` with a ``NUM_CLASSES``-way linear head.

    No pretrained weights are fetched: every parameter is overwritten by the
    checkpoint loaded right after construction, so downloading ImageNet
    weights would only cost time and fail on machines without internet.

    Args:
        nn_name: architecture identifier; one of "vgg11", "efficientnet-b7",
            "efficientnet-b3", "resnext50_32x4d" or "resnet50".

    Returns:
        The freshly constructed (untrained) model.

    Raises:
        ValueError: if ``nn_name`` is not a supported architecture. Without
            this guard an unknown name would silently reuse the model built
            for the previous row.
    """
    if nn_name == "vgg11":
        model = models.vgg11(pretrained=False)
        model.classifier[6] = nn.Linear(in_features=4096, out_features=NUM_CLASSES, bias=True)
    elif nn_name == "efficientnet-b7":
        model = EfficientNet.from_name("efficientnet-b7")
        model._fc = nn.Linear(in_features=2560, out_features=NUM_CLASSES, bias=True)
    elif nn_name == "efficientnet-b3":
        model = EfficientNet.from_name("efficientnet-b3")
        model._fc = nn.Linear(in_features=1536, out_features=NUM_CLASSES, bias=True)
    elif nn_name == "resnext50_32x4d":
        model = models.resnext50_32x4d(pretrained=False)
        model.fc = nn.Linear(in_features=2048, out_features=NUM_CLASSES, bias=True)
    elif nn_name == "resnet50":
        model = models.resnet50(pretrained=False)
        model.fc = nn.Linear(in_features=2048, out_features=NUM_CLASSES, bias=True)
    else:
        raise ValueError("Unsupported nn_name: {!r}".format(nn_name))
    return model


# Run on the GPU when one is present, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Per-channel normalisation constants (these match the usual ImageNet values);
# identical for every model, so they are defined once outside the loop.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Predict with every selected model and collect one probability table per model.
all_outputs = []
for row in use_models.itertuples(index=False):
    # Access by column name so extra or reordered columns cannot break the loop.
    file_name, nn_name = row.file_name, row.nn_name
    img_w, img_h = row.image_width, row.image_height

    # load model
    model = build_model(nn_name)
    PATH = os.path.join(MODEL_DIR, file_name)
    # map_location="cpu" lets checkpoints saved on a GPU load on any machine.
    model.load_state_dict(torch.load(PATH, map_location="cpu"))
    # Inference mode for dropout / batch-norm; harmless if
    # cassava_predict_proba switches the mode itself.
    model.eval()

    # make test_dataloader
    # NOTE(review): size is passed as (width, height); confirm this is the
    # order ImageTransform expects (matters for the 800x600 model).
    size = (img_w, img_h)
    transform = ImageTransform(size, mean, std)
    # batch_size stays at 1 as before; cassava_predict_proba is defined
    # elsewhere, so whether it copes with larger batches is not visible here.
    test_dataloader = DataLoader(dataset=CassavaDataset(test_dict, transform=transform),
                                 batch_size=1)

    output = cassava_predict_proba(model=model,
                                   test_dataloader=test_dataloader,
                                   sub_df=x,
                                   device=device)
    # Tag each prediction table with the model that produced it.
    output["file_name"] = file_name
    output["nn_name"] = nn_name

    all_outputs.append(output)

all_outputs = pd.concat(all_outputs)
display(all_outputs)

  1%|▊                                                                             | 208/21397 [00:07<12:11, 28.98it/s]


KeyboardInterrupt: 

In [None]:
# Names of the five per-class probability columns in `all_outputs`.
cols = ["class{}_proba".format(k) for k in range(5)]

# Ensemble step: average each image's class probabilities over all models.
pred = all_outputs.groupby("image_id")[cols].mean().reset_index()

# Attach the averaged probabilities to the image list, take the most probable
# class per image and keep only the two columns written to the submission
# (image_id and the predicted class, stored as int64 under the name "label").
sub_df = (
    pd.merge(x, pred, on="image_id")
    .assign(pred_label=lambda merged: merged[cols].values.argmax(axis=1))
    [["image_id", "pred_label"]]
    .rename(columns={"pred_label": "label"})
    .astype({"label": "int64"})
)

sub_df.to_csv("submission.csv", index=False)

sub_df