### Imports and Jupyter setup

In [None]:
%load_ext autoreload
%autoreload 2

import os
import time
import tqdm
import torch
import wandb
import numpy as np
import pandas as pd
import torch.nn as nn

from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from timm.scheduler import CosineLRScheduler
from sklearn.metrics import f1_score, accuracy_score, top_k_accuracy_score

# Restrict this process to GPU 0, then select CUDA when PyTorch reports it as
# available and fall back to the CPU otherwise.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
pd.set_option('display.max_columns', None)  # show every column when a DataFrame is displayed
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

### Custom Imports

In [None]:
from fgvc.utils.datasets import TrainDataset
from fgvc.utils.augmentations import test_transforms
# from fgvc.utils.utils import timer, init_logger, , 

from fgvc.utils.utils import timer, init_logger, seed_everything, getModel

In [None]:
!nvidia-smi

### Load Dataset Metadata

In [None]:
# Train/validation splits (used further down for the class count, the sample
# counts stored in `config`, and the validation analysis).
train_metadata = pd.read_csv("../../metadata/PlantCLEF2018_train_metadata.csv")
val_metadata = pd.read_csv("../../metadata/PlantCLEF2018_val_metadata.csv")

# Test-set metadata for the two challenge editions.
PlantCLEF2017_test = pd.read_csv("../../metadata/PlantCLEF2017_test_metadata.csv")
PlantCLEF2018_test = pd.read_csv("../../metadata/PlantCLEF2018_test_metadata.csv")

# The expert list is a ';'-separated file read with explicit column names; it is
# only used to pick the matching rows out of the 2018 test metadata by MediaId.
expert_media = pd.read_csv(
    "../../metadata/MediaId_ObservationId_ManVsMachineSubPart_ExpertCLEF2018.csv",
    sep=';',
    names=['MediaId', 'ObservationId'],
)
# .copy(): later cells assign new columns to `expert_subset`; without an explicit
# copy those writes target a filtered slice of PlantCLEF2018_test, which pandas
# reports as SettingWithCopyWarning and may not apply as intended.
expert_subset = PlantCLEF2018_test[PlantCLEF2018_test['MediaId'].isin(expert_media.MediaId)].copy()

print(f'Number of samples in PlantCLEF2017_test: {len(PlantCLEF2017_test)}')
print(f'Number of samples in PlantCLEF2018_test: {len(PlantCLEF2018_test)}')
print(f'Number of samples in PlantCLEF2018_expert_test: {len(expert_subset)}')

In [None]:
# Point every test-set image path at the '/Data-10T/PlantCLEF/' root instead of
# the '/local/nahouby/Datasets/PlantCLEF/' prefix stored in the metadata files.
for _frame in (PlantCLEF2017_test, PlantCLEF2018_test, expert_subset):
    _frame['image_path'] = _frame['image_path'].apply(
        lambda path: path.replace('/local/nahouby/Datasets/PlantCLEF/', '/Data-10T/PlantCLEF/')
    )

### Training Parameters

In [None]:
# Run configuration. In this notebook it provides the architecture name, class
# count, image size, batch size and worker count used by the cells below.
# NOTE(review): the guidance that used to sit here asks for
# batch_size * accumulation_steps == 64, but the values below multiply to 128
# (32 * 4) -- confirm which one is intended before reusing this for training.
config = dict(
    augmentations='light-random_crop',
    optimizer='SGD',
    scheduler='cyclic_cosine',
    image_size=(224, 224),
    random_seed=777,
    number_of_classes=len(train_metadata['class_id'].unique()),
    architecture='vit_base_patch32_224',
    batch_size=32,
    accumulation_steps=4,
    epochs=100,
    learning_rate=0.01,
    dataset='PlantCLEF2018',
    loss='CrossEntropyLoss',
    training_samples=len(train_metadata),
    valid_samples=len(val_metadata),
    workers=12,
)

# Human-readable run identifier: architecture-optimizer-scheduler-augmentations.
RUN_NAME = "-".join(
    config[key] for key in ("architecture", "optimizer", "scheduler", "augmentations")
)

### Fix Seeds

In [None]:
# Fix the random seed through the project helper, using the value stored in the
# config (presumably seeds the Python/NumPy/PyTorch RNGs -- see fgvc.utils.utils).
seed_everything(config['random_seed'])

### Init Model

In [None]:
# %%
# Build the architecture named in the config and read the input normalisation
# statistics from the model's default configuration.
model = getModel(config['architecture'], config['number_of_classes'], pretrained=True)
model_mean = list(model.default_cfg['mean'])
model_std = list(model.default_cfg['std'])

# Load the fine-tuned weights. map_location='cpu' lets the checkpoint be read on
# a machine without CUDA (the `device` fallback above) even if its tensors were
# saved from a GPU; the model is moved to `device` in a later cell.
model.load_state_dict(
    torch.load('./vit_base_patch32_224-SGD-cyclic_cosine-light-random_crop-100E.pth', map_location='cpu')
)

In [None]:
# Evaluation datasets and loaders: every test set is wrapped twice, once with
# the 'vanilla' and once with the 'center_crop' test-time transform.


def _make_eval_loader(dataset):
    """Return a non-shuffled DataLoader over *dataset* using the config's batch size and worker count."""
    return DataLoader(
        dataset,
        batch_size=config['batch_size'],
        shuffle=False,
        num_workers=config['workers'],
    )


_transform_kwargs = dict(image_size=config['image_size'], mean=model_mean, std=model_std)
vanilla_augmentations = test_transforms(data='vanilla', **_transform_kwargs)
crop_augmentations = test_transforms(data='center_crop', **_transform_kwargs)

# Datasets (same metadata, different transform).
PlantCLEF2017_test_dataset_vanilla = TrainDataset(PlantCLEF2017_test, transform=vanilla_augmentations)
PlantCLEF2017_test_dataset_crop = TrainDataset(PlantCLEF2017_test, transform=crop_augmentations)

PlantCLEF2018_test_dataset_vanilla = TrainDataset(PlantCLEF2018_test, transform=vanilla_augmentations)
PlantCLEF2018_test_dataset_crop = TrainDataset(PlantCLEF2018_test, transform=crop_augmentations)

expert_test_dataset_vanilla = TrainDataset(expert_subset, transform=vanilla_augmentations)
expert_test_dataset_crop = TrainDataset(expert_subset, transform=crop_augmentations)

# Loaders. shuffle=False keeps the batches in metadata row order, which the
# evaluation code relies on when it writes predictions back into the metadata.
PlantCLEF2017_test_loader_vanilla = _make_eval_loader(PlantCLEF2017_test_dataset_vanilla)
PlantCLEF2017_test_loader_crop = _make_eval_loader(PlantCLEF2017_test_dataset_crop)

PlantCLEF2018_test_loader_vanilla = _make_eval_loader(PlantCLEF2018_test_dataset_vanilla)
PlantCLEF2018_test_loader_crop = _make_eval_loader(PlantCLEF2018_test_dataset_crop)

expert_test_loader_vanilla = _make_eval_loader(expert_test_dataset_vanilla)
expert_test_loader_crop = _make_eval_loader(expert_test_dataset_crop)

In [None]:
# Move the model to the selected device and put it into evaluation mode
# (both calls return the module itself, so they can be chained).
model = model.to(device).eval()

print('Model Loaded and set to Eval mode.')

In [None]:
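# from fgvc.utils.performance import test_loop
# NOTE: with the import above commented out, `test_loop` is only defined in a
# later cell of this notebook; run that cell before the evaluation cells below.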

###  PlantCLEF 2017

In [None]:
# PlantCLEF 2017: image-level accuracy plus the two observation-level scores,
# once per test-time transform. Both calls write their 'logits'/'preds' into the
# same PlantCLEF2017_test frame, so the crop run overwrites the vanilla columns.
# NOTE: test_loop is defined in a later cell of this notebook; run that cell first.
vanilla_accuracy, vanilla_max_logit_obs_acc, vanilla_mean_softmax_obs_acc = test_loop(
    PlantCLEF2017_test, PlantCLEF2017_test_loader_vanilla, model, device
)
crop_accuracy, crop_max_logit_obs_acc, crop_mean_softmax_obs_acc = test_loop(
    PlantCLEF2017_test, PlantCLEF2017_test_loader_crop, model, device
)

In [None]:
# Report the PlantCLEF 2017 scores as percentages rounded to two decimals.
# (Fixes the misspelled 'Vanila' label in the third line of the report.)
print('Vanilla Accuracy:', np.round(vanilla_accuracy * 100, 2))
print('Vanilla Obs. Accuracy (max logit):', np.round(vanilla_max_logit_obs_acc * 100, 2))
print('Vanilla Obs. Accuracy (mean softmax):', np.round(vanilla_mean_softmax_obs_acc * 100, 2))
print('------------------------------------')
print('Crop Accuracy:', np.round(crop_accuracy * 100, 2))
print('Crop Obs. Accuracy (max logit):', np.round(crop_max_logit_obs_acc * 100, 2))
print('Crop Obs. Accuracy (mean softmax):', np.round(crop_mean_softmax_obs_acc * 100, 2))

### PlantCLEF 2018

In [None]:
# PlantCLEF 2018: same evaluation as above, reusing (and overwriting) the
# result variables. The crop run overwrites the 'logits'/'preds' columns that
# the vanilla run wrote into PlantCLEF2018_test.
vanilla_accuracy, vanilla_max_logit_obs_acc, vanilla_mean_softmax_obs_acc = test_loop(
    PlantCLEF2018_test, PlantCLEF2018_test_loader_vanilla, model, device
)
crop_accuracy, crop_max_logit_obs_acc, crop_mean_softmax_obs_acc = test_loop(
    PlantCLEF2018_test, PlantCLEF2018_test_loader_crop, model, device
)

In [None]:
# Report the PlantCLEF 2018 scores as percentages rounded to two decimals.
# (Fixes the misspelled 'Vanila' label in the third line of the report.)
print('Vanilla Accuracy:', np.round(vanilla_accuracy * 100, 2))
print('Vanilla Obs. Accuracy (max logit):', np.round(vanilla_max_logit_obs_acc * 100, 2))
print('Vanilla Obs. Accuracy (mean softmax):', np.round(vanilla_mean_softmax_obs_acc * 100, 2))
print('------------------------------------')
print('Crop Accuracy:', np.round(crop_accuracy * 100, 2))
print('Crop Obs. Accuracy (max logit):', np.round(crop_max_logit_obs_acc * 100, 2))
print('Crop Obs. Accuracy (mean softmax):', np.round(crop_mean_softmax_obs_acc * 100, 2))

### PlantCLEF 2018 - Experts

In [None]:
# Expert subset of PlantCLEF 2018: same evaluation, again reusing the result
# variables; both runs write 'logits'/'preds' into `expert_subset`.
vanilla_accuracy, vanilla_max_logit_obs_acc, vanilla_mean_softmax_obs_acc = test_loop(
    expert_subset, expert_test_loader_vanilla, model, device
)
crop_accuracy, crop_max_logit_obs_acc, crop_mean_softmax_obs_acc = test_loop(
    expert_subset, expert_test_loader_crop, model, device
)

In [None]:
# Report the expert-subset scores as percentages rounded to two decimals.
# (Fixes the misspelled 'Vanila' label in the third line of the report.)
print('Vanilla Accuracy:', np.round(vanilla_accuracy * 100, 2))
print('Vanilla Obs. Accuracy (max logit):', np.round(vanilla_max_logit_obs_acc * 100, 2))
print('Vanilla Obs. Accuracy (mean softmax):', np.round(vanilla_mean_softmax_obs_acc * 100, 2))
print('------------------------------------')
print('Crop Accuracy:', np.round(crop_accuracy * 100, 2))
print('Crop Obs. Accuracy (max logit):', np.round(crop_max_logit_obs_acc * 100, 2))
print('Crop Obs. Accuracy (mean softmax):', np.round(crop_mean_softmax_obs_acc * 100, 2))

In [None]:
import tqdm
import torch

import numpy as np

from scipy import stats
from sklearn.metrics import accuracy_score


def max_logits_performance(test_metadata):
    """Score observations by the prediction of their single most confident image.

    Each row of ``test_metadata`` is one image. Rows sharing an
    ``ObservationId`` form one observation, and the whole observation is
    assigned the ``preds`` value of its row with the largest logit (the first
    such row on ties).

    Args:
        test_metadata: DataFrame with ``ObservationId``, ``class_id``,
            ``logits`` (one array-like of scores per row) and ``preds``
            columns. Modified in place: the ``max_logits`` and
            ``observation_max`` columns are added or overwritten. Row labels
            are assumed to be unique.

    Returns:
        Accuracy over observations, where the first row of every observation
        supplies its ground-truth ``class_id``.
    """
    test_metadata['max_logits'] = [np.max(row) for row in test_metadata['logits']]

    # One pass over the groups instead of re-filtering the whole frame for every
    # observation and writing cell by cell.
    best_row_per_obs = test_metadata.groupby('ObservationId', sort=False)['max_logits'].idxmax()
    best_preds = test_metadata.loc[best_row_per_obs.to_numpy(), 'preds'].to_numpy()
    obs_prediction = dict(zip(best_row_per_obs.index, best_preds))
    test_metadata['observation_max'] = test_metadata['ObservationId'].map(obs_prediction)

    # Keep a single row per observation so every observation counts once.
    test_metadata_obs = test_metadata.drop_duplicates(subset=['ObservationId'])
    max_logits_accuracy = accuracy_score(
        test_metadata_obs['class_id'],
        test_metadata_obs['observation_max'].astype('int32'),
    )

    return max_logits_accuracy
    
    
def mean_softmax_performance(test_metadata):
    """Score observations by the class with the highest mean softmax probability.

    Each row of ``test_metadata`` is one image. For every ``ObservationId`` the
    per-image logits are converted to probabilities with softmax, averaged over
    the observation's images, and the arg-max class becomes the prediction for
    the whole observation.

    The previous implementation took the arg-max of the *summed raw logits*,
    which is not the mean-softmax rule this function (and the reported metric)
    is named after and can select a different class.

    Args:
        test_metadata: DataFrame with ``ObservationId``, ``class_id`` and
            ``logits`` (one equally long 1-D score array per row) columns.
            Modified in place: the ``observation_mean`` column is added or
            overwritten.

    Returns:
        Accuracy over observations, where the first row of every observation
        supplies its ground-truth ``class_id``.
    """
    # Local import: the notebook only imports softmax in a later cell.
    from scipy.special import softmax

    obs_prediction = {}
    for obs_id, obs_logits in test_metadata.groupby('ObservationId', sort=False)['logits']:
        probabilities = softmax(np.stack(list(obs_logits)), axis=1)
        obs_prediction[obs_id] = np.argmax(probabilities.mean(axis=0))

    test_metadata['observation_mean'] = test_metadata['ObservationId'].map(obs_prediction)

    # Keep a single row per observation so every observation counts once.
    test_metadata_obs = test_metadata.drop_duplicates(subset=['ObservationId'])
    mean_softmax_accuracy = accuracy_score(
        test_metadata_obs['class_id'],
        test_metadata_obs['observation_mean'].astype('int32'),
    )

    return mean_softmax_accuracy
    
    
def observation_performance(test_metadata):
    """Return both observation-level accuracies for ``test_metadata``.

    Calls ``max_logits_performance`` first and ``mean_softmax_performance``
    second on the same DataFrame and returns their results as a
    ``(max_logits_accuracy, mean_softmax_accuracy)`` tuple.
    """
    return (
        max_logits_performance(test_metadata),
        mean_softmax_performance(test_metadata),
    )


def test_loop(test_metadata, test_loader, model, device):
    """Run inference over ``test_loader`` and score image- and observation-level accuracy.

    Args:
        test_metadata: DataFrame with one row per sample, in the same order in
            which the loader yields them; needs ``class_id`` and
            ``ObservationId`` columns. Modified in place: ``logits`` and
            ``preds`` are written here, and ``observation_performance`` is then
            called on the same frame.
        test_loader: Iterable of ``(images, _, _)`` batches; only the images
            are used.
        model: Callable mapping an image batch to per-class scores. It is used
            as passed in (this function does not call ``model.eval()``).
        device: Device the image batches are moved to before the forward pass.

    Returns:
        Tuple ``(accuracy, max_logit_obs_acc, mean_softmax_obs_acc)``.
    """
    preds = np.zeros((len(test_metadata)))
    preds_raw = []

    # Running write position. The previous slice start, i * len(images), used the
    # size of the *current* batch, so a smaller final batch was written over
    # earlier predictions and the tail of `preds` stayed at 0.
    offset = 0

    with torch.no_grad():
        for images, _, _ in tqdm.tqdm(test_loader, total=len(test_loader)):
            images = images.to(device)
            y_preds = model(images).to('cpu').numpy()

            batch_len = len(y_preds)
            preds[offset: offset + batch_len] = y_preds.argmax(1)
            preds_raw.extend(y_preds)
            offset += batch_len

    test_metadata['logits'] = preds_raw
    test_metadata['preds'] = preds

    accuracy = accuracy_score(test_metadata['class_id'], test_metadata['preds'])

    max_logit_obs_acc, mean_softmax_obs_acc = observation_performance(test_metadata)

    return accuracy, max_logit_obs_acc, mean_softmax_obs_acc

### Validation Performance

In [None]:
def _relocate_val_path(path):
    """Rewrite either relative dataset prefix to the '/Data-10T/PlantCLEF/' root."""
    # The longer prefix has to be tried first: it contains the shorter one.
    for old_prefix in ('../../../nahouby/Datasets/PlantCLEF/', '../../nahouby/Datasets/PlantCLEF/'):
        path = path.replace(old_prefix, '/Data-10T/PlantCLEF/')
    return path


val_metadata['image_path'] = val_metadata['image_path'].apply(_relocate_val_path)

# Validation images go through the same 'vanilla' test-time transform and the
# same non-shuffled loader settings as the test sets.
val_dataset = TrainDataset(val_metadata, transform=vanilla_augmentations)

val_loader = DataLoader(
    val_dataset,
    batch_size=config['batch_size'],
    shuffle=False,
    num_workers=config['workers'],
)




In [None]:
# Display the validation metadata as the cell output.
val_metadata

In [None]:
# Run the model over the validation loader, collecting the raw per-class scores
# and the arg-max class id for every sample in loader order.
preds = np.zeros((len(val_metadata)))
preds_raw = []
wrong_paths = []

# Running write position. The previous slice start, i * len(images), used the
# size of the *current* batch, so a smaller final batch was written over earlier
# predictions and the tail of `preds` stayed at 0.
offset = 0

for images, _, _ in tqdm.tqdm(val_loader, total=len(val_loader)):

    images = images.to(device)

    with torch.no_grad():
        y_preds = model(images)

    batch_len = len(images)
    preds[offset: offset + batch_len] = y_preds.argmax(1).to('cpu').numpy()
    preds_raw.extend(y_preds.to('cpu').numpy())
    offset += batch_len

In [None]:
# `softmax` is used below, but the notebook only imports it in a later cell;
# import it here so this cell also works when the notebook is run top to bottom.
from scipy.special import softmax

# Attach the raw scores, the predicted class ids and the largest softmax
# probability of every validation image to the metadata.
val_metadata['logits'] = preds_raw
val_metadata['preds'] = preds
val_metadata['max_probability'] = [np.max(softmax(row)) for row in val_metadata['logits']]


# Image-level accuracy on the validation set.
accuracy = accuracy_score(val_metadata['class_id'], val_metadata['preds'])

In [None]:
from scipy.special import softmax

In [None]:
# Largest softmax probability per validation image, computed from the stored
# logits. An earlier cell already fills this column with the same expression,
# so this only recomputes it.
val_metadata['max_probability'] = [np.max(softmax(row)) for row in val_metadata['logits']]

In [None]:
# Histogram (20 bins) of the per-image maximum softmax probability.
val_metadata['max_probability'].hist(bins=20)

In [None]:
def threshold_analysis(test_metadata, performance_threshold: float = 0.50, performance_step: float = 0.10):
    """Find, per class, the lowest confidence threshold that reaches a target accuracy.

    For every ``class_id`` the candidate thresholds ``0.0, step, 2*step, ...``
    (below 1.0) are tried in ascending order. A candidate keeps the class's
    samples whose ``max_probability`` is at least the threshold; the first
    candidate for which the share of kept samples with ``preds == class_id``
    is at least ``performance_threshold`` becomes the class threshold. Classes
    for which no candidate qualifies get the threshold ``1.0``.

    Args:
        test_metadata: DataFrame with ``class_id``, ``preds`` and
            ``max_probability`` columns. Not modified.
        performance_threshold: Minimum within-class accuracy a threshold has
            to reach.
        performance_step: Spacing of the candidate thresholds.

    Returns:
        Tuple ``(class_thresholds, fraction)``: a dict mapping each class id to
        its threshold, and the number of samples kept by the qualifying
        thresholds divided by ``len(test_metadata)``. An empty input yields
        ``({}, 0.0)``.
    """
    if len(test_metadata) == 0:
        # Nothing to analyse; also avoids dividing by zero below.
        return {}, 0.0

    candidate_thresholds = np.arange(0.0, 1.0, performance_step)
    class_thresholds = {}
    classified_documents = 0

    # groupby visits the classes in sorted order and hands over each class's
    # rows once, instead of re-filtering the full frame for every threshold.
    for class_id, class_metadata in test_metadata.groupby('class_id'):
        confidence = class_metadata['max_probability'].to_numpy()
        is_correct = (class_metadata['preds'] == class_metadata['class_id']).to_numpy()

        class_thresholds[class_id] = 1.0  # default when no candidate qualifies

        for threshold in candidate_thresholds:
            kept = confidence >= threshold
            num_documents = int(kept.sum())
            if num_documents == 0:
                # Candidates ascend, so every later one keeps nothing as well.
                break

            if performance_threshold <= is_correct[kept].mean():
                class_thresholds[class_id] = threshold
                classified_documents += num_documents
                break

    return class_thresholds, classified_documents / len(test_metadata)

In [None]:
# Per-class confidence thresholds on the validation set for a target within-class
# accuracy of 0.1 (the step keeps its default), plus the fraction of samples kept.
class_tresholds, fraction = threshold_analysis(val_metadata, 0.1)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Histogram (20 bins) of the per-class thresholds. The dict view is materialised
# as a list so plt.hist receives a plain sequence rather than a dict_values
# object, which not every Matplotlib version converts to an array.
plt.hist(list(class_tresholds.values()), bins=20)

In [None]:
# Sweep the target within-class accuracy and record, for each target, the
# fraction of validation samples kept and the overall accuracy of the kept ones.
performance_step = 0.1

fractions = []
accuracies = []

performance_thresholds = np.arange(0.0, 1.0, performance_step)

# total is the real number of steps (an int) instead of the float 1 / performance_step.
for performance_threshold in tqdm.tqdm(performance_thresholds, total=len(performance_thresholds)):

    class_tresholds, fraction = threshold_analysis(val_metadata, performance_threshold, performance_step)

    # Look up every row's class threshold once and filter in a single pass,
    # instead of re-filtering the whole frame for every class.
    # NOTE(review): classes without a qualifying threshold get 1.0, so a sample
    # whose max_probability equals 1.0 is still selected here although it is not
    # counted in `fraction` -- confirm whether that is intended.
    row_thresholds = val_metadata['class_id'].map(class_tresholds)
    selected_predictions = (
        val_metadata[val_metadata['max_probability'] >= row_thresholds]
        .sort_values('class_id', kind='mergesort')  # stable: same class-grouped row order as before
        .reset_index(drop=True)
        # errors='ignore': the CSV may or may not carry an 'Unnamed: 0' column.
        .drop(columns=['Unnamed: 0'], errors='ignore')
    )
    vanilla_accuracy = accuracy_score(selected_predictions['class_id'], selected_predictions['preds'])

    fractions.append(fraction)
    accuracies.append(vanilla_accuracy)

In [None]:
# Display the accuracies collected by the sweep (one per target accuracy).
accuracies

In [None]:
# Display the kept-sample fractions collected by the sweep (one per target accuracy).
fractions

In [None]:
# Trade-off curve: overall accuracy of the kept samples (x) against the fraction
# of samples kept (y), saved as a PDF next to the notebook.
# The axis limits are fixed, so points outside them are not shown.
plt.plot(accuracies, fractions, linestyle='-', linewidth=1, markersize=2)
plt.xlabel('Overall Accuracy.')
plt.ylabel('Fraction of Classified documents')
plt.xlim(left=0.55, right=1.0)
plt.ylim(bottom=0.4, top=1.0)
plt.tight_layout()
plt.savefig('accuracy_to_num.pdf', dpi=200)