### Imports and Jupyter setup

In [1]:
%load_ext autoreload
%autoreload 2

import os
import time
import tqdm
import torch
import wandb
import numpy as np
import pandas as pd
import torch.nn as nn

from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from timm.scheduler import CosineLRScheduler
from sklearn.metrics import f1_score, accuracy_score, top_k_accuracy_score

# Restrict this process to CUDA device index 0; set before the first CUDA call
# below so that the restriction is in effect when CUDA is queried.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Do not truncate columns when DataFrames are displayed.
pd.set_option('display.max_columns', None)

# Use the GPU when one is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

### Custom Imports

In [2]:
from fgvc.utils.datasets import TrainDataset
from fgvc.utils.augmentations import test_transforms
# from fgvc.utils.utils import timer, init_logger, , 

from fgvc.utils.utils import timer, init_logger, seed_everything, getModel

In [3]:
# Record the GPU state (driver/CUDA version, memory use, utilisation) at run time.
!nvidia-smi

Thu Apr 28 16:24:33 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.91.03    Driver Version: 460.91.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce RTX 207...  Off  | 00000000:09:00.0 Off |                  N/A |
| 25%   31C    P8     5W / 215W |    277MiB /  7974MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  GeForce GTX 1080    Off  | 00000000:0A:00.0 Off |                  N/A |
| 38%   68C    P2   188W / 180W |   1278MiB /  8119MiB |     92%      Default |
|       

### Load Dataset Metadata

In [4]:
# Metadata tables for the train/validation splits (paths are relative to this notebook).
# NOTE(review): the saved output suggests pandas emitted a warning for the
# val_metadata read — check whether an explicit dtype= is needed there.
train_metadata = pd.read_csv("../../metadata/PlantCLEF2018_train_metadata.csv")
val_metadata = pd.read_csv("../../metadata/PlantCLEF2018_val_metadata.csv")

# Metadata tables for the two test sets evaluated below.
PlantCLEF2017_test = pd.read_csv("../../metadata/PlantCLEF2017_test_metadata.csv")
PlantCLEF2018_test = pd.read_csv("../../metadata/PlantCLEF2018_test_metadata.csv")

# ';'-separated list of media/observation ids; `names=` is passed, so pandas
# treats the file as header-less and every line as data.
expert_media_ids = pd.read_csv(
    "../../metadata/MediaId_ObservationId_ManVsMachineSubPart_ExpertCLEF2018.csv",
    sep=';',
    names=['MediaId', 'ObservationId'],
)

# Keep the 2018 test rows whose MediaId appears in the expert list.
# reset_index(drop=True) renumbers the rows without first materialising the old
# index as a column, so an existing 'index' column can never be dropped by mistake.
is_expert_row = PlantCLEF2018_test['MediaId'].isin(expert_media_ids['MediaId'])
expert_subset = PlantCLEF2018_test[is_expert_row].reset_index(drop=True)

print(f'Number of samples in PlantCLEF2017_test: {len(PlantCLEF2017_test)}')
print(f'Number of samples in PlantCLEF2018_test: {len(PlantCLEF2018_test)}')
print(f'Number of samples in PlantCLEF2018_expert_test: {len(expert_subset)}')

Number of samples in PlantCLEF2017_test: 25170
Number of samples in PlantCLEF2018_test: 6892
Number of samples in PlantCLEF2018_expert_test: 216


  val_metadata = pd.read_csv("../../metadata/PlantCLEF2018_val_metadata.csv")


In [5]:
# Rewrite the path prefix stored in the metadata to the local data root.
# NOTE(review): both prefixes are machine-specific absolute paths — consider
# moving them to the configuration cell.
ORIGINAL_DATA_ROOT = '/local/nahouby/Datasets/PlantCLEF/'
LOCAL_DATA_ROOT = '/Projects/Data/'

for test_metadata in (PlantCLEF2017_test, PlantCLEF2018_test, expert_subset):
    # Literal (non-regex) substring replacement, vectorised over the column.
    test_metadata['image_path'] = test_metadata['image_path'].str.replace(
        ORIGINAL_DATA_ROOT, LOCAL_DATA_ROOT, regex=False
    )

### Training Parameters

In [6]:
# Reminder carried over from the training setup: choose batch_size and
# accumulation_steps so that their product is 64.
# NOTE(review): the values below give 16 * 8 = 128 — confirm which is intended.

# Number of distinct class ids present in the training metadata.
class_count = len(train_metadata['class_id'].unique())

config = dict(
    augmentations='light',
    optimizer='SGD',
    scheduler='plateau',
    image_size=(224, 224),
    random_seed=777,
    number_of_classes=class_count,
    architecture='vit_base_patch16_224',
    batch_size=16,
    accumulation_steps=8,
    epochs=100,
    learning_rate=0.01,
    dataset='PlantCLEF2018',
    loss='CrossEntropyLoss',
    training_samples=len(train_metadata),
    valid_samples=len(val_metadata),
    workers=12,
)

# Run identifier: architecture, optimizer, scheduler and augmentation level joined by '-'.
RUN_NAME = "-".join(
    config[key] for key in ('architecture', 'optimizer', 'scheduler', 'augmentations')
)

### Fix Seeds

In [7]:
# Fix the random seeds using the value from the config.
# NOTE(review): which RNGs seed_everything covers is defined in fgvc.utils.utils — confirm.
seed_everything(config['random_seed'])

### Init Model

In [8]:
# Build the architecture named in the config, sized for the number of classes.
# NOTE(review): pretrained=True presumably fetches pretrained weights that the
# checkpoint loaded below then overwrites — confirm whether it is needed.
model = getModel(config['architecture'], config['number_of_classes'], pretrained=True)

# Normalisation statistics taken from the model's default config; the
# test-time transforms are built with these values.
model_mean = list(model.default_cfg['mean'])
model_std = list(model.default_cfg['std'])

# map_location='cpu' lets the checkpoint load even when it was saved from a GPU
# that is not available here (`device` may fall back to the CPU); the model is
# moved to `device` in a later cell.
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
CHECKPOINT_PATH = './vit_base_patch16_224-SGD-plateau-light-100E.pth'
state_dict = torch.load(CHECKPOINT_PATH, map_location='cpu')
model.load_state_dict(state_dict)

<All keys matched successfully>

In [9]:
def make_test_loader(dataset):
    """Wrap ``dataset`` in an unshuffled DataLoader.

    Batch size and worker count are read from the global ``config``;
    ``shuffle=False`` keeps the batches in dataset order.
    """
    return DataLoader(dataset,
                      batch_size=config['batch_size'],
                      shuffle=False,
                      num_workers=config['workers'])


# Two test-time preprocessing variants from test_transforms, selected by the
# `data` argument and normalised with the model's mean/std.
vanilla_augmentations = test_transforms(data='vanilla', image_size=config['image_size'], mean=model_mean, std=model_std)
crop_augmentations = test_transforms(data='center_crop', image_size=config['image_size'], mean=model_mean, std=model_std)

# TrainDataset is reused for the test splits: one dataset per (split, transform) pair.
PlantCLEF2017_test_dataset_vanilla = TrainDataset(PlantCLEF2017_test, transform=vanilla_augmentations)
PlantCLEF2017_test_dataset_crop = TrainDataset(PlantCLEF2017_test, transform=crop_augmentations)

PlantCLEF2018_test_dataset_vanilla = TrainDataset(PlantCLEF2018_test, transform=vanilla_augmentations)
PlantCLEF2018_test_dataset_crop = TrainDataset(PlantCLEF2018_test, transform=crop_augmentations)

expert_test_dataset_vanilla = TrainDataset(expert_subset, transform=vanilla_augmentations)
expert_test_dataset_crop = TrainDataset(expert_subset, transform=crop_augmentations)

# One loader per dataset, all built with identical settings.
PlantCLEF2017_test_loader_vanilla = make_test_loader(PlantCLEF2017_test_dataset_vanilla)
PlantCLEF2017_test_loader_crop = make_test_loader(PlantCLEF2017_test_dataset_crop)

PlantCLEF2018_test_loader_vanilla = make_test_loader(PlantCLEF2018_test_dataset_vanilla)
PlantCLEF2018_test_loader_crop = make_test_loader(PlantCLEF2018_test_dataset_crop)

expert_test_loader_vanilla = make_test_loader(expert_test_dataset_vanilla)
expert_test_loader_crop = make_test_loader(expert_test_dataset_crop)

In [10]:
# Move the model to the selected device, then switch it to evaluation mode.
model.to(device).eval()

print('Model Loaded and set to Eval mode.')

Model Loaded and set to Eval mode.


In [11]:
from fgvc.utils.performance import test_loop 

### PlantCLEF 2017

In [12]:
# Evaluate the PlantCLEF 2017 test set with each test-time transform.
# test_loop returns three values, unpacked here as: accuracy, observation
# accuracy (max logit) and observation accuracy (mean softmax).
vanilla_metrics = test_loop(PlantCLEF2017_test, PlantCLEF2017_test_loader_vanilla, model, device)
vanilla_accuracy, vanilla_max_logit_obs_acc, vanilla_mean_softmax_obs_acc = vanilla_metrics

crop_metrics = test_loop(PlantCLEF2017_test, PlantCLEF2017_test_loader_crop, model, device)
crop_accuracy, crop_max_logit_obs_acc, crop_mean_softmax_obs_acc = crop_metrics

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1574/1574 [02:34<00:00, 10.17it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1574/1574 [02:35<00:00, 10.10it/s]


In [13]:
# Report the metrics as percentages rounded to two decimals.
# The vanilla_*/crop_* names are overwritten by every evaluation cell, so this
# prints the results of whichever evaluation cell ran last.
print('Vanilla Accuracy:', np.round(vanilla_accuracy * 100, 2))
print('Vanilla Obs. Accuracy (max logit):', np.round(vanilla_max_logit_obs_acc * 100, 2))
print('Vanilla Obs. Accuracy (mean softmax):', np.round(vanilla_mean_softmax_obs_acc * 100, 2))
print('------------------------------------')
print('Crop Accuracy:', np.round(crop_accuracy * 100, 2))
print('Crop Obs. Accuracy (max logit):', np.round(crop_max_logit_obs_acc * 100, 2))
print('Crop Obs. Accuracy (mean softmax):', np.round(crop_mean_softmax_obs_acc * 100, 2))

Vanilla Accuracy: 76.08
Vanilla Obs. Accuracy (max logit): 82.48
Vanila Obs. Accuracy (mean softmax): 83.22
------------------------------------
Crop Accuracy: 70.97
Crop Obs. Accuracy (max logit): 79.18
Crop Obs. Accuracy (mean softmax): 80.21


### PlantCLEF 2018

In [14]:
# Evaluate the PlantCLEF 2018 test set with each test-time transform.
# test_loop returns three values, unpacked here as: accuracy, observation
# accuracy (max logit) and observation accuracy (mean softmax).
vanilla_metrics = test_loop(PlantCLEF2018_test, PlantCLEF2018_test_loader_vanilla, model, device)
vanilla_accuracy, vanilla_max_logit_obs_acc, vanilla_mean_softmax_obs_acc = vanilla_metrics

crop_metrics = test_loop(PlantCLEF2018_test, PlantCLEF2018_test_loader_crop, model, device)
crop_accuracy, crop_max_logit_obs_acc, crop_mean_softmax_obs_acc = crop_metrics

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 431/431 [00:42<00:00, 10.11it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 431/431 [00:42<00:00, 10.10it/s]


In [15]:
# Report the metrics as percentages rounded to two decimals.
# The vanilla_*/crop_* names are overwritten by every evaluation cell, so this
# prints the results of whichever evaluation cell ran last.
print('Vanilla Accuracy:', np.round(vanilla_accuracy * 100, 2))
print('Vanilla Obs. Accuracy (max logit):', np.round(vanilla_max_logit_obs_acc * 100, 2))
print('Vanilla Obs. Accuracy (mean softmax):', np.round(vanilla_mean_softmax_obs_acc * 100, 2))
print('------------------------------------')
print('Crop Accuracy:', np.round(crop_accuracy * 100, 2))
print('Crop Obs. Accuracy (max logit):', np.round(crop_max_logit_obs_acc * 100, 2))
print('Crop Obs. Accuracy (mean softmax):', np.round(crop_mean_softmax_obs_acc * 100, 2))

Vanilla Accuracy: 51.99
Vanilla Obs. Accuracy (max logit): 69.74
Vanila Obs. Accuracy (mean softmax): 74.03
------------------------------------
Crop Accuracy: 37.36
Crop Obs. Accuracy (max logit): 60.71
Crop Obs. Accuracy (mean softmax): 64.38


### PlantCLEF 2018 - Experts

In [16]:
# Evaluate the expert subset of the 2018 test set with each test-time transform.
# test_loop returns three values, unpacked here as: accuracy, observation
# accuracy (max logit) and observation accuracy (mean softmax).
vanilla_metrics = test_loop(expert_subset, expert_test_loader_vanilla, model, device)
vanilla_accuracy, vanilla_max_logit_obs_acc, vanilla_mean_softmax_obs_acc = vanilla_metrics

crop_metrics = test_loop(expert_subset, expert_test_loader_crop, model, device)
crop_accuracy, crop_max_logit_obs_acc, crop_mean_softmax_obs_acc = crop_metrics

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [00:01<00:00,  8.69it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [00:01<00:00,  8.86it/s]


In [17]:
# Report the metrics as percentages rounded to two decimals.
# The vanilla_*/crop_* names are overwritten by every evaluation cell, so this
# prints the results of whichever evaluation cell ran last.
print('Vanilla Accuracy:', np.round(vanilla_accuracy * 100, 2))
print('Vanilla Obs. Accuracy (max logit):', np.round(vanilla_max_logit_obs_acc * 100, 2))
print('Vanilla Obs. Accuracy (mean softmax):', np.round(vanilla_mean_softmax_obs_acc * 100, 2))
print('------------------------------------')
print('Crop Accuracy:', np.round(crop_accuracy * 100, 2))
print('Crop Obs. Accuracy (max logit):', np.round(crop_max_logit_obs_acc * 100, 2))
print('Crop Obs. Accuracy (mean softmax):', np.round(crop_mean_softmax_obs_acc * 100, 2))

Vanilla Accuracy: 40.74
Vanilla Obs. Accuracy (max logit): 60.0
Vanila Obs. Accuracy (mean softmax): 64.0
------------------------------------
Crop Accuracy: 39.81
Crop Obs. Accuracy (max logit): 52.0
Crop Obs. Accuracy (mean softmax): 60.0
