### Imports and Jupyter setup

In [1]:
%load_ext autoreload
%autoreload 2

import os
import time
import tqdm
import torch
import wandb
import numpy as np
import pandas as pd
import torch.nn as nn

from torch.utils.data import DataLoader

from sklearn.metrics import f1_score, accuracy_score, top_k_accuracy_score

os.environ["CUDA_VISIBLE_DEVICES"]="1"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
pd.set_option('display.max_columns', None)
device

device(type='cuda')

### Custom Imports

In [2]:
from fgvc.utils.datasets import TrainDataset
from fgvc.utils.augmentations import light_transforms
from fgvc.utils.utils import timer, init_logger, seed_everything, getModel

In [3]:
!nvidia-smi

Thu Apr 21 17:13:29 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.103.01   Driver Version: 470.103.01   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:04:00.0 Off |                  N/A |
| 26%   48C    P2    55W / 215W |   6520MiB /  7982MiB |     96%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce ...  Off  | 00000000:09:00.0 Off |                  N/A |
| 71%   52C    P8    16W / 340W |   6686MiB / 10014MiB |      0%      Default |
|       

### Load Dataset Metadata

In [6]:
train_metadata = pd.read_csv("../../metadata/PlantCLEF2018_train_metadata.csv")
print(len(train_metadata))

val_metadata = pd.read_csv("../../metadata/PlantCLEF2018_val_metadata.csv")
print(len(val_metadata))

web_metadata = pd.read_csv("../../metadata/PlantCLEF2018_web_metadata.csv")
print(len(web_metadata))

286841
33703


  val_metadata = pd.read_csv("../../metadata/PlantCLEF2018_val_metadata.csv")


281284


In [7]:
web_metadata

Unnamed: 0.1,Unnamed: 0,Species,Origin,OriginalUrl,Genus,Family,ObservationId,MediaId,YearInCLEF,LearnTag,ClassId,BackUpLink,image_path,class_id,family_id,genus_id
0,61483,Amaryllis belladonna L.,web,http://media.eol.org/content/2013/10/01/07/423...,Amaryllis,Liliaceae,453419,527776,PlantCLEF2017,Train,10752,http://otmedia.lirmm.fr/LifeCLEF/b/w/10752/527...,/local/nahouby/Datasets/PlantCLEF/PlantCLEF201...,530,175,128
1,771282,Fraxinus latifolia Benth.,web,http://science.halleyhosting.com/nature/leaves...,Fraxinus,Oleaceae,1282346,1356703,PlantCLEF2017,Train,212716,http://otmedia.lirmm.fr/LifeCLEF/b/w/212716/13...,/local/nahouby/Datasets/PlantCLEF/PlantCLEF201...,3943,226,1176
2,43559,Ballota nigra L.,web,http://media.eol.org/content/2012/05/23/06/018...,Ballota,Lamiaceae,596132,670489,PlantCLEF2017,Train,148503,http://otmedia.lirmm.fr/LifeCLEF/b/w/148503/67...,/local/nahouby/Datasets/PlantCLEF/PlantCLEF201...,1099,166,326
3,195716,Hippocrepis comosa L.,web,http://media.eol.org/content/2012/12/04/18/533...,Hippocrepis,Fabaceae,1437490,1511847,PlantCLEF2017,Train,172970,http://otmedia.lirmm.fr/LifeCLEF/b/w/172970/15...,/local/nahouby/Datasets/PlantCLEF/PlantCLEF201...,4606,121,1398
4,408078,Torilis nodosa (L.) Gaertn.,web,http://actaplantarum.org/floraitaliae/download...,Torilis,Apiaceae,2589621,2663978,PlantCLEF2017,Train,19031,http://otmedia.lirmm.fr/LifeCLEF/b/w/19031/266...,/local/nahouby/Datasets/PlantCLEF/PlantCLEF201...,9319,21,2792
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281279,115901,Achillea filipendulina Lam.,web,http://media.eol.org/content/2016/07/29/12/089...,Achillea,Asteraceae,352224,426581,PlantCLEF2017,Train,69802,http://otmedia.lirmm.fr/LifeCLEF/b/w/69802/426...,/local/nahouby/Datasets/PlantCLEF/PlantCLEF201...,151,34,17
281280,659946,Achillea filipendulina Lam.,web,http://4.bp.blogspot.com/-OeUAHR_RINo/UKdIIpYp...,Achillea,Asteraceae,352260,426617,PlantCLEF2017,Train,69802,http://otmedia.lirmm.fr/LifeCLEF/b/w/69802/426...,/local/nahouby/Datasets/PlantCLEF/PlantCLEF201...,151,34,17
281281,659969,Achillea filipendulina Lam.,web,http://www.woodcotenursery.co.uk/perennial/ach...,Achillea,Asteraceae,352230,426587,PlantCLEF2017,Train,69802,http://otmedia.lirmm.fr/LifeCLEF/b/w/69802/426...,/local/nahouby/Datasets/PlantCLEF/PlantCLEF201...,151,34,17
281282,660048,Achillea filipendulina Lam.,web,http://www.performancenursery.gardenideaswest....,Achillea,Asteraceae,352151,426508,PlantCLEF2017,Train,69802,http://otmedia.lirmm.fr/LifeCLEF/b/w/69802/426...,/local/nahouby/Datasets/PlantCLEF/PlantCLEF201...,151,34,17


In [8]:
train_metadata['image_path'] = train_metadata['image_path'].apply(lambda x: x.replace('../../../nahouby/Datasets/PlantCLEF/', '/Data-10T/PlantCLEF/'))
train_metadata['image_path'] = train_metadata['image_path'].apply(lambda x: x.replace('../../nahouby/Datasets/PlantCLEF/', '/Data-10T/PlantCLEF/'))

val_metadata['image_path'] = val_metadata['image_path'].apply(lambda x: x.replace('../../../nahouby/Datasets/PlantCLEF/', '/Data-10T/PlantCLEF/'))
val_metadata['image_path'] = val_metadata['image_path'].apply(lambda x: x.replace('../../nahouby/Datasets/PlantCLEF/', '/Data-10T/PlantCLEF/'))

web_metadata['image_path'] = web_metadata['image_path'].apply(lambda x: x.replace('../../../nahouby/Datasets/PlantCLEF/', '/Data-10T/PlantCLEF/'))
web_metadata['image_path'] = web_metadata['image_path'].apply(lambda x: x.replace('../../nahouby/Datasets/PlantCLEF/', '/Data-10T/PlantCLEF/'))

In [9]:
from PIL import Image

Image.open(web_metadata['image_path'][0])

FileNotFoundError: [Errno 2] No such file or directory: '/local/nahouby/Datasets/PlantCLEF/PlantCLEF2017/web/10752/527776.jpg'

### Training Parameters

In [None]:
# Adjust BATCH_SIZE and ACCUMULATION_STEPS to values that if multiplied results in 64 !!!!!1

config = {"augmentations": 'light',
           "optimizer": 'SGD',
           "scheduler": 'cyclic_cosine',
           "image_size": (224, 224),
           "random_seed": 777,
           "number_of_classes": len(train_metadata['class_id'].unique()),
           "architecture": 'vit_base_patch32_224',
           "batch_size": 32,
           "accumulation_steps": 4,
           "epochs": 100,
           "learning_rate": 0.01,
           "dataset": 'PlantCLEF2018',
           "scheduler": 'ReduceLROnPlateau',
           "loss": 'CrossEntropyLoss',
           "training_samples": len(train_metadata),
           "valid_samples": len(val_metadata),
           "workers": 12,
           }

RUN_NAME = f"{config['architecture']}-{config['optimizer']}-{config['scheduler']}-{config['augmentations']}"

### Fix Seeds & Log Setup

In [None]:
LOG_FILE = f'{RUN_NAME}.log'
LOGGER = init_logger(LOG_FILE)

seed_everything(config['random_seed'])

### Init Model

In [None]:
# %%
model = getModel(config['architecture'], config['number_of_classes'], pretrained=True)
model_mean = list(model.default_cfg['mean'])
model_std = list(model.default_cfg['std'])

In [None]:
# Adjust BATCH_SIZE and ACCUMULATION_STEPS to values that if multiplied results in 64 !!!!!1

if config['augmentations'] == 'light':
    train_augmentations = light_transforms(data='train', image_size=config['image_size'], mean=model_mean, std=model_std)
    val_augmentations = light_transforms(data='valid', image_size=config['image_size'], mean=model_mean, std=model_std)

train_dataset = TrainDataset(train_metadata, transform=train_augmentations)
valid_dataset = TrainDataset(val_metadata, transform=val_augmentations)

train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True, num_workers=config['workers'])
valid_loader = DataLoader(valid_dataset, batch_size=config['batch_size'], shuffle=False, num_workers=config['workers'])

### Init WandB

In [None]:
from fgvc.utils.wandb import init_wandb

init_wandb(config, RUN_NAME, entity='picekl', project='frontiers-plant-recognition')

### Set Optimizers!

In [None]:
if config['optimizer'] == 'AdamW':
    optimizer = torch.optim.AdamW(model.parameters(), lr=config['learning_rate'], betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
elif config['optimizer'] == 'SGD':
    optimizer = torch.optim.SGD(model.parameters(), lr=config['learning_rate'], momentum=0.9)

if config['scheduler'] =='plateau':
    scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=1, verbose=True, eps=1e-6)
elif config['scheduler'] == 'cyclic_cosine':
    CYCLES = 5
    t_initial = config['epochs'] / CYCLES
    scheduler = CosineLRScheduler(optimizer, t_initial=20, lr_min=0.0001, cycle_decay = 0.9, cycle_limit = 5)

### Training Loop

In [None]:
with timer('Train model', LOGGER):
    
    model.to(device)
    
    criterion = nn.CrossEntropyLoss()
    best_score = 0.
    best_loss = np.inf
    
    for epoch in range(config['epochs']):
        
        start_time = time.time()

        model.train()
        avg_loss = 0.
        
        num_steps_per_epoch = len(train_loader)
        num_updates = epoch * num_steps_per_epoch

        optimizer.zero_grad()
        
        train_lbls = np.zeros((len(train_metadata)))
        train_preds = np.zeros((len(train_metadata)))

        for i, (images, labels, _) in tqdm.tqdm(enumerate(train_loader)):

            images = images.to(device)
            labels = labels.to(device)

            y_preds = model(images)
            loss = criterion(y_preds, labels)
            
            # Scale the loss to the mean of the accumulated batch size
            avg_loss += loss.item() / len(train_loader) 
            loss = loss / config['accumulation_steps']
            loss.backward()
            if (i - 1) % config['accumulation_steps'] == 0:
                optimizer.step()
                optimizer.zero_grad()
                
            if config['scheduler'] == 'cyclic_cosine':
                num_updates += 1
                scheduler.step_update(num_updates=num_updates)
                
                
            train_preds[i * len(labels): (i+1) * len(labels)] = y_preds.argmax(1).to('cpu').numpy()
            train_lbls[i * len(labels): (i+1) * len(labels)] = labels.to('cpu').numpy()
            
        model.eval()
        avg_val_loss = 0.
        preds = np.zeros((len(valid_dataset)))
        preds_raw = []

        for i, (images, labels, _) in enumerate(valid_loader):
            
            images = images.to(device)
            labels = labels.to(device)
            
            with torch.no_grad():
                y_preds = model(images)
            
            preds[i * len(images): (i+1) * len(images)] = y_preds.argmax(1).to('cpu').numpy()
            preds_raw.extend(y_preds.to('cpu').numpy())

            loss = criterion(y_preds, labels)
        
            avg_val_loss += loss.item() / len(valid_loader)
        
        
        if config['scheduler'] == 'plateau':
            scheduler.step(avg_val_loss)
        elif config['scheduler'] == 'cyclic_cosine':
            scheduler.step(epoch + 1)
        
        train_accuracy = accuracy_score(train_lbls, train_preds)
        train_f1 = f1_score(train_lbls, train_preds, average='macro')
        
        accuracy = accuracy_score(val_metadata['class_id'], preds)
        f1 = f1_score(val_metadata['class_id'], preds, average='macro')
        recall_3 = top_k_accuracy_score(val_metadata['class_id'], preds_raw, k=3)

        elapsed = time.time() - start_time
        
        LOGGER.debug(f'  Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f} F1: {f1*100:.2f}  Acc: {accuracy*100:.2f} Recall@3: {recall_3*100:.2f} time: {elapsed:.0f}s')
       
        wandb.log({'Train_loss (avr.)': avg_loss,
                   'Val. loss (avr.)': avg_val_loss,
                   'Val. F1': np.round(f1*100, 2),
                   'Val. Accuracy': np.round(accuracy*100, 2),
                   'Val. Recall@3': np.round(recall_3*100, 2),
                   'Learning Rate': optimizer.param_groups[0]["lr"],
                   'Train. Accuracy': np.round(train_accuracy*100, 2),
                   'Train. F1': np.round(train_f1*100, 2),})

        if accuracy>best_score:
            best_score = accuracy
            LOGGER.debug(f'  Epoch {epoch+1} - Save Best Accuracy: {best_score:.6f} Model')
            torch.save(model.state_dict(), f'{RUN_NAME}_best_accuracy.pth')

        if avg_val_loss<best_loss:
            best_loss = avg_val_loss
            LOGGER.debug(f'  Epoch {epoch+1} - Save Best Loss: {best_loss:.4f} Model')
            torch.save(model.state_dict(), f'{RUN_NAME}_best_loss.pth')

In [None]:
torch.save(model.state_dict(), f'{RUN_NAME}-100E.pth')