In [1]:
import gc
import os
import os.path as osp
import cv2
import sys
import json
import tqdm
import time
import timm
import torch
import random
import sklearn.metrics

from PIL import Image
from pathlib import Path
from functools import partial
from contextlib import contextmanager

import numpy as np
import scipy as sp
import pandas as pd
import torch.nn as nn

from torch.optim import Adam, SGD
from scipy.special import softmax
from albumentations.pytorch import ToTensorV2
from torch.utils.data import DataLoader, Dataset
from albumentations import Compose, Normalize, Resize
from torch.optim.lr_scheduler import CosineAnnealingLR
from sklearn.metrics import f1_score, accuracy_score, top_k_accuracy_score

os.environ["CUDA_VISIBLE_DEVICES"]="0"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
def seed_torch(seed=777):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

SEED = 777
seed_torch(SEED)


In [4]:
import timm
import torch
import torchvision.transforms as T
from PIL import Image
from urllib.request import urlopen
model = timm.create_model("hf-hub:BVRA/tf_efficientnet_b3.in1k_ft_df20m_224", pretrained=True)
model = model.eval()
train_transforms = T.Compose([T.Resize(224), 
                              T.ToTensor(), 
                              T.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]) 

# test_image_path = "../data/DF20M/2237852193-58.JPG"
# img = Image.open(test_image_path)
# output = model(train_transforms(img).unsqueeze(0))  

In [5]:
# softmax(output.detach().numpy()).max()

## Using metadata

In [6]:
train_df = pd.read_csv("../metadata/DanishFungi2023-train_mini.csv")
test_df = pd.read_csv("../metadata/DanishFungi2023-val_mini.csv")
# test_df

In [7]:

IMAGE_DIR = r"C:\Programming\Python Projects\Vision\FungiCLEF\data\DF20M"
train_df["image_path"] = train_df.image_path.apply(
    lambda path: os.path.join(IMAGE_DIR, os.path.basename(path)))

test_df["image_path"] = test_df.image_path.apply(
    lambda path: os.path.join(IMAGE_DIR, os.path.basename(path)))

In [8]:
from sklearn import preprocessing

label_encoders = {}
columns_to_be_encoded = ["Habitat", "Substrate", "MetaSubstrate"]

for column_name in columns_to_be_encoded:
    le = preprocessing.LabelEncoder()
    label_encoders = {column_name: le}
    
    train_df[column_name] = le.fit_transform(train_df[column_name]).astype(np.int64)
    test_df[column_name] = le.fit_transform(test_df[column_name]).astype(np.int64)

In [9]:
metadata = pd.concat([train_df, test_df])
len(metadata)

36393

# Calculating prios

In [10]:
cls_counts = metadata.groupby("class_id").size()
class_distribution = cls_counts / len(metadata)
sum(class_distribution)

1.0

## Calculate Distributions of Selected Features

In [11]:
SELECTED_FEATURES = ["month", "Habitat", "Substrate", "MetaSubstrate"]
metadata_distributions = {}
for feature in SELECTED_FEATURES:
    distribution = metadata.groupby([feature, "class_id"]).size() / metadata.groupby(feature).size()
    metadata_distributions[feature] = distribution

# Predictions

## Preparing Model

In [12]:
N_CLASSES = len(metadata['class_id'].unique())
IMAGE_SIZE = [224, 224]

MODEL_NAME = "BVRA/tf_efficientnet_b3.in1k_ft_df20m_224"
USE_CALIBRATION = True
USE_OBSERVATION_PREDS = True

model = timm.create_model(f"hf-hub:{MODEL_NAME}", pretrained=True)
model = model.eval()
model_mean = list(model.default_cfg['mean'])
model_std = list(model.default_cfg['std'])
print(model_mean, model_std)
model_mean = [0.5, 0.5, 0.5]
model_std = [0.5, 0.5, 0.5]
print(model_mean, model_std)

model.to(device)
model.eval()
print("Done.")

[0.485, 0.456, 0.406] [0.229, 0.224, 0.225]
[0.5, 0.5, 0.5] [0.5, 0.5, 0.5]
Done.


In [13]:
from fgvc.special.calibration import ModelWithTemperature, get_temperature

if USE_CALIBRATION:
    model = ModelWithTemperature(model)


## Prepare Dataloader

In [14]:
from dataset_cls import TestDataset, get_transforms
test_dataset = TestDataset(test_df, [*SELECTED_FEATURES, "observationID"], transform=get_transforms(model_mean, model_std, IMAGE_SIZE))

In [15]:
batch_size = 64

test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=8)

## Vanilla Predictions without Pre-Processing

In [16]:
avg_val_loss = 0.
preds = np.zeros((len(test_df)))
GT_lbls = []
image_paths = []
preds_raw = []
criterion = nn.CrossEntropyLoss()

seen_features = {feature: [] for feature in [*SELECTED_FEATURES, "observationID"]}

for i, (images, labels, paths, selected_features) in enumerate(tqdm.tqdm(test_loader, total=len(test_loader))):

    images = images.to(device)
    labels = labels.to(device)
    
    
    with torch.no_grad():
        y_preds = model(images)
        
    preds[i * batch_size: (i+1) * batch_size] = y_preds.argmax(1).to('cpu').numpy()
    GT_lbls.extend(labels.to('cpu').numpy())
    preds_raw.extend(y_preds.to('cpu').numpy())
    image_paths.extend(paths)
    
    for feature in [*SELECTED_FEATURES, "observationID"]:
        seen_features[feature].extend(selected_features[feature])

vanilla_f1 = f1_score(test_df['class_id'], preds, average='macro')
vanilla_accuracy = accuracy_score(test_df['class_id'], preds)
vanilla_recall_3 = top_k_accuracy_score(test_df['class_id'], preds_raw, k=3)
vanilla_recall_5 = top_k_accuracy_score(test_df['class_id'], preds_raw, k=5)
vanilla_recall_10 = top_k_accuracy_score(test_df['class_id'], preds_raw, k=10)

print('Vanilla:', vanilla_f1, vanilla_accuracy, vanilla_recall_3, vanilla_recall_5, vanilla_recall_10)

  0%|                                                                                                                                                           | 0/58 [01:11<?, ?it/s]


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!

In [None]:
seen_observation_ids = np.array(seen_features["observationID"])
unique_observation_ids = np.unique(seen_observation_ids)

preds_raw_np = np.array(preds_raw)

obs_preds_raw = np.zeros((len(test_df), N_CLASSES))
obs_preds = np.zeros((len(test_df)))

for unique_observation_id in unique_observation_ids:
    same_observation_indexes = np.where(seen_observation_ids == unique_observation_id)
    
    observation_predictions = preds_raw_np[same_observation_indexes]
    # print(np.average(observation_predictions, axis=0))
    _obs_preds = np.average(observation_predictions, axis=0)
    
    obs_preds_raw[same_observation_indexes] = _obs_preds
    
    
    obs_preds[same_observation_indexes] = _obs_preds.argmax()
    
obs_f1 = f1_score(test_df['class_id'], obs_preds, average='macro')
obs_accuracy = accuracy_score(test_df['class_id'], obs_preds)
obs_recall_3 = top_k_accuracy_score(test_df['class_id'], obs_preds_raw, k=3)

print('ObservationID:', obs_f1, obs_accuracy, obs_recall_3)

if USE_OBSERVATION_PREDS:
    vanilla_f1 = obs_f1
    vanilla_accuracy = obs_accuracy
    vanilla_recall_3 = obs_recall_3
    preds_raw = obs_preds_raw
    



In [None]:
temperature = get_temperature(
    logits=np.array(preds_raw),
    targs=np.array(GT_lbls)
)



## Weighting by each Selected Feature

In [None]:
EPSILON_DIVISION_NUMERIC_CHECK = 1e-12

def post_process_by_feature(metadata_distribution, class_distribution, ground_truth_labels, raw_predictions, seen_feature_values):
    wrong_predictions = []
    weighted_predictions = []
    weighted_predictions_raw = []
    feature_prior_ratio = []

    for lbl, raw_prediction, seen_feature in tqdm.tqdm(zip(ground_truth_labels, raw_predictions, seen_feature_values), total=len(ground_truth_labels)):
        preds = softmax(raw_prediction)
        # Add empty distributions
        local_feature_distribution = np.ones(len(preds))
        precomputed_seen_feature_distribution = metadata_distribution[int(seen_feature)]
        local_feature_distribution[precomputed_seen_feature_distribution.index] = precomputed_seen_feature_distribution

        p_feature = (preds * local_feature_distribution) / (sum(preds * local_feature_distribution))


        prior_ratio = p_feature / class_distribution        
        max_index = np.argmax(prior_ratio * preds)     


        feature_prior_ratio.append(prior_ratio)
        weighted_predictions_raw.append(prior_ratio * preds)
        weighted_predictions.append(max_index)

        if lbl != max_index:
            wrong_predictions.append([lbl, seen_feature])
            
    return feature_prior_ratio, weighted_predictions, weighted_predictions_raw


In [None]:

def post_process_selected_features(metadata_distributions, class_distribution, raw_predictions, ground_truth_labels):
    feature_prior_ratios = {}
    metrics_by_features = {}
    for feature in SELECTED_FEATURES:
        metadata_distribution = metadata_distributions[feature]
        seen_feature_values = seen_features[feature]

        feature_prior_ratio, weighted_predictions, weighted_predictions_raw = post_process_by_feature(
            metadata_distribution=metadata_distribution,
            class_distribution=class_distribution,
            ground_truth_labels=ground_truth_labels,
            raw_predictions=raw_predictions,
            seen_feature_values=seen_feature_values
        )
        feature_prior_ratios[feature] = feature_prior_ratio

        f1 = f1_score(test_df['class_id'], weighted_predictions, average='macro')
        accuracy = accuracy_score(test_df['class_id'], weighted_predictions)
        recall_3 = top_k_accuracy_score(test_df['class_id'], weighted_predictions_raw, k=3)
        metrics_by_features[feature] = {
            "f1": f1,
            "accuracy": accuracy,
            "recall_3": recall_3
        }
        print(f'{feature}:', f1, accuracy, recall_3)
        print(f'{feature} dif:', np.around(f1-vanilla_f1, 3), np.around((accuracy-vanilla_accuracy) * 100, 2), np.around((recall_3-vanilla_recall_3)*100))
    
    return feature_prior_ratios, metrics_by_features
        
feature_prior_ratios, metrics_by_features = post_process_selected_features(
    metadata_distributions=metadata_distributions,
    class_distribution=class_distribution,
    raw_predictions=preds_raw,
    ground_truth_labels=GT_lbls
)

## Weighting by Combinations of Selected Features

In [None]:
def post_process_combine_priors(raw_predictions, selected_feature_prior_ratios: list):
    merged_predictions = []
    merged_predictions_raw = []
    
    for index, raw_prediction in enumerate(raw_predictions):
        prediction = softmax(raw_prediction)
        
        
        weighted_prediction = prediction
        for feature_prior_ratio in selected_feature_prior_ratios:
            weighted_prediction *= feature_prior_ratio[index]
        
        merged_prediction = weighted_prediction / (sum(weighted_prediction))
        max_index = np.argmax(merged_prediction)
        
        merged_predictions_raw.append(merged_prediction)
        merged_predictions.append(max_index)
        
    return merged_predictions, merged_predictions_raw
        

In [None]:
from itertools import combinations


def post_process_prior_combinations(raw_predictions, feature_prior_ratios):
    metrics_by_combination = {}
    all_combinations_selected_features = []
    for r in range(2, len(SELECTED_FEATURES) + 1):
        all_combinations_selected_features.extend(combinations(SELECTED_FEATURES, r))
    
    for combination in all_combinations_selected_features:

        selected_feature_prior_ratios = [feature_prior_ratios[feature] for feature in combination]

        merged_predictions, merged_predictions_raw = post_process_combine_priors(
            raw_predictions=preds_raw,
            selected_feature_prior_ratios=selected_feature_prior_ratios
        )

        f1 = f1_score(test_df['class_id'], merged_predictions, average='macro')
        accuracy = accuracy_score(test_df['class_id'], merged_predictions)
        recall_3 = top_k_accuracy_score(test_df['class_id'], merged_predictions_raw, k=3)
        
        combination_name = " + ".join(combination)
        
        metrics_by_combination[combination_name] = {
            "f1": f1,
            "accuracy": accuracy,
            "recall_3": recall_3
        }
        print(combination_name)
        print("F1, Acc, Recall3: ", f1, accuracy, recall_3)
        print("Diff: ", np.around(f1-vanilla_f1, 3), np.around((accuracy-vanilla_accuracy) * 100, 2), np.around((recall_3-vanilla_recall_3)*100, 2))
    
    return metrics_by_combination
        
metrics_by_combination = post_process_prior_combinations(
    raw_predictions=preds_raw,
    feature_prior_ratios=feature_prior_ratios
)

In [None]:

results = {"Vanilla": {'f1': vanilla_f1, 'accuracy': vanilla_accuracy, 'recall_3': vanilla_recall_3}}
results.update(metrics_by_features)
results.update(metrics_by_combination)

results_df = pd.DataFrame(results).transpose()
results_df

In [None]:
output_path = f"../output/PP-{MODEL_NAME}.txt"

output_msg = f"{MODEL_NAME}, Calibration: {USE_CALIBRATION}, Observation: {USE_OBSERVATION_PREDS}\n{results_df.to_markdown()}\n\n"

with open(output_path, "a") as fp:
    fp.write(output_msg)