In [None]:
import os
import tqdm
import timm
import torch
import random

import numpy as np
import pandas as pd
import torch.nn as nn

from torch.utils.data import DataLoader
from sklearn.metrics import f1_score, accuracy_score, top_k_accuracy_score
from fgvc.utils.utils import set_random_seed
from fgvc.utils.utils import set_cuda_device

# Select the compute device through the project helper (called with the id string '0')
# and show which device was returned.
device = set_cuda_device('0')
print(f'Device: {device}')

# Fixed seed handed to the project helper.
# presumably this seeds the random number generators used below — confirm in fgvc.utils
SEED = 777
set_random_seed(SEED)

In [None]:
%load_ext autoreload
%autoreload 2

## Using metadata

In [None]:
# Column that holds the class label in both metadata tables.
TARGET_FEATURE = "class_id"

# Metadata tables for the two splits; the validation file serves as the test set here.
train_df = pd.read_csv("../metadata/DanishFungi2023-train_mini.csv")
test_df = pd.read_csv("../metadata/DanishFungi2023-val_mini.csv")

In [None]:

# Directory that holds the images. The location is machine-specific, so it can be
# overridden with the DF20M_IMAGE_DIR environment variable; the previously hard-coded
# path stays as the fallback so existing setups keep working unchanged.
IMAGE_DIR = os.environ.get(
    "DF20M_IMAGE_DIR",
    "/home/marek/Projects/PythonLand/Vision/Baseline/data/DF20M",
)


def _to_local_image_path(path):
    """Return the path of `path`'s file name inside IMAGE_DIR."""
    return os.path.join(IMAGE_DIR, os.path.basename(path))


# Only the file name of each stored path is kept; its directory part is replaced by IMAGE_DIR.
train_df["image_path"] = train_df["image_path"].apply(_to_local_image_path)
test_df["image_path"] = test_df["image_path"].apply(_to_local_image_path)

In [None]:
from sklearn import preprocessing

# Fitted encoders, keyed by the name of the column they were fitted on.
label_encoders = {}
columns_to_be_encoded = ["Habitat", "Substrate"]

for column_name in columns_to_be_encoded:
    le = preprocessing.LabelEncoder()

    # Fit ONE encoder on the values of both splits. Fitting separately on train and test
    # (as before) can assign different integers to the same category, which silently
    # mixes categories once the two frames are concatenated.
    le.fit(pd.concat([train_df[column_name], test_df[column_name]], ignore_index=True))

    # Store the encoder under its column name instead of replacing the whole dict,
    # so every column's encoder stays available after the loop.
    label_encoders[column_name] = le

    train_df[column_name] = le.transform(train_df[column_name]).astype(np.int64)
    test_df[column_name] = le.transform(test_df[column_name]).astype(np.int64)

In [None]:
# Stack the train and test rows into one table; the displayed value is the total row count.
# NOTE(review): anything computed from `metadata` therefore also sees the test-split
# labels — confirm that this is intended for the evaluation.
metadata = pd.concat([train_df, test_df])
len(metadata)

# Calculating priors

In [None]:
# Number of rows per class in the combined metadata.
cls_counts = metadata.groupby(TARGET_FEATURE).size()
# Relative frequency of every class (counts divided by the total row count).
class_distribution = cls_counts.div(len(metadata))
# Sanity check: the frequencies should add up to 1 (up to floating-point error).
sum(class_distribution)

## Calculate Distributions of Selected Features

In [None]:
from postprocessing import get_target_to_feature_conditional_distributions
# Metadata columns whose relation to the target class is used for post-processing.
SELECTED_FEATURES = ["Habitat", "month", "Substrate"]

# NOTE: the filter that dropped test rows with a missing value in any selected feature
# is disabled; all test rows are kept.

# One result of the project helper per selected feature, keyed by feature name.
metadata_distributions = {
    feature_name: get_target_to_feature_conditional_distributions(
        metadata,
        feature_name,
        TARGET_FEATURE,
        add_to_missing=False,
    )
    for feature_name in SELECTED_FEATURES
}

# Predictions

## Preparing Model

In [None]:
# Number of distinct class ids present in the combined metadata.
N_CLASSES = len(metadata[TARGET_FEATURE].unique())
IMAGE_SIZE = [224, 224]

MODEL_NAME = "BVRA/vit_base_patch16_224.ft_df20m_224"
USE_CALIBRATION = True
USE_OBSERVATION_PREDS = True

# Load the pretrained checkpoint from the Hugging Face hub through timm.
model = timm.create_model(f"hf-hub:{MODEL_NAME}", pretrained=True)

# Normalisation statistics are fixed by hand here instead of being read from
# the model's `default_cfg`.
model_mean = [0.5] * 3
model_std = [0.5] * 3
print(model_mean, model_std)

# Move the network to the selected device and put it into evaluation mode.
model = model.to(device).eval()
print(f"Done. {device}")

In [None]:
from fgvc.special.calibration import ModelWithTemperature, get_temperature

# Optionally wrap the network in the project's ModelWithTemperature wrapper and move the
# wrapper to the selected device.
# NOTE: this cell reassigns `model` from itself, so re-running it wraps the model again.
if USE_CALIBRATION:
    model = ModelWithTemperature(model)
    model.to(device)


## Prepare Dataloader

In [None]:
from dataset_cls import ExtraFeaturesDataset, get_transforms
# Dataset over the test metadata. Besides the image path and the target column it is given
# the selected metadata features plus the observation id as extra features.
test_dataset = ExtraFeaturesDataset(
    test_df,
    image_path_feature="image_path",
    target_feature=TARGET_FEATURE,
    extra_features=SELECTED_FEATURES + ["observationID"],
    transform=get_transforms(model_mean, model_std, IMAGE_SIZE),
)

In [None]:
batch_size = 64

# Sequential (unshuffled) loader, so predictions come back in the row order of `test_df`.
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    num_workers=8,
    shuffle=False,
)

## Vanilla Predictions without Post-Processing

In [None]:
from postprocessing import predict_with_features


# Run the model over the test loader; the helper returns the predicted labels, the raw
# per-class scores, the ground-truth labels and the extra features seen along the way.
preds, preds_raw, GT_lbls, seen_features = predict_with_features(model, test_loader, device)

# Baseline metrics of the plain image classifier.
vanilla_accuracy = accuracy_score(test_df[TARGET_FEATURE], preds)
vanilla_f1 = f1_score(test_df[TARGET_FEATURE], preds, average='macro')
vanilla_recall_3, vanilla_recall_5, vanilla_recall_10 = (
    top_k_accuracy_score(test_df[TARGET_FEATURE], preds_raw, k=top_k)
    for top_k in (3, 5, 10)
)

print('Vanilla:', vanilla_f1, vanilla_accuracy, vanilla_recall_3, vanilla_recall_5, vanilla_recall_10)

In [None]:
# Combine the per-image scores of images that belong to the same observation: every image
# receives the mean score vector of its observation and that vector's arg-max as label.
seen_observation_ids = np.array(seen_features["observationID"])
unique_observation_ids = np.unique(seen_observation_ids)

preds_raw_np = np.array(preds_raw)

obs_preds_raw = np.zeros((len(test_df), N_CLASSES))
# Predicted class ids are integers, so store them in an integer array instead of the
# float64 default of np.zeros.
obs_preds = np.zeros(len(test_df), dtype=np.int64)

for unique_observation_id in unique_observation_ids:
    # Positions of all images that share this observation id.
    same_observation_indexes = np.where(seen_observation_ids == unique_observation_id)

    # Mean score vector over the images of the observation.
    _obs_preds = np.average(preds_raw_np[same_observation_indexes], axis=0)

    obs_preds_raw[same_observation_indexes] = _obs_preds
    obs_preds[same_observation_indexes] = _obs_preds.argmax()

obs_f1 = f1_score(test_df[TARGET_FEATURE], obs_preds, average='macro')
obs_accuracy = accuracy_score(test_df[TARGET_FEATURE], obs_preds)
obs_recall_3 = top_k_accuracy_score(test_df[TARGET_FEATURE], obs_preds_raw, k=3)

print('ObservationID:', obs_f1, obs_accuracy, obs_recall_3)

# When enabled, the observation-level numbers and scores replace the per-image ones, so
# everything below is measured against (and computed from) the observation-level baseline.
if USE_OBSERVATION_PREDS:
    vanilla_f1 = obs_f1
    vanilla_accuracy = obs_accuracy
    vanilla_recall_3 = obs_recall_3
    preds_raw = obs_preds_raw


In [None]:
# Ask the project helper for a temperature, given the collected scores and the
# ground-truth labels (both converted to NumPy arrays).
# NOTE(review): `preds_raw` is passed as `logits`; when USE_OBSERVATION_PREDS is set it holds
# the per-observation averages — confirm this is what `get_temperature` expects.
# NOTE(review): `temperature` is not referenced again in the visible part of this notebook.
temperature = get_temperature(
    logits=np.array(preds_raw),
    targs=np.array(GT_lbls)
)


## Weighting by each Selected Feature

In [None]:
from postprocessing import weight_predictions_by_feature_distribution

def post_process_selected_features(metadata_distributions, class_distribution, raw_predictions, ground_truth_labels):
    """Re-weight the predictions by each selected feature on its own and score the result.

    For every name in the module-level ``SELECTED_FEATURES`` the predictions are passed
    to ``weight_predictions_by_feature_distribution`` together with that feature's entry
    of ``metadata_distributions``; macro F1, accuracy and top-3 accuracy of the returned
    predictions are computed against ``test_df[TARGET_FEATURE]`` and printed, followed by
    the difference to the ``vanilla_*`` baseline values.

    Besides its arguments the function reads the module-level names ``SELECTED_FEATURES``,
    ``seen_features``, ``test_df``, ``TARGET_FEATURE``, ``vanilla_f1``,
    ``vanilla_accuracy`` and ``vanilla_recall_3``.

    Args:
        metadata_distributions: Mapping from feature name to the value forwarded as
            ``target_to_feature_conditional_distributions``.
        class_distribution: Value forwarded as ``target_distribution``.
        raw_predictions: Raw per-class scores forwarded to the weighting helper.
        ground_truth_labels: Ground-truth labels forwarded to the weighting helper.

    Returns:
        A tuple ``(feature_prior_ratios, metrics_by_features)``; both are dicts keyed by
        feature name. The first holds the third value returned by the weighting helper,
        the second a dict with the keys ``"f1"``, ``"accuracy"`` and ``"recall_3"``.
    """
    feature_prior_ratios = {}
    metrics_by_features = {}
    for feature in SELECTED_FEATURES:
        metadata_distribution = metadata_distributions[feature]
        seen_feature_values = seen_features[feature]

        weighted_predictions, weighted_predictions_raw, feature_prior_ratio = weight_predictions_by_feature_distribution(
            target_to_feature_conditional_distributions=metadata_distribution,
            target_distribution=class_distribution,
            ground_truth_labels=ground_truth_labels,
            raw_predictions=raw_predictions,
            ground_truth_feature_categories=seen_feature_values
        )
        feature_prior_ratios[feature] = feature_prior_ratio

        f1 = f1_score(test_df[TARGET_FEATURE], weighted_predictions, average='macro')
        accuracy = accuracy_score(test_df[TARGET_FEATURE], weighted_predictions)
        recall_3 = top_k_accuracy_score(test_df[TARGET_FEATURE], weighted_predictions_raw, k=3)
        metrics_by_features[feature] = {
            "f1": f1,
            "accuracy": accuracy,
            "recall_3": recall_3
        }
        print(f'{feature}:', f1, accuracy, recall_3)
        # All percentage differences are rounded to two decimals; the recall difference
        # previously had no `decimals` argument and was rounded to a whole number.
        print(
            f'{feature} dif:',
            np.around(f1 - vanilla_f1, 3),
            np.around((accuracy - vanilla_accuracy) * 100, 2),
            np.around((recall_3 - vanilla_recall_3) * 100, 2),
        )

    return feature_prior_ratios, metrics_by_features
        
# Single-feature post-processing on the current raw predictions.
feature_prior_ratios, metrics_by_features = post_process_selected_features(
    raw_predictions=preds_raw,
    ground_truth_labels=GT_lbls,
    class_distribution=class_distribution,
    metadata_distributions=metadata_distributions,
)

## Weighting by Combinations of Selected Features

In [None]:
from itertools import combinations
from postprocessing import weight_predictions_combined_feature_priors


def post_process_prior_combinations(raw_predictions, feature_prior_ratios):
    """Score the predictions re-weighted by every combination of two or more features.

    All subsets of the module-level ``SELECTED_FEATURES`` with at least two members are
    visited in order of increasing size. For each subset the matching entries of
    ``feature_prior_ratios`` are handed to ``weight_predictions_combined_feature_priors``
    and the returned predictions are scored against ``test_df[TARGET_FEATURE]``. The
    metrics and their difference to the ``vanilla_*`` baseline values are printed.

    Returns:
        Dict keyed by the feature names joined with " + "; each value is a dict with the
        keys "f1", "accuracy" and "recall_3".
    """
    feature_subsets = [
        subset
        for subset_size in range(2, len(SELECTED_FEATURES) + 1)
        for subset in combinations(SELECTED_FEATURES, subset_size)
    ]

    y_true = test_df[TARGET_FEATURE]
    metrics_by_combination = {}

    for subset in feature_subsets:
        merged_predictions, merged_predictions_raw = weight_predictions_combined_feature_priors(
            raw_predictions=raw_predictions,
            feature_prior_ratios=[feature_prior_ratios[name] for name in subset],
        )

        scores = {
            "f1": f1_score(y_true, merged_predictions, average='macro'),
            "accuracy": accuracy_score(y_true, merged_predictions),
            "recall_3": top_k_accuracy_score(y_true, merged_predictions_raw, k=3),
        }

        label = " + ".join(subset)
        metrics_by_combination[label] = scores

        print(label)
        print("F1, Acc, Recall3: ", scores["f1"], scores["accuracy"], scores["recall_3"])
        print(
            "Diff: ",
            np.around(scores["f1"] - vanilla_f1, 3),
            np.around((scores["accuracy"] - vanilla_accuracy) * 100, 2),
            np.around((scores["recall_3"] - vanilla_recall_3) * 100, 2),
        )

    return metrics_by_combination
        
# Multi-feature post-processing, reusing the per-feature ratios computed earlier.
metrics_by_combination = post_process_prior_combinations(
    feature_prior_ratios=feature_prior_ratios,
    raw_predictions=preds_raw,
)

In [None]:
# One row per method: the two baselines first, then the single-feature and the
# combined-feature results (later entries win if a name repeats).
results = {
    "Vanilla": dict(f1=vanilla_f1, accuracy=vanilla_accuracy, recall_3=vanilla_recall_3),
    "ObservationID": dict(f1=obs_f1, accuracy=obs_accuracy, recall_3=obs_recall_3),
    **metrics_by_features,
    **metrics_by_combination,
}

# Methods as rows, metrics as columns in a fixed order.
results_df = pd.DataFrame(results).T[['accuracy', 'recall_3', 'f1']]
results_df.head(50)

In [None]:
def subtract_base_performance(base: dict, post_processed: dict) -> dict:
    """Return, for every metric in ``base``, the post-processed value minus the base value.

    Only the keys of ``base`` appear in the result; a key missing from
    ``post_processed`` raises ``KeyError``.
    """
    return {
        metric: post_processed[metric] - baseline
        for metric, baseline in base.items()
    }

# Express every non-baseline row as its change relative to the "Vanilla" row. The
# differences are collected in a new dict, so `results` keeps the absolute scores and
# re-running this cell does not subtract the baseline a second time.
results_diff = {
    key: val if key == "Vanilla" else subtract_base_performance(results["Vanilla"], val)
    for key, val in results.items()
}

results_df = pd.DataFrame(results_diff).transpose()
# Scale the fractions by 100 (the "Vanilla" row stays absolute, the others are differences).
results_df *= 100

results_df = results_df.round(decimals=2)
results_df = results_df[['accuracy', 'recall_3', 'f1']]
result_message = f"{results_df.to_markdown()}\n"
print(result_message)


In [None]:
from postprocessing import post_processing_pipeline


# Call the packaged pipeline from `postprocessing` with the same metadata, model, loader,
# device, target column and feature list that the cells above use.
# presumably this repeats the steps above end to end — TODO confirm against `postprocessing`
post_processing_pipeline(
    metadata,
    model,
    test_loader,
    device,
    TARGET_FEATURE,
    SELECTED_FEATURES
)