In [1]:
import json

import pandas as pd
import numpy as np
import os

from hydra import compose, initialize
from omegaconf import OmegaConf
from sklearn.metrics import accuracy_score, balanced_accuracy_score
from tqdm import tqdm
from PIL import Image
import torch
from pathlib import Path
from PIL import ImageFile

from closedset_model import build_model
from competition_metrics import evaluate
from datasets import get_valid_transform
from paths import METADATA_DIR, VAL_DATA_DIR
from utils import copy_config, get_device

# Let Pillow decode image files whose data is truncated instead of raising.
ImageFile.LOAD_TRUNCATED_IMAGES = True
# Print numpy arrays with five digits of precision.
np.set_printoptions(precision=5)

class PytorchWorker:
    """Run inference using PyTorch.

    The architecture is created with ``build_model``; its weights are restored
    from the ``'model_state_dict'`` entry of a checkpoint and the model is kept
    in eval mode on ``device``.
    """

    def __init__(self, model_path: str, number_of_categories: int = 1784, model_id="efficientnet_b0", device="cpu", transforms=None):
        """Store the configuration and load the checkpoint.

        :param model_path: Checkpoint file readable by ``torch.load``.
        :param number_of_categories: Number of classes passed to ``build_model``.
        :param model_id: Model identifier passed to ``build_model``.
        :param device: Device the model and every input batch are moved to.
        :param transforms: Callable applied to each image by ``predict_image``.
        """

        ########################################
        # must be set before calling _load_model
        self.number_of_categories = number_of_categories
        self.model_id = model_id
        self.device = device
        ########################################

        self.transforms = transforms
        # _load_model reads the attributes set above, so call it last
        self.model = self._load_model(model_path)

    def _load_model(self, model_path):
        """Build the model, load ``model_state_dict`` from ``model_path`` and return it in eval mode."""
        print("Setting up Pytorch Model")
        model = build_model(
            model_id=self.model_id,
            pretrained=False,
            fine_tune=False,
            num_classes=self.number_of_categories,
            # this is all that matters. everything else will be overwritten by checkpoint state
            dropout_rate=0.5,
        ).to(self.device)
        model_ckpt = torch.load(model_path, map_location=self.device)
        model.load_state_dict(model_ckpt['model_state_dict'])

        # The model was already moved to self.device above; only eval mode is left to set.
        return model.eval()

    def predict_image(self, image) -> torch.Tensor:
        """Run the PyTorch model on one image.

        :param image: Input image in whatever form ``self.transforms`` accepts
            (``get_probas`` passes an RGB PIL image).
        :return: Whatever the model returns for the batch (the logits). The batch
            has one row per distinct view produced by the transform.
        :raises TypeError: If the worker was created without ``transforms``.
        """
        if self.transforms is None:
            raise TypeError(
                "PytorchWorker.predict_image needs a callable `transforms`; got None"
            )

        img = self.transforms(image)

        # Multi-crop transforms hand back a sequence of per-crop tensors.
        if isinstance(img, (tuple, list)):
            img = torch.stack(tuple(img))
            # Drop identical crops so the same view is not scored twice.
            img = torch.unique(img, dim=0)

        # A single (C, H, W) image still needs its batch dimension.
        if img.dim() < 4:
            img = img.unsqueeze(0)

        img = img.to(self.device)

        logits = self.model(img)

        return logits


def get_probas(test_metadata, model_id, model_path, images_root_path, device, transforms):
    """Compute class probabilities for every image listed in ``test_metadata``.

    :param test_metadata: Table with an ``image_path`` column; each entry is
        joined onto ``images_root_path``.
    :param model_id: Model identifier forwarded to ``PytorchWorker``.
    :param model_path: Checkpoint path forwarded to ``PytorchWorker``.
    :param images_root_path: Directory the image paths are relative to.
    :param device: Device used for inference.
    :param transforms: Callable applied to every image before inference.
    :return: List with one probability vector (numpy array) per image, in the
        order of ``test_metadata["image_path"]``. When the model scores several
        views of an image, the softmax outputs are averaged over the views.
    """

    model = PytorchWorker(model_path, model_id=model_id, device=device, transforms=transforms)

    probas_total = []

    with torch.no_grad():
        for relative_path in tqdm(test_metadata["image_path"]):
            image_path = os.path.join(images_root_path, relative_path)
            # convert() returns a fully loaded copy, so the file handle can be
            # released right away instead of staying open for the whole run.
            with Image.open(image_path) as image_file:
                test_image = image_file.convert("RGB")
            logits = model.predict_image(test_image)
            probas = torch.nn.functional.softmax(logits, dim=-1).cpu().numpy()
            # Averaging over the view axis also removes the batch dimension
            # when there is only a single view.
            probas_total.append(probas.mean(axis=0))

    return probas_total


def evaluate_experiment(cfgs, trial_name=None, multi_instance=False, device="cpu", multicrop=False, debug=False):
    """Score one model or an ensemble on the validation metadata.

    For every config the checkpoint ``model_checkpoints/<experiment_id>/model.pth``
    is run over the validation images, the per-image probabilities are averaged
    across models, one class per observation is written to a submission CSV and
    that file is scored with ``evaluate``.

    :param cfgs: Non-empty sequence of dicts with the keys ``"experiment_id"``,
        ``"model_id"`` and ``"image_size"``.
    :param trial_name: Optional prefix for the submission file name.
    :param multi_instance: If True, every image of an observation is scored and
        the probabilities are averaged per observation; otherwise only the first
        image of each observation is used.
    :param device: Device used for inference.
    :param multicrop: Passed to ``get_valid_transform`` as ``fivecrop``.
    :param debug: If True, only the first 20 metadata rows are used and
        intermediate shapes are printed.
    :return: The ``"submission_result"`` entry of the ``evaluate`` output.
    :raises ValueError: If ``cfgs`` is empty or an ``observation_id`` is missing.
    """
    if not cfgs:
        raise ValueError("cfgs must contain at least one experiment config")

    # The misspelling in this file name is kept so submissions are written to
    # the same paths as before.
    submission_file_path = "test-time-augmengations-submission.csv"
    if trial_name is not None:
        submission_file_path = trial_name + submission_file_path

    metadata_file_path = METADATA_DIR / "SnakeCLEF2023-ValMetadata.csv"
    test_metadata = pd.read_csv(metadata_file_path)
    if debug:
        test_metadata = test_metadata.head(20)
    if not multi_instance:
        # Not in place: after head() the frame may be a view of the full table.
        test_metadata = test_metadata.drop_duplicates("observation_id", keep="first")

    probas_per_model = []
    for cfg in cfgs:
        experiment_id = cfg["experiment_id"]
        if debug: print(f"getting probas for experiment {experiment_id}")
        transforms = get_valid_transform(image_size=cfg["image_size"], pretrained=True, fivecrop=multicrop)
        model_path = str(Path("model_checkpoints") / experiment_id / "model.pth")
        probas = get_probas(
            model_id=cfg["model_id"],
            test_metadata=test_metadata,
            model_path=model_path,
            images_root_path=VAL_DATA_DIR,
            device=device,
            transforms=transforms,
        )
        probas_per_model.append(probas)
    probas_per_model = np.array(probas_per_model)
    if debug: print("probas_per_model.shape", probas_per_model.shape)

    # Averaging over the model axis also covers the single-model case and, unlike
    # squeeze(), keeps the (image, class) layout when only one image is scored.
    averaged_probas = np.mean(probas_per_model, axis=0)
    if debug: print("averaged_probas.shape", averaged_probas.shape)

    if multi_instance:
        # Write each observation's prediction back to the row positions of its
        # images, so images of one observation need not be contiguous.
        preds = np.full(len(test_metadata), -1, dtype=np.int64)
        positions_by_observation = test_metadata.groupby("observation_id", sort=False).indices
        for positions in positions_by_observation.values():
            observation_average = np.mean(averaged_probas[positions], axis=0)
            preds[positions] = np.argmax(observation_average)
        if (preds < 0).any():
            raise ValueError("some rows have no observation_id and could not be predicted")
    else:
        preds = np.argmax(averaged_probas, axis=1)

    if debug:
        print("preds", preds)
        print("preds.shape", preds.shape)

    submission_df = test_metadata[["observation_id"]].copy()
    submission_df["class_id"] = preds
    submission_df = submission_df.drop_duplicates("observation_id", keep="first")
    submission_df.to_csv(submission_file_path, index=False)

    competition_metrics_scores = evaluate(
        test_annotation_file=metadata_file_path,
        user_submission_file=submission_file_path,
        phase_codename="prediction-based",
    )["submission_result"]

    return competition_metrics_scores

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Two-checkpoint ensemble, all images per observation (multi_instance), no multicrop.
cfgs = [
    dict(model_id="caformer_s18.sail_in22k_ft_in1k_384", experiment_id=experiment_id, image_size=768)
    for experiment_id in (
        "2024-05-05 22:41:41.115323",
        "2024-05-08-caformer_s18-focal-balanced-sampling-paused",
    )
]
device = get_device()
scores_ensemble_multi = evaluate_experiment(
    cfgs=cfgs,
    trial_name="ensemble_multi-instance",
    multi_instance=True,
    multicrop=False,
    device=device,
    debug=False,
)

Using device: cuda:0
Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 14117/14117 [08:45<00:00, 26.86it/s]


Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 14117/14117 [09:59<00:00, 23.56it/s]


Starting Evaluation.....
Evaluating for Prediction-based Phase
Evaluated scores: {'F1 Score': 57.67, 'Accuracy': 73.14, 'PSC': (25.07, 2.68, 6.2, 16.94), 'PSC_total': (1579, 169, 94, 257), 'Track1 Metric': 87.49, 'Track2 Metric': 2901}
Completed evaluation


In [3]:
# Two-checkpoint ensemble, first image per observation only, no multicrop.
cfgs = [
    dict(model_id="caformer_s18.sail_in22k_ft_in1k_384", experiment_id=experiment_id, image_size=768)
    for experiment_id in (
        "2024-05-05 22:41:41.115323",
        "2024-05-08-caformer_s18-focal-balanced-sampling-paused",
    )
]
device = get_device()
scores_ensemble = evaluate_experiment(
    cfgs=cfgs,
    trial_name="ensemble",
    multi_instance=False,
    multicrop=False,
    device=device,
    debug=False,
)

Using device: cuda:0
Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 7816/7816 [04:11<00:00, 31.07it/s]


Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 7816/7816 [04:11<00:00, 31.11it/s]


Starting Evaluation.....
Evaluating for Prediction-based Phase
Evaluated scores: {'F1 Score': 55.53, 'Accuracy': 70.89, 'PSC': (27.19, 2.92, 6.86, 18.06), 'PSC_total': (1713, 184, 104, 274), 'Track1 Metric': 86.55, 'Track2 Metric': 3149}
Completed evaluation


In [4]:
# Baseline: one checkpoint, first image per observation only, no multicrop.
cfgs = [
    dict(
        model_id="caformer_s18.sail_in22k_ft_in1k_384",
        experiment_id="2024-05-05 22:41:41.115323",
        image_size=768,
    ),
]
device = get_device()
scores = evaluate_experiment(
    cfgs=cfgs,
    trial_name="baseline",
    multi_instance=False,
    multicrop=False,
    device=device,
    debug=False,
)

Using device: cuda:0
Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 7816/7816 [04:11<00:00, 31.09it/s]


Starting Evaluation.....
Evaluating for Prediction-based Phase
Evaluated scores: {'F1 Score': 54.21, 'Accuracy': 70.48, 'PSC': (27.67, 2.86, 6.86, 18.46), 'PSC_total': (1743, 180, 104, 280), 'Track1 Metric': 86.33, 'Track2 Metric': 3183}
Completed evaluation


In [5]:
# One checkpoint, all images per observation (multi_instance), no multicrop.
cfgs = [
    dict(
        model_id="caformer_s18.sail_in22k_ft_in1k_384",
        experiment_id="2024-05-05 22:41:41.115323",
        image_size=768,
    ),
]
device = get_device()
multi_scores = evaluate_experiment(
    cfgs=cfgs,
    trial_name="multi-instance",
    multi_instance=True,
    multicrop=False,
    device=device,
    debug=False,
)

Using device: cuda:0
Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 14117/14117 [07:34<00:00, 31.08it/s]


Starting Evaluation.....
Evaluating for Prediction-based Phase
Evaluated scores: {'F1 Score': 56.0, 'Accuracy': 72.62, 'PSC': (25.67, 2.7, 6.06, 17.21), 'PSC_total': (1617, 170, 92, 261), 'Track1 Metric': 87.29, 'Track2 Metric': 2939}
Completed evaluation


In [6]:
# One checkpoint, all images per observation (multi_instance), with multicrop.
cfgs = [
    dict(
        model_id="caformer_s18.sail_in22k_ft_in1k_384",
        experiment_id="2024-05-05 22:41:41.115323",
        image_size=768,
    ),
]
device = get_device()
multicrop_scores = evaluate_experiment(
    cfgs=cfgs,
    trial_name="multi-instance_multicrop",
    multi_instance=True,
    device=device,
    multicrop=True,
    debug=False,
)

Using device: cuda:0
Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 14117/14117 [27:26<00:00,  8.57it/s]


Starting Evaluation.....
Evaluating for Prediction-based Phase
Evaluated scores: {'F1 Score': 55.5, 'Accuracy': 72.34, 'PSC': (26.13, 2.76, 5.87, 16.68), 'PSC_total': (1646, 174, 89, 253), 'Track1 Metric': 87.38, 'Track2 Metric': 2945}
Completed evaluation


In [7]:
# One checkpoint, first image per observation only, with multicrop.
cfgs = [
    dict(
        model_id="caformer_s18.sail_in22k_ft_in1k_384",
        experiment_id="2024-05-05 22:41:41.115323",
        image_size=768,
    ),
]
device = get_device()
multicrop_single_scores = evaluate_experiment(
    cfgs=cfgs,
    trial_name="multicrop",
    multi_instance=False,
    multicrop=True,
    device=device,
    debug=False,
)

Using device: cuda:0
Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 7816/7816 [15:19<00:00,  8.50it/s]

Starting Evaluation.....
Evaluating for Prediction-based Phase
Evaluated scores: {'F1 Score': 54.2, 'Accuracy': 70.65, 'PSC': (27.5, 3.02, 6.86, 17.67), 'PSC_total': (1732, 190, 104, 268), 'Track1 Metric': 86.46, 'Track2 Metric': 3168}
Completed evaluation





In [8]:
# Two-checkpoint ensemble, all images per observation (multi_instance), with multicrop.
cfgs = [
    dict(model_id="caformer_s18.sail_in22k_ft_in1k_384", experiment_id=experiment_id, image_size=768)
    for experiment_id in (
        "2024-05-05 22:41:41.115323",
        "2024-05-08-caformer_s18-focal-balanced-sampling-paused",
    )
]
device = get_device()
scores_ensemble_multi_multicrop = evaluate_experiment(
    cfgs=cfgs,
    trial_name="ensemble_multi-instance_multicrop",
    multi_instance=True,
    multicrop=True,
    device=device,
    debug=False,
)

Using device: cuda:0
Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 14117/14117 [35:31<00:00,  6.62it/s] 


Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 14117/14117 [49:41<00:00,  4.73it/s] 


Starting Evaluation.....
Evaluating for Prediction-based Phase
Evaluated scores: {'F1 Score': 58.04, 'Accuracy': 73.16, 'PSC': (25.19, 2.73, 5.74, 16.61), 'PSC_total': (1587, 172, 87, 252), 'Track1 Metric': 87.77, 'Track2 Metric': 2870}
Completed evaluation


In [9]:
# Three-checkpoint ensemble, first image per observation only, no multicrop.
cfgs = [
    dict(model_id="caformer_s18.sail_in22k_ft_in1k_384", experiment_id=experiment_id, image_size=768)
    for experiment_id in (
        "2024-05-05 22:41:41.115323",
        "2024-05-08-caformer_s18-focal-balanced-sampling-paused",
        "2024-05-08-caformer_s18-weighted_venom_loss",
    )
]
device = get_device()
scores_three_ensemble = evaluate_experiment(
    cfgs=cfgs,
    trial_name="three_ensemble",
    multi_instance=False,
    multicrop=False,
    device=device,
    debug=False,
)

Using device: cuda:0
Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 7816/7816 [07:12<00:00, 18.05it/s]


Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 7816/7816 [07:02<00:00, 18.50it/s]


Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 7816/7816 [06:55<00:00, 18.80it/s]


Starting Evaluation.....
Evaluating for Prediction-based Phase
Evaluated scores: {'F1 Score': 56.4, 'Accuracy': 71.49, 'PSC': (26.43, 3.03, 6.53, 18.0), 'PSC_total': (1665, 191, 99, 273), 'Track1 Metric': 86.84, 'Track2 Metric': 3088}
Completed evaluation


In [10]:
# Three-checkpoint ensemble, all images per observation (multi_instance), with multicrop.
cfgs = [
    dict(model_id="caformer_s18.sail_in22k_ft_in1k_384", experiment_id=experiment_id, image_size=768)
    for experiment_id in (
        "2024-05-05 22:41:41.115323",
        "2024-05-08-caformer_s18-focal-balanced-sampling-paused",
        "2024-05-08-caformer_s18-weighted_venom_loss",
    )
]
device = get_device()
scores_three_ensemble_multi_multicrop = evaluate_experiment(
    cfgs=cfgs,
    trial_name="three_ensemble_multi-instance_multicrop",
    multi_instance=True,
    multicrop=True,
    device=device,
    debug=False,
)

Using device: cuda:0
Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 14117/14117 [33:19<00:00,  7.06it/s]


Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 14117/14117 [31:52<00:00,  7.38it/s]


Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 14117/14117 [24:55<00:00,  9.44it/s]


Starting Evaluation.....
Evaluating for Prediction-based Phase
Evaluated scores: {'F1 Score': 58.68, 'Accuracy': 73.44, 'PSC': (24.72, 2.81, 5.74, 16.81), 'PSC_total': (1557, 177, 87, 255), 'Track1 Metric': 87.82, 'Track2 Metric': 2856}
Completed evaluation


In [21]:
# Three-checkpoint ensemble at image_size 576, first image per observation only, no multicrop.
cfgs = [
    dict(model_id="caformer_s18.sail_in22k_ft_in1k_384", experiment_id=experiment_id, image_size=576)
    for experiment_id in (
        "2024-05-05 22:41:41.115323",
        "2024-05-08-caformer_s18-focal-balanced-sampling-paused",
        "2024-05-08-caformer_s18-weighted_venom_loss",
    )
]
device = get_device()
scores_three_ensemble_576 = evaluate_experiment(
    cfgs=cfgs,
    trial_name="three_ensemble_576",
    multi_instance=False,
    multicrop=False,
    device=device,
    debug=False,
)

Using device: cuda:0
Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 7816/7816 [03:05<00:00, 42.23it/s]


Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 7816/7816 [03:01<00:00, 43.12it/s]


Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 7816/7816 [03:02<00:00, 42.92it/s]


Starting Evaluation.....
Evaluating for Prediction-based Phase
Evaluated scores: {'F1 Score': 56.66, 'Accuracy': 71.48, 'PSC': (26.83, 2.94, 5.8, 17.53), 'PSC_total': (1690, 185, 88, 266), 'Track1 Metric': 87.26, 'Track2 Metric': 3032}
Completed evaluation


In [26]:
# One checkpoint at image_size 576, all images per observation (multi_instance), no multicrop.
cfgs = [
    dict(
        model_id="caformer_s18.sail_in22k_ft_in1k_384",
        experiment_id="2024-05-05 22:41:41.115323",
        image_size=576,
    ),
]
device = get_device()
scores_multiinstance_576 = evaluate_experiment(
    cfgs=cfgs,
    trial_name="multi-instance_576",
    multi_instance=True,
    multicrop=False,
    device=device,
    debug=False,
)

Using device: cuda:0
Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 14117/14117 [05:39<00:00, 41.62it/s]


Starting Evaluation.....
Evaluating for Prediction-based Phase
Evaluated scores: {'F1 Score': 55.85, 'Accuracy': 72.52, 'PSC': (25.94, 2.94, 5.14, 16.55), 'PSC_total': (1634, 185, 78, 251), 'Track1 Metric': 87.75, 'Track2 Metric': 2896}
Completed evaluation


In [30]:
# One checkpoint at image_size 576, first image per observation only, no multicrop.
cfgs = [
    dict(
        model_id="caformer_s18.sail_in22k_ft_in1k_384",
        experiment_id="2024-05-05 22:41:41.115323",
        image_size=576,
    ),
]
device = get_device()
scores_576 = evaluate_experiment(
    cfgs=cfgs,
    trial_name="576",
    multi_instance=False,
    multicrop=False,
    device=device,
    debug=False,
)

Using device: cuda:0
Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 7816/7816 [03:01<00:00, 42.95it/s]


Starting Evaluation.....
Evaluating for Prediction-based Phase
Evaluated scores: {'F1 Score': 53.52, 'Accuracy': 70.51, 'PSC': (27.73, 3.18, 5.93, 17.67), 'PSC_total': (1747, 200, 90, 268), 'Track1 Metric': 86.77, 'Track2 Metric': 3133}
Completed evaluation


In [33]:
# One checkpoint at image_size 576, all images per observation (multi_instance), with multicrop.
cfgs = [
    dict(
        model_id="caformer_s18.sail_in22k_ft_in1k_384",
        experiment_id="2024-05-05 22:41:41.115323",
        image_size=576,
    ),
]
device = get_device()
scores_multiinstance_multicrop_576 = evaluate_experiment(
    cfgs=cfgs,
    trial_name="multiinstance_multicrop_576",
    multi_instance=True,
    multicrop=True,
    device=device,
    debug=False,
)

Using device: cuda:0
Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 14117/14117 [11:40<00:00, 20.17it/s]


Starting Evaluation.....
Evaluating for Prediction-based Phase
Evaluated scores: {'F1 Score': 56.05, 'Accuracy': 72.76, 'PSC': (25.78, 2.95, 5.34, 15.69), 'PSC_total': (1624, 186, 81, 238), 'Track1 Metric': 87.84, 'Track2 Metric': 2877}
Completed evaluation


In [36]:
# Three-checkpoint ensemble at image_size 576, all images per observation (multi_instance), with multicrop.
cfgs = [
    dict(model_id="caformer_s18.sail_in22k_ft_in1k_384", experiment_id=experiment_id, image_size=576)
    for experiment_id in (
        "2024-05-05 22:41:41.115323",
        "2024-05-08-caformer_s18-focal-balanced-sampling-paused",
        "2024-05-08-caformer_s18-weighted_venom_loss",
    )
]
device = get_device()
scores_three_ensemble_multiinstance_multicrop_576 = evaluate_experiment(
    cfgs=cfgs,
    trial_name="three_ensemble_multiinstance_multicrop_576",
    multi_instance=True,
    multicrop=True,
    device=device,
    debug=False,
)

Using device: cuda:0
Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 14117/14117 [11:36<00:00, 20.27it/s]


Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 14117/14117 [11:22<00:00, 20.67it/s]


Setting up Pytorch Model
Not loading pre-trained weights
Freezing hidden layers...


100%|██████████| 14117/14117 [12:47<00:00, 18.39it/s]


Starting Evaluation.....
Evaluating for Prediction-based Phase
Evaluated scores: {'F1 Score': 59.1, 'Accuracy': 73.76, 'PSC': (24.8, 2.67, 5.08, 16.08), 'PSC_total': (1562, 168, 77, 244), 'Track1 Metric': 88.31, 'Track2 Metric': 2771}
Completed evaluation


In [34]:
# Score dict of every evaluation run, keyed by the trial name it was run under.
named_scores = {
    "ensemble_multi-instance": scores_ensemble_multi,
    "ensemble": scores_ensemble,
    "multi-instance": multi_scores,
    "multi-instance_multicrop": multicrop_scores,
    "multicrop": multicrop_single_scores,
    "baseline": scores,
    "ensemble_multi-instance_multicrop": scores_ensemble_multi_multicrop,
    "three_ensemble": scores_three_ensemble,
    "three_ensemble_multi-instance_multicrop": scores_three_ensemble_multi_multicrop,
    "three_ensemble_576": scores_three_ensemble_576,
    "multi-instance_576": scores_multiinstance_576,
    "576": scores_576,
    "multiinstance_multicrop_576": scores_multiinstance_multicrop_576,
    "three_ensemble_multiinstance_multicrop_576": scores_three_ensemble_multiinstance_multicrop_576,
}
# TODO: ability to load submission by trial_name

# Only these scalar metrics go into the comparison table.
keep_keys = {"F1 Score", "Accuracy", "Track1 Metric", "Track2 Metric"}

# One single-row frame per run: the kept metrics plus the run's name.
dfs = [
    pd.DataFrame(
        {metric: value for metric, value in run_scores.items() if metric in keep_keys},
        index=[0],
    ).assign(experiment=run_name)
    for run_name, run_scores in named_scores.items()
]

score_comparison_df = pd.concat(dfs, ignore_index=True)

In [35]:
# Show the runs with the highest Track1 Metric first.
score_comparison_df.sort_values(by="Track1 Metric", ascending=False)

Unnamed: 0,F1 Score,Accuracy,Track1 Metric,Track2 Metric,experiment
12,56.05,72.76,87.84,2877,multiinstance_multicrop_576
8,58.68,73.44,87.82,2856,three_ensemble_multi-instance_multicrop
6,58.04,73.16,87.77,2870,ensemble_multi-instance_multicrop
10,55.85,72.52,87.75,2896,multi-instance_576
0,57.67,73.14,87.49,2901,ensemble_multi-instance
3,55.5,72.34,87.38,2945,multi-instance_multicrop
2,56.0,72.62,87.29,2939,multi-instance
9,56.66,71.48,87.26,3032,three_ensemble_576
7,56.4,71.49,86.84,3088,three_ensemble
11,53.52,70.51,86.77,3133,576


In [13]:
def compare(base_model, treatment, scores_df=None):
    """Print the change in each track metric when going from ``base_model`` to ``treatment``.

    For 'Track1 Metric' and 'Track2 Metric' the printed value is the treatment's
    score minus the base model's score.

    :param base_model: Value of the ``experiment`` column used as reference.
    :param treatment: Value of the ``experiment`` column being compared.
    :param scores_df: Table with an ``experiment`` column and the two metric
        columns. Defaults to the notebook-level ``score_comparison_df``.
    :raises KeyError: If either experiment name is not present in the table.
    """
    if scores_df is None:
        scores_df = score_comparison_df

    # Fail with the list of valid names instead of an opaque IndexError below.
    known_experiments = set(scores_df["experiment"])
    for experiment in (base_model, treatment):
        if experiment not in known_experiments:
            raise KeyError(
                f"unknown experiment {experiment!r}; available: {sorted(known_experiments)}"
            )

    for metric in ['Track1 Metric', 'Track2 Metric']:
        treatment_value = scores_df.loc[scores_df['experiment'] == treatment, metric].values[0]
        base_value = scores_df.loc[scores_df['experiment'] == base_model, metric].values[0]
        print(metric, treatment_value - base_value)

In [14]:
# Experiment names must match the keys of named_scores.
compare("baseline", "multicrop")

Track1 Metric 0.12999999999999545
Track2 Metric -15


In [15]:
# Experiment names must match the keys of named_scores.
compare("multicrop", "multi-instance_multicrop")

Track1 Metric 0.9200000000000017
Track2 Metric -223


In [16]:
# Experiment names must match the keys of named_scores.
compare("baseline", "ensemble")

Track1 Metric 0.21999999999999886
Track2 Metric -34


In [17]:
# Experiment names must match the keys of named_scores.
compare("ensemble", "three_ensemble")

Track1 Metric 0.29000000000000625
Track2 Metric -61


In [18]:
# Experiment names must match the keys of named_scores.
compare("baseline", "multi-instance")

Track1 Metric 0.960000000000008
Track2 Metric -244


In [19]:
# Experiment names must match the keys of named_scores.
compare("multi-instance", "ensemble_multi-instance")

Track1 Metric 0.19999999999998863
Track2 Metric -38


In [20]:
# Experiment names must match the keys of named_scores.
compare("ensemble_multi-instance", "ensemble_multi-instance_multicrop")

Track1 Metric 0.28000000000000114
Track2 Metric -31


## Conclusion: multi-instance > ensemble > multi-crop (ranked by Track1 Metric gain over the single-model baseline)