In [None]:
import mlflow
import os
import pandas as pd
import numpy as np

In [None]:
# Data-source switch read by the loading cell below: True queries the local
# MLflow store and rewrites the CSV cache, False only reads the CSV cache.
retrieve_from_mlflow = False
# CSV file the per-run test metrics are written to / read from.
save_results_file = "test_results.csv"

In [None]:
# Build (or reload) `runs_u_metrics`, the per-run table every later cell reads.
#
# retrieve_from_mlflow=True : query the finished runs of every "test_<split>"
#   experiment in the local MLflow store, derive the task / decision / name
#   columns, keep one run per unique name and cache the metric columns to
#   `save_results_file`.
# retrieve_from_mlflow=False: load that cached CSV.
#
# NOTE(review): in the MLflow branch `runs_u_metrics` keeps every run column,
# whereas the CSV branch only has the saved metric + name columns. Later cells
# should rely only on the saved columns.
if retrieve_from_mlflow:
    split_path = "../data/splits_kfold_s0"
    # Sorted so the lookup order does not depend on the filesystem listing order.
    splits = sorted(os.listdir(split_path))
    mlflow.set_tracking_uri("../mlruns") # local
    runs_splits = {}  # not used in this cell; kept so the name stays defined

    client = mlflow.MlflowClient()
    experiments = []
    for split_name in splits:
        experiment_name = f"test_{split_name}"
        found_experiment = client.get_experiment_by_name(experiment_name)
        if found_experiment is None:
            # Unknown experiment names yield None; dict(None) would raise a
            # TypeError, so report the missing experiment and move on.
            print("missing", experiment_name)
            continue
        current_experiment = dict(found_experiment)
        print("found", current_experiment["name"])
        experiments.append(current_experiment['experiment_id'])

    if not experiments:
        # Avoid querying MLflow with an empty experiment list.
        raise ValueError(f"no 'test_<split>' MLflow experiment found for the splits in {split_path}")

    # Most recent runs first, so drop_duplicates below keeps the latest run.
    runs = mlflow.search_runs(experiments, filter_string="attributes.status = 'FINISHED'", order_by=["start_time DESC"])
    runs_u = runs.copy()
    # Task = task_name parameter without its last "_"-separated suffix.
    runs_u["task"] = runs_u["params.task_name"].apply(lambda x: x[:x.rfind("_")])

    # Decision = run-name prefix before the first "_", kept only when it is
    # one of the known decision modes (empty string otherwise).
    decisions = ["allvideo", "cumulative"]
    run_name_prefix = runs_u["tags.mlflow.runName"].apply(lambda x: x[:x.find("_")])
    runs_u["decision"] = run_name_prefix.where(run_name_prefix.isin(decisions), "")

    # .copy(): the column assignments below must write to an independent frame,
    # not to a slice of `runs_u` (avoids SettingWithCopyWarning).
    runs_u_metrics = runs_u[runs_u["decision"].isin(decisions)].copy()

    # full_name: run name up to "_k"; full_name_unique additionally keeps "_k"
    # and the next character (presumably the fold digit — confirm).
    runs_u_metrics["full_name"] = runs_u_metrics["tags.mlflow.runName"].apply(lambda x: x[:x.find("_k")])
    runs_u_metrics["full_name_unique"] = runs_u_metrics["tags.mlflow.runName"].apply(lambda x: x[:x.find("_k")+3])

    # One row per unique name; the first occurrence (latest run) wins.
    runs_u_metrics = runs_u_metrics.drop_duplicates(subset=["full_name_unique"])

    # Cache only the metric columns plus the derived identification columns.
    runs_metrics = runs_u_metrics.filter(regex="^metrics", axis=1)
    runs_metrics = runs_metrics.join(runs_u_metrics[["task", "decision" ,"full_name" ,"full_name_unique"]])

    runs_metrics.to_csv(save_results_file)
else:
    runs_u_metrics = pd.read_csv(save_results_file, index_col=0)
    

In [None]:
# Metric columns aggregated in the result tables below.
metrics = [
    "metrics.midv-holo-test_fscore",
    "metrics.midv-holo-pr-notseen_recall",
    "metrics.midv-2020_recall",
]

In [None]:
# Per (task, decision) group, format each metric as "mean ± std" in rounded
# percent. A group only gets a value when it has exactly 5 rows (presumably
# one per fold — confirm) and none of them is NaN; rows with any missing
# metric are then dropped.
df_display_str = (
    runs_u_metrics
    .groupby(["task", "decision"])[metrics]
    .agg(
        lambda x: (
            np.nan
            if len(x) != 5 or np.isnan(x).max()
            else f"{np.rint(x.mean()*100).astype(np.int32)} ± {np.rint(x.std()*100).astype(np.int32)}"
        )
    )
    .dropna()
)

In [None]:
# Display the formatted "mean ± std" table for every (task, decision) pair.
df_display_str

In [None]:
# Markdown export of the summary table (to_markdown needs the optional
# `tabulate` package).
print(df_display_str.to_markdown())

Result of the previous cell:
|                                                       | metrics.midv-holo-test_fscore   | metrics.midv-holo-pr-notseen_recall   | metrics.midv-2020_recall   |
|:------------------------------------------------------|:--------------------------------|:--------------------------------------|:---------------------------|
| ('classifier_mobilenetv3_small_050', 'allvideo')      | 89 ± 3                          | 77 ± 12                               | 44 ± 7                     |
| ('classifier_mobilevit_xxs', 'allvideo')              | 94 ± 3                          | 85 ± 11                               | 59 ± 4                     |
| ('classifier_resnet18', 'allvideo')                   | 92 ± 1                          | 76 ± 10                               | 76 ± 14                    |
| ('imagenet_mobilenetv3_small_050', 'allvideo')        | 73 ± 6                          | 81 ± 15                               | 61 ± 19                    |
| ('imagenet_mobilevit_xxs', 'allvideo')                | 67 ± 1                          | 92 ± 10                               | 82 ± 7                     |
| ('imagenet_resnet18', 'allvideo')                     | 77 ± 7                          | 76 ± 19                               | 59 ± 16                    |
| ('midv_baseline_roi', 'allvideo')                     | 80 ± 3                          | 63 ± 10                               | 92 ± 2                     |
| ('midv_baseline_roi', 'cumulative')                   | 82 ± 4                          | 66 ± 10                               | 93 ± 0                     |
| ('wsl_mobilenetv3_small_050', 'allvideo')             | 88 ± 3                          | 93 ± 8                                | 92 ± 5                     |
| ('wsl_mobilevit_xxs', 'allvideo')                     | 90 ± 2                          | 87 ± 14                               | 93 ± 6                     |
| ('wsl_mobilevit_xxs', 'cumulative')                   | 86 ± 5                          | 84 ± 11                               | 94 ± 4                     |
| ('wsl_noaugment_mobilenetv3_small_050', 'allvideo')   | 83 ± 6                          | 75 ± 17                               | 86 ± 7                     |
| ('wsl_noaugment_mobilevit_xxs', 'allvideo')           | 87 ± 12                         | 65 ± 20                               | 87 ± 7                     |
| ('wsl_noaugment_resnet18', 'allvideo')                | 88 ± 6                          | 81 ± 13                               | 83 ± 5                     |
| ('wsl_onlyorigins_mobilenetv3_small_050', 'allvideo') | 82 ± 7                          | 89 ± 11                               | 94 ± 4                     |
| ('wsl_onlyorigins_mobilevit_xxs', 'allvideo')         | 84 ± 4                          | 87 ± 18                               | 89 ± 9                     |
| ('wsl_onlyorigins_resnet18', 'allvideo')              | 83 ± 2                          | 84 ± 13                               | 87 ± 8                     |
| ('wsl_resnet18', 'allvideo')                          | 88 ± 2                          | 91 ± 7                                | 93 ± 5                     |

In [None]:
# LaTeX export of the same summary table.
print(df_display_str.to_latex())

## Results by decision

## Table 2
### Whole video

In [None]:
decision = "allvideo"
# Per-task "mean ± std" (rounded percent) of each metric for whole-video runs.
# A value is produced only for groups with exactly 5 rows (presumably one per
# fold — confirm) and no NaN, matching the guard used by the other summary
# cells. Without the NaN check, a partially missing metric would be averaged
# over fewer rows and an all-NaN metric would be cast from NaN to int32.
# Groups failing the check get None.
allvideo = (
    runs_u_metrics[runs_u_metrics["decision"] == decision]
    .groupby(["task"])[metrics]
    .agg(
        lambda x: (
            f"{np.rint(x.mean()*100).astype(np.int32)} ± {np.rint(x.std()*100).astype(np.int32)}"
            if len(x) == 5 and not np.isnan(x).max()
            else None
        )
    )
)
print(decision)

In [None]:
print(decision)
# Table 2 rows: tasks whose name starts with "wsl_mobilevit" or "midv".
table2_rows = allvideo.index.str.startswith("wsl_mobilevit") | allvideo.index.str.startswith("midv")
allvideo.loc[table2_rows]

In [None]:
# LaTeX export of the Table 2 rows (task names starting with "wsl_mobilevit"
# or "midv") for the whole-video decision.
print(allvideo[allvideo.index.str.startswith("wsl_mobilevit")|allvideo.index.str.startswith("midv")].to_latex())

In [None]:
# Markdown export of the same Table 2 rows (to_markdown needs the optional
# `tabulate` package).
print(allvideo[allvideo.index.str.startswith("wsl_mobilevit")|allvideo.index.str.startswith("midv")].to_markdown())

Result of the previous cell (whole video):
| task              | metrics.midv-holo-test_fscore   | metrics.midv-holo-pr-notseen_recall   | metrics.midv-2020_recall   |
|:------------------|:--------------------------------|:--------------------------------------|:---------------------------|
| midv_baseline_roi | 80 ± 3                          | 63 ± 10                               | 92 ± 2                     |
| wsl_mobilevit_xxs | 90 ± 2                          | 87 ± 14                               | 93 ± 6                     |

### Cumulative

In [None]:
decision = "cumulative"
# Per-task "mean ± std" (rounded percent) of each metric for cumulative runs.
# Groups without exactly 5 rows, or containing NaN, yield None and are dropped.
cumulative = (
    runs_u_metrics.loc[runs_u_metrics["decision"] == decision]
    .groupby(["task"])[metrics]
    .agg(
        lambda x: (
            None
            if len(x) != 5 or np.isnan(x).max()
            else f"{np.rint(x.mean()*100).astype(np.int32)} ± {np.rint(x.std()*100).astype(np.int32)}"
        )
    )
    .dropna()
)
print(decision)
# Table 2 rows: tasks whose name starts with "wsl_mobilevit" or "midv".
table2_rows = cumulative.index.str.startswith("wsl_mobilevit") | cumulative.index.str.startswith("midv")
print(cumulative.loc[table2_rows].to_latex())

In [None]:
# Markdown export of the cumulative rows for the "wsl_mobilevit*" tasks and
# the "midv_baseline_roi*" baseline.
table2_rows = cumulative.index.str.startswith("wsl_mobilevit") | cumulative.index.str.startswith("midv_baseline_roi")
print(cumulative.loc[table2_rows].to_markdown())

Result of the previous cell (cumulative):
| task              | metrics.midv-holo-test_fscore   | metrics.midv-holo-pr-notseen_recall   | metrics.midv-2020_recall   |
|:------------------|:--------------------------------|:--------------------------------------|:---------------------------|
| midv_baseline_roi | 82 ± 4                          | 66 ± 10                               | 93 ± 0                     |
| wsl_mobilevit_xxs | 86 ± 5                          | 84 ± 11                               | 94 ± 4                     |

In [None]:
print("MIDV baseline Full document in cumulative over the train/test splits")
# Full-document metric columns; the table is built only if all are present.
fulldoc_metrics = [
    "metrics.midv-holo-test-fulldoc_fscore",
    "metrics.midv-holo-pr-notseen-fulldoc_recall",
    "metrics.midv-2020-fulldoc_recall",
]
if set(fulldoc_metrics).issubset(runs_u_metrics.columns):
    # Same "mean ± std" formatting as the other summary tables: groups
    # without exactly 5 rows, or containing NaN, yield None and are dropped.
    df_display_str2 = (
        runs_u_metrics
        .groupby(["task", "decision"])[fulldoc_metrics]
        .agg(
            lambda x: (
                None
                if len(x) != 5 or np.isnan(x).max()
                else f"{np.rint(x.mean()*100).astype(np.int32)} ± {np.rint(x.std()*100).astype(np.int32)}"
            )
        )
        .dropna()
    )
    print(df_display_str2.to_latex())

Result of the previous cell (full document, cumulative):
```latex
\begin{tabular}{lllll}
\toprule
 &  & metrics.midv-holo-test-fulldoc_fscore & metrics.midv-holo-pr-notseen-fulldoc_recall & metrics.midv-2020-fulldoc_recall \\
task & decision &  &  &  \\
\midrule
midv_baseline_fulldoc & cumulative & 77 ± 1 & 27 ± 12 & 84 ± 5 \\
\cline{1-5}
\bottomrule
\end{tabular}
```

## Table 3
### Ablation study on whole video 

In [None]:
decision = "allvideo"
# Per-task "mean ± std" (rounded percent) of each metric for whole-video runs,
# covering every task for the ablation table. A value is produced only for
# groups with exactly 5 rows (presumably one per fold — confirm) and no NaN,
# matching the guard used by the other summary cells. Without the NaN check,
# a partially missing metric would be averaged over fewer rows and an all-NaN
# metric would be cast from NaN to int32. Groups failing the check get None.
allvideo = (
    runs_u_metrics[runs_u_metrics["decision"] == decision]
    .groupby(["task"])[metrics]
    .agg(
        lambda x: (
            f"{np.rint(x.mean()*100).astype(np.int32)} ± {np.rint(x.std()*100).astype(np.int32)}"
            if len(x) == 5 and not np.isnan(x).max()
            else None
        )
    )
)
print(decision)
allvideo

In [None]:
# Markdown export of the full whole-video table (to_markdown needs the
# optional `tabulate` package).
print(allvideo.to_markdown())

Result of the previous cell (whole video, all tasks):
| task                                  | metrics.midv-holo-test_fscore   | metrics.midv-holo-pr-notseen_recall   | metrics.midv-2020_recall   |
|:--------------------------------------|:--------------------------------|:--------------------------------------|:---------------------------|
| classifier_mobilenetv3_small_050      | 89 ± 3                          | 77 ± 12                               | 44 ± 7                     |
| classifier_mobilevit_xxs              | 94 ± 3                          | 85 ± 11                               | 59 ± 4                     |
| classifier_resnet18                   | 92 ± 1                          | 76 ± 10                               | 76 ± 14                    |
| imagenet_mobilenetv3_small_050        | 73 ± 6                          | 81 ± 15                               | 61 ± 19                    |
| imagenet_mobilevit_xxs                | 67 ± 1                          | 92 ± 10                               | 82 ± 7                     |
| imagenet_resnet18                     | 77 ± 7                          | 76 ± 19                               | 59 ± 16                    |
| midv_baseline_roi                     | 80 ± 3                          | 63 ± 10                               | 92 ± 2                     |
| wsl_mobilenetv3_small_050             | 88 ± 3                          | 93 ± 8                                | 92 ± 5                     |
| wsl_mobilevit_xxs                     | 90 ± 2                          | 87 ± 14                               | 93 ± 6                     |
| wsl_noaugment_mobilenetv3_small_050   | 83 ± 6                          | 75 ± 17                               | 86 ± 7                     |
| wsl_noaugment_mobilevit_xxs           | 87 ± 12                         | 65 ± 20                               | 87 ± 7                     |
| wsl_noaugment_resnet18                | 88 ± 6                          | 81 ± 13                               | 83 ± 5                     |
| wsl_onlyorigins_mobilenetv3_small_050 | 82 ± 7                          | 89 ± 11                               | 94 ± 4                     |
| wsl_onlyorigins_mobilevit_xxs         | 84 ± 4                          | 87 ± 18                               | 89 ± 9                     |
| wsl_onlyorigins_resnet18              | 83 ± 2                          | 84 ± 13                               | 87 ± 8                     |
| wsl_resnet18                          | 88 ± 2                          | 91 ± 7                                | 93 ± 5                     |

In [None]:
print("Weakly supervised models trained on different architectures")
# Tasks named "wsl_mobile*" or "wsl_resnet*", i.e. the wsl runs without an
# extra variant tag between "wsl_" and the architecture name.
architecture_rows = allvideo.index.str.startswith("wsl_mobile") | allvideo.index.str.startswith("wsl_resnet")
res = allvideo.loc[architecture_rows]
res

In [None]:
# LaTeX export of `res` as set by the previous cell (architecture comparison).
print("Weakly supervised models trained on different architectures")
print(res.to_latex())

In [None]:
print("No augmentations")
# Tasks whose name starts with "wsl_noa" (the wsl_noaugment_* runs).
res = allvideo.loc[allvideo.index.str.startswith("wsl_noa")]
res

In [None]:
# LaTeX export of `res` as set by the previous cell (no-augmentation runs).
print("No augmentations")
print(res.to_latex())

In [None]:
print("Classifier")
# Tasks whose name starts with "classifier", exported as LaTeX.
res = allvideo.loc[allvideo.index.str.startswith("classifier")]
print(res.to_latex())

In [None]:
print("Only trained on originals")
# Tasks whose name starts with "wsl_onlyorigins", exported as LaTeX.
res = allvideo.loc[allvideo.index.str.startswith("wsl_onlyorigins")]
print(res.to_latex())

In [None]:
print("Only trained on ImageNet")
# Tasks whose name starts with "imagenet", exported as LaTeX.
res = allvideo.loc[allvideo.index.str.startswith("imagenet")]
print(res.to_latex())