In [1]:
%load_ext autoreload
%autoreload 2

import sys
import os

import pandas as pd
import joblib
import json
import numpy as np
import matplotlib.pyplot as plt

# Append the parent of the working directory ("..") to the module search path so
# that imports in later cells can resolve modules located there
# (presumably the project package lives one level up — verify).
sys.path.append("..")



In [7]:
from alternative_image_clustering.paper.load_benchmark_data import (
    create_kmeans_row,
    create_nr_baseline_row,
    create_per_prompt_rows,
    create_table_head,
)

# Build the benchmark results table `df`: one row per (embedding, method) pair,
# one column per entry of the table head, values scaled by 100 and rounded.

# Root directory handed to the loader helpers. The original absolute path is kept
# as the default; set ALTERNATIVE_IMAGE_CLUSTERING_DIR to run on another machine.
base_dir = os.environ.get(
    "ALTERNATIVE_IMAGE_CLUSTERING_DIR",
    "/mnt/data/stephana93dm/storage/projects/alternative_image_clustering",
)

datasets = ["fruit360", "gtsrb", "nrobjects", "cards"]
metrics = ["ACC", "AMI"]

# Rows for the "image" embedding come first.
rows = [
    create_kmeans_row(base_dir, "image", datasets, metrics),
    create_nr_baseline_row(base_dir, "image", "nrkmeans", datasets, metrics),
]

# Every text embedding contributes the same sequence of rows:
# per-prompt rows, the two k-means variants, then the nrkmeans baseline.
for embedding_type in ["tfidf", "sbert_concat"]:
    rows.extend(create_per_prompt_rows(base_dir, embedding_type, datasets, metrics))
    for kmeans_variant in ("per_category_kmeans", "full_kmeans"):
        rows.append(create_kmeans_row(base_dir, embedding_type, datasets, metrics, kmeans_variant))
    rows.append(create_nr_baseline_row(base_dir, embedding_type, "nrkmeans", datasets, metrics))

table_head = pd.MultiIndex.from_tuples(create_table_head(base_dir, datasets, metrics))
results_raw = pd.DataFrame(rows, columns=table_head)

# The first two columns carry the row labels; lift them into a two-level index.
# `.to_numpy()` keeps the index levels unnamed, and positional `iloc` avoids
# materialising the whole mixed-type frame just to read two columns.
row_labels = pd.MultiIndex.from_arrays(
    [results_raw.iloc[:, 0].to_numpy(), results_raw.iloc[:, 1].to_numpy()]
)

# Keep only the metric columns (positional slice, so it does not depend on the
# column labels), attach the labels, and scale by 100 with two decimals
# (presumably turning fractional scores into percentages).
df = (
    results_raw.iloc[:, 2:]
    .set_index(row_labels)
    .mul(100)
    .round(2)
)

In [14]:
# Show the (embedding, method) row labels as a 2-D object array.
# The labels live in the index of `df` — the label columns are dropped when the
# table is built — so slicing `df.values[:, 0:2]` here would return the first two
# metric columns instead of the labels.
df.index.to_frame(index=False).to_numpy()

array([['image', 'kmeans'],
       ['image', 'nrkmeans'],
       ['tfidf', 'per_prompt'],
       ['tfidf', 'per_prompt_max'],
       ['tfidf', 'per_category_kmeans'],
       ['tfidf', 'full_kmeans'],
       ['tfidf', 'nrkmeans'],
       ['sbert_concat', 'per_prompt'],
       ['sbert_concat', 'per_prompt_max'],
       ['sbert_concat', 'per_category_kmeans'],
       ['sbert_concat', 'full_kmeans'],
       ['sbert_concat', 'nrkmeans']], dtype=object)

In [3]:
# The two-level row index is already attached in the cell that builds `df`.
# Re-deriving it from `df.values[:, 0]` / `df.values[:, 1]` at this point would
# replace the labels with the first two metric columns, so this cell only checks
# that the index is in place instead of mutating `df` again.
assert isinstance(df.index, pd.MultiIndex) and df.index.nlevels == 2

In [6]:
# Display the finished table. The label columns were already removed when `df`
# was built, so slicing off `df.columns[2:]` again would hide the first two
# metric columns.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,fruit360,fruit360,fruit360,fruit360,gtsrb,gtsrb,gtsrb,gtsrb,nrobjects,nrobjects,nrobjects,nrobjects,nrobjects,nrobjects,cards,cards,cards,cards
Unnamed: 0_level_1,Unnamed: 1_level_1,fruit,fruit,colour,colour,type,type,colour,colour,shape,shape,material,material,colour,colour,rank,rank,suit,suit
Unnamed: 0_level_2,Unnamed: 1_level_2,ACC,AMI,ACC,AMI,ACC,AMI,ACC,AMI,ACC,AMI,ACC,AMI,ACC,AMI,ACC,AMI,ACC,AMI
image,kmeans,53.14,41.28,56.15,47.29,61.2,51.55,73.99,33.37,96.66,96.22,50.53,0.0,30.74,21.73,41.04,47.83,51.9,34.36
image,nrkmeans,62.97,50.61,58.69,55.47,58.33,38.62,68.4,43.25,82.41,82.22,62.99,32.68,61.27,65.68,45.65,55.18,33.12,16.57
tfidf,per_prompt,48.85,24.78,62.31,50.67,67.94,52.37,93.47,73.68,82.38,71.31,61.41,10.07,79.95,81.2,39.59,41.32,51.79,27.38
tfidf,per_prompt_max,55.75,34.77,72.71,67.38,73.0,58.36,96.95,81.76,99.86,99.17,68.73,20.07,82.94,86.15,42.2,48.44,56.13,36.65
tfidf,per_category_kmeans,53.37,32.22,69.36,65.53,78.5,67.93,96.76,82.33,95.49,96.52,56.35,1.77,93.92,95.27,40.66,41.61,61.05,37.08
tfidf,full_kmeans,51.08,26.4,60.66,56.02,76.52,68.93,89.35,70.44,93.46,91.6,50.97,0.02,25.9,13.25,39.06,39.85,59.18,34.92
tfidf,nrkmeans,49.47,30.05,62.83,55.35,78.5,62.59,84.06,53.46,82.44,84.04,56.47,12.98,52.89,55.16,42.14,38.04,45.56,23.86
sbert_concat,per_prompt,48.3,25.01,66.27,56.92,67.7,55.54,97.04,82.75,86.77,79.36,63.56,15.02,88.09,88.28,47.19,49.99,57.07,33.63
sbert_concat,per_prompt_max,56.67,35.93,71.38,68.81,74.26,61.01,97.71,85.3,99.99,99.92,77.71,40.56,96.22,95.16,51.21,56.09,62.35,37.32
sbert_concat,per_category_kmeans,51.66,27.64,72.0,62.58,76.18,62.56,97.43,84.49,100.0,100.0,51.27,0.06,84.75,88.73,69.74,70.34,57.28,34.38
