In [5]:
import os
import pathlib

import numpy as np
import pandas as pd

from tqdm import tqdm

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

#

from src.datasets import deep_fashion_ctsrbm

import src.utils.json

---

In [6]:
# Locations of the three evaluation dumps compared in this notebook.
# All of them live under ~/data/fashion_retrieval and share the same file name.
fashion_retrieval_dir = os.path.join(pathlib.Path.home(), "data", "fashion_retrieval")
eval_data_basename = "eval_ctsrbm_accuracy__data.json"

eval_data_filename_1 = os.path.join(
    fashion_retrieval_dir, "ret_train", "convnext_t", "test_002", eval_data_basename
)
eval_data_filename_2 = os.path.join(
    fashion_retrieval_dir, "ret_train_old_smpl", "convnext_t_s2", "exp_006", eval_data_basename
)
eval_data_filename_3 = os.path.join(
    fashion_retrieval_dir, "ret_train_old_smpl", "swint_v2_t_s2", "run_002", eval_data_basename
)

# Parse each dump into a dict; later cells read the "results" entry of each one.
eval_data_1 = src.utils.json.load_json_dict(eval_data_filename_1)
eval_data_2 = src.utils.json.load_json_dict(eval_data_filename_2)
eval_data_3 = src.utils.json.load_json_dict(eval_data_filename_3)

---

In [7]:
# Open the Consumer-to-shop benchmark; no image transform is passed.
ctsrbm_dataset_dir = os.path.join(
    pathlib.Path.home(),
    "data",
    "DeepFashion",
    "Consumer-to-shop Clothes Retrieval Benchmark",
)
ctsrbm_dataset = deep_fashion_ctsrbm.ConsToShopClothRetrBmkImageLoader(
    ctsrbm_dataset_dir,
    img_transform=None,
)

# Consumer-domain indices of every split, queried in train / val / test order.
train_cons_idxs, val_cons_idxs, test_cons_idxs = [
    ctsrbm_dataset.get_subset_indices(split=split_name, domain="consumer")
    for split_name in ("train", "val", "test")
]

# One flat index array in the same train / val / test order, plus its length.
cons_idxs = np.concatenate((train_cons_idxs, val_cons_idxs, test_cons_idxs))
num_cons_imgs = len(cons_idxs)

In [8]:
def _collect_split_results(eval_data, expected_len, split_names=("train", "val", "test")):
    """Flatten the per-split results of one evaluation into two arrays.

    For every split in ``split_names`` (in that order) the entries of
    ``eval_data["results"][split]`` are unpacked as ``(idx, acc, num)``.
    The ``idx`` field is not used: entries are taken in file order.

    Parameters
    ----------
    eval_data : dict
        Loaded evaluation data with a ``"results"`` entry keyed by split name.
    expected_len : int
        Number of entries that must be found across all splits.
    split_names : sequence of str
        Splits to read, in concatenation order.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Float array with the ``acc`` values and int array with the ``num`` values.

    Raises
    ------
    ValueError
        If the number of entries differs from ``expected_len``. A preallocated
        buffer would otherwise silently keep uninitialised values (too few
        entries) or overflow (too many).
    """
    acc_values = []
    num_values = []
    for split_name in split_names:
        for _idx, acc, num in eval_data["results"][split_name]:
            acc_values.append(acc)
            num_values.append(num)

    if len(acc_values) != expected_len:
        raise ValueError(
            f"expected {expected_len} result entries, found {len(acc_values)}"
        )

    return np.array(acc_values, dtype="float"), np.array(num_values, dtype="int")


# Size everything from cons_idxs itself rather than from num_cons_imgs, so this
# cell keeps working if num_cons_imgs has been rebound by another cell.
num_expected_results = len(cons_idxs)

# The shop-image counts are taken from the first evaluation only.
# NOTE(review): assumes all three files list the images in the same order as
# cons_idxs -- confirm against the evaluation script.
acc_array_1, num_array = _collect_split_results(eval_data_1, num_expected_results)
acc_array_2 = _collect_split_results(eval_data_2, num_expected_results)[0]
acc_array_3 = _collect_split_results(eval_data_3, num_expected_results)[0]

In [13]:
# One split label per consumer image, laid out in the same train / val / test
# order used to build cons_idxs.
split_arr = [
    split_name
    for split_name, split_idxs in (
        ("train", train_cons_idxs),
        ("val", val_cons_idxs),
        ("test", test_cons_idxs),
    )
    for _ in range(len(split_idxs))
]

# Gather image indices, split labels and the three accuracy arrays in one table.
results_columns = {
    "img_idxs": cons_idxs,
    "split": split_arr,
    "acc_1": acc_array_1,
    "acc_2": acc_array_2,
    "acc_3": acc_array_3,
    "num_shop_imgs": num_array,
}
results_df = pd.DataFrame(data=results_columns)

In [12]:
# Quick look at the first ten rows of the combined table.
results_df.head(n=10)

Unnamed: 0,img_idxs,split,acc_1,acc_2,acc_3,num_shop_imgs
0,0,train,0.0,0.0,0.0,1
1,1,train,0.0,0.0,0.0,1
2,2,train,0.0,0.0,1.0,1
3,8,train,0.0,0.0,0.666667,3
4,9,train,0.0,0.0,0.0,3
5,10,train,0.0,0.0,0.333333,3
6,11,train,0.0,0.333333,0.333333,3
7,15,train,0.0,0.0,0.0,1
8,25,train,0.0,0.0,0.5,2
9,26,train,0.0,0.0,0.0,2


---

In [24]:
# Keep only the test split. The copy is taken AFTER filtering so that the
# result is an independent frame: adding columns to it below neither touches
# results_df nor triggers pandas' SettingWithCopyWarning.
is_test_row = results_df["split"] == "test"
results_split_df = results_df.loc[is_test_row].copy()

# gain_k = accuracy of evaluation k minus the accuracies of the other two, so
# the largest values mark images where only evaluation k scores.
results_split_df["gain_1"] = results_split_df["acc_1"] - results_split_df["acc_2"] - results_split_df["acc_3"]
results_split_df["gain_2"] = results_split_df["acc_2"] - results_split_df["acc_1"] - results_split_df["acc_3"]
results_split_df["gain_3"] = results_split_df["acc_3"] - results_split_df["acc_1"] - results_split_df["acc_2"]

In [33]:
# Fifty rows with the highest gain_1, largest first.
(
    results_split_df
    .sort_values(by="gain_1", ascending=False)
    .head(50)
)

Unnamed: 0,img_idxs,split,acc_1,acc_2,acc_3,num_shop_imgs,gain_1,gain_2,gain_3
149842,15301,test,1.0,0.0,0.0,1,1.0,-1.0,-1.0
149227,12668,test,1.0,0.0,0.0,1,1.0,-1.0,-1.0
188225,208076,test,1.0,0.0,0.0,1,1.0,-1.0,-1.0
189805,216050,test,1.0,0.0,0.0,1,1.0,-1.0,-1.0
188230,208081,test,1.0,0.0,0.0,1,1.0,-1.0,-1.0
188232,208084,test,1.0,0.0,0.0,1,1.0,-1.0,-1.0
149216,12603,test,1.0,0.0,0.0,1,1.0,-1.0,-1.0
175546,142696,test,1.0,0.0,0.0,1,1.0,-1.0,-1.0
151657,24079,test,1.0,0.0,0.0,1,1.0,-1.0,-1.0
175547,142697,test,1.0,0.0,0.0,1,1.0,-1.0,-1.0


In [34]:
# Fifty rows with the highest gain_2, largest first.
(
    results_split_df
    .sort_values(by="gain_2", ascending=False)
    .head(50)
)

Unnamed: 0,img_idxs,split,acc_1,acc_2,acc_3,num_shop_imgs,gain_1,gain_2,gain_3
149628,14083,test,0.0,1.0,0.0,1,-1.0,1.0,-1.0
186420,198076,test,0.0,1.0,0.0,1,-1.0,1.0,-1.0
153322,32827,test,0.0,1.0,0.0,1,-1.0,1.0,-1.0
149033,12011,test,0.0,1.0,0.0,1,-1.0,1.0,-1.0
147616,4137,test,0.0,1.0,0.0,1,-1.0,1.0,-1.0
177275,150508,test,0.0,1.0,0.0,1,-1.0,1.0,-1.0
187997,206662,test,0.0,1.0,0.0,1,-1.0,1.0,-1.0
161613,73223,test,0.0,1.0,0.0,1,-1.0,1.0,-1.0
193129,233740,test,0.0,1.0,0.0,1,-1.0,1.0,-1.0
193127,233738,test,0.0,1.0,0.0,1,-1.0,1.0,-1.0


In [35]:
# Fifty rows with the highest gain_3, largest first.
(
    results_split_df
    .sort_values(by="gain_3", ascending=False)
    .head(50)
)

Unnamed: 0,img_idxs,split,acc_1,acc_2,acc_3,num_shop_imgs,gain_1,gain_2,gain_3
169469,112924,test,0.0,0.0,1.0,1,-1.0,-1.0,1.0
189297,213705,test,0.0,0.0,1.0,1,-1.0,-1.0,1.0
168244,106808,test,0.0,0.0,1.0,1,-1.0,-1.0,1.0
168245,106809,test,0.0,0.0,1.0,1,-1.0,-1.0,1.0
184196,187042,test,0.0,0.0,1.0,1,-1.0,-1.0,1.0
168248,106815,test,0.0,0.0,1.0,1,-1.0,-1.0,1.0
189361,213952,test,0.0,0.0,1.0,1,-1.0,-1.0,1.0
151256,22020,test,0.0,0.0,1.0,1,-1.0,-1.0,1.0
189359,213949,test,0.0,0.0,1.0,1,-1.0,-1.0,1.0
168253,106820,test,0.0,0.0,1.0,1,-1.0,-1.0,1.0


In [37]:
# Print, for each gain column, the image indices of its top-ranked rows as a
# ready-to-paste `"desired_cons_img_idxs": [...]` line, then one combined line.
all_img_idxs = []
# NOTE(review): this rebinds num_cons_imgs, which an earlier cell sets to
# len(cons_idxs); a dedicated name for the top-N size would avoid the clash.
num_cons_imgs = 50

for gain_col in ("gain_1", "gain_2", "gain_3"):
    # Rank by the current gain column, best first, and keep the leading rows.
    sorted_results_split_df = results_split_df.sort_values(by=gain_col, ascending=False)
    img_idxs = sorted_results_split_df.head(num_cons_imgs)["img_idxs"].tolist()
    all_img_idxs += img_idxs

    print("\"desired_cons_img_idxs\": " + str(img_idxs) + ",\n")

# All three selections concatenated in gain_1, gain_2, gain_3 order.
print("\"desired_cons_img_idxs\": " + str(all_img_idxs) + ",\n")

"desired_cons_img_idxs": [15301, 12668, 208076, 216050, 208081, 208084, 12603, 142696, 24079, 142697, 233283, 149246, 74415, 233269, 31781, 17880, 223114, 4402, 223115, 163813, 49133, 232994, 63440, 216098, 100765, 234065, 49304, 81500, 81501, 234045, 174430, 31494, 81504, 134366, 31502, 49492, 81511, 174442, 42764, 101011, 207949, 190775, 12740, 17719, 4018, 164474, 142549, 233538, 155632, 232959],

"desired_cons_img_idxs": [14083, 198076, 32827, 12011, 4137, 150508, 206662, 73223, 233740, 233738, 186086, 136698, 136697, 227255, 82677, 43448, 211257, 186081, 175258, 136689, 25907, 186080, 32946, 186078, 186074, 233505, 4322, 73225, 143920, 76568, 94752, 51038, 234148, 179887, 206501, 18500, 206505, 123040, 128339, 215929, 63960, 148578, 112194, 215925, 112220, 123017, 66195, 136839, 128425, 103844],

"desired_cons_img_idxs": [112924, 213705, 106808, 106809, 187042, 106815, 213952, 22020, 213949, 106820, 230431, 187086, 106822, 106823, 106837, 153583, 153582, 153578, 153568, 132634, 13