In [26]:
import os
import pathlib

import numpy as np
import pandas as pd

from tqdm import tqdm

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

#

from src.datasets import deep_fashion_ctsrbm

import src.utils.json

---

In [27]:
#eval_data_filename_1 = os.path.join(pathlib.Path.home(), "data", "fashion_retrieval", "ret_train", "convnext_t", "test_002", "eval_ctsrbm_accuracy__data.json")
#eval_data_filename_2 = os.path.join(pathlib.Path.home(), "data", "fashion_retrieval", "ret_train_old_smpl", "convnext_t_s2", "exp_006", "eval_ctsrbm_accuracy__data.json")
#eval_data_filename_3 = os.path.join(pathlib.Path.home(), "data", "fashion_retrieval", "ret_train_old_smpl", "swint_v2_t_s2", "run_002", "eval_ctsrbm_accuracy__data.json")

# All three evaluation files live under the same root and share one file name;
# only the experiment sub-directory differs between runs.
fashion_retrieval_dir = os.path.join(pathlib.Path.home(), "data", "fashion_retrieval")
eval_data_basename = "eval_ctsrbm_accuracy_2__data.json"

eval_data_filename_1 = os.path.join(
    fashion_retrieval_dir, "ret_train_old_smpl", "convnext_t_s2", "exp_006", eval_data_basename
)
eval_data_filename_2 = os.path.join(
    fashion_retrieval_dir, "ret_train_old_smpl_2", "convnext_t_s2", "glam_000_exp_007", eval_data_basename
)
eval_data_filename_3 = os.path.join(
    fashion_retrieval_dir, "ret_train_old_smpl", "swint_v2_t_s2", "run_002", eval_data_basename
)

# Load the three result dictionaries in run order (1, 2, 3).
# Later cells read them as eval_data_k["results"][<split name>].
eval_data_1, eval_data_2, eval_data_3 = (
    src.utils.json.load_json_dict(eval_data_filename)
    for eval_data_filename in (
        eval_data_filename_1,
        eval_data_filename_2,
        eval_data_filename_3,
    )
)

---

In [28]:
# Open the Consumer-to-shop benchmark without any image transform.
ctsrbm_dataset_dir = os.path.join(
    pathlib.Path.home(),
    "data",
    "DeepFashion",
    "Consumer-to-shop Clothes Retrieval Benchmark",
)
ctsrbm_dataset = deep_fashion_ctsrbm.ConsToShopClothRetrBmkImageLoader(
    ctsrbm_dataset_dir,
    img_transform=None,
)

# Consumer-domain indices of each split, as returned by get_subset_indices.
train_cons_idxs, val_cons_idxs, test_cons_idxs = (
    ctsrbm_dataset.get_subset_indices(split=split_name, domain="consumer")
    for split_name in ("train", "val", "test")
)

# Concatenate in train -> val -> test order; the cells that fill the AP arrays
# and build the "split" column follow this same order.
cons_idxs = np.concatenate((train_cons_idxs, val_cons_idxs, test_cons_idxs))
num_cons_imgs = cons_idxs.shape[0]

In [29]:
def _collect_split_results(eval_data, expected_len):
    """Flatten the per-split results of one evaluation run into arrays.

    The entries of eval_data["results"] are read in train -> val -> test
    order. Each entry unpacks into three values (idx, ap, num); only `ap`
    and `num` are kept.

    NOTE(review): the `idx` field is ignored, so the entries are assumed to
    be listed in the same order as `cons_idxs` - TODO confirm against the
    script that writes the evaluation files.

    Args:
        eval_data: Dict with a "results" key mapping "train", "val" and
            "test" to sequences of (idx, ap, num) entries.
        expected_len: Total number of entries expected across the splits.

    Returns:
        Tuple (ap_values, num_values): a float array and an int array, each
        of length `expected_len`.

    Raises:
        ValueError: If the total number of entries differs from
            `expected_len`. Writing into pre-allocated `np.empty` buffers
            would otherwise leave uninitialised values (too few entries) or
            fail with an IndexError (too many entries).
    """
    rows = [
        row
        for split_name in ("train", "val", "test")
        for row in eval_data["results"][split_name]
    ]
    if len(rows) != expected_len:
        raise ValueError(
            "Expected {:d} result entries but found {:d}".format(expected_len, len(rows))
        )
    ap_values = np.array([ap for _idx, ap, _num in rows], dtype="float")
    num_values = np.array([num for _idx, _ap, num in rows], dtype="int")
    return ap_values, num_values


# The per-image counts are taken from the first run only, as before.
ap_array_1, num_array = _collect_split_results(eval_data_1, num_cons_imgs)
ap_array_2 = _collect_split_results(eval_data_2, num_cons_imgs)[0]
ap_array_3 = _collect_split_results(eval_data_3, num_cons_imgs)[0]

In [30]:
# One split label per consumer image, in the same train -> val -> test order
# in which cons_idxs was concatenated.
split_arr = [
    split_name
    for split_name, split_idxs in (
        ("train", train_cons_idxs),
        ("val", val_cons_idxs),
        ("test", test_cons_idxs),
    )
    for _ in range(len(split_idxs))
]

# One row per consumer image: its index, its split, the AP value from each of
# the three evaluation runs, and the count taken from the first run.
results_columns = {
    "img_idxs": cons_idxs,
    "split": split_arr,
    "ap_1": ap_array_1,
    "ap_2": ap_array_2,
    "ap_3": ap_array_3,
    "num_shop_imgs": num_array,
}
results_df = pd.DataFrame(results_columns)

In [31]:
# Preview the first ten rows of the combined results table.
results_df.iloc[:10]

Unnamed: 0,img_idxs,split,ap_1,ap_2,ap_3,num_shop_imgs
0,0,train,0.001898,0.000608,0.005682,1
1,1,train,0.000296,0.000159,0.00431,1
2,2,train,0.037037,1.0,1.0,1
3,8,train,0.006774,0.670782,0.397585,3
4,9,train,0.022447,0.035119,0.005133,3
5,10,train,0.027437,0.001621,0.204,3
6,11,train,0.336281,0.337881,0.566667,3
7,15,train,0.125,0.25,0.05,1
8,25,train,0.08421,0.501028,0.516949,2
9,26,train,0.001648,0.001723,0.006394,2


---

In [32]:
# Keep only the test-split rows. The copy is taken AFTER filtering so that
# results_split_df is an independent frame: adding the gain columns below
# neither touches results_df nor triggers pandas' SettingWithCopyWarning
# (copying before the filter left the filtered frame flagged as a possible copy).
results_split_df = results_df[results_df["split"] == "test"].copy()

# gain_k = ap_k minus the ap values of the other two runs, so a large gain_k
# marks images on which run k scores high while both other runs score low.
results_split_df["gain_1"] = results_split_df["ap_1"] - results_split_df["ap_2"] - results_split_df["ap_3"]
results_split_df["gain_2"] = results_split_df["ap_2"] - results_split_df["ap_1"] - results_split_df["ap_3"]
results_split_df["gain_3"] = results_split_df["ap_3"] - results_split_df["ap_1"] - results_split_df["ap_2"]

In [33]:
# Test rows with num_shop_imgs == 1, ranked by gain_1 (highest first), top 40.
(
    results_split_df
    .loc[results_split_df["num_shop_imgs"] == 1]
    .sort_values(by="gain_1", ascending=False)
    .head(40)
)

Unnamed: 0,img_idxs,split,ap_1,ap_2,ap_3,num_shop_imgs,gain_1,gain_2,gain_3
159223,60703,test,1.0,0.000401,0.001709,1,0.997889,-1.001308,-0.998692
155234,42683,test,1.0,0.005556,0.00641,1,0.988034,-1.000855,-0.999145
163560,83315,test,1.0,0.004926,0.013699,1,0.981375,-1.008773,-0.991227
171592,123017,test,1.0,0.009434,0.01,1,0.980566,-1.000566,-0.999434
149150,12285,test,1.0,0.012987,0.010204,1,0.976809,-0.997217,-1.002783
160338,66533,test,1.0,0.016393,0.009009,1,0.974598,-0.992616,-1.007384
159561,62672,test,1.0,0.019231,0.006494,1,0.974276,-0.987263,-1.012737
188645,210449,test,1.0,0.022222,0.008621,1,0.969157,-0.986398,-1.013602
174652,138311,test,1.0,0.025,0.007692,1,0.967308,-0.982692,-1.017308
166461,98482,test,1.0,0.013699,0.020408,1,0.965893,-1.00671,-0.99329


In [34]:
# Test rows with num_shop_imgs > 1, ranked by gain_1 (highest first), top 10.
(
    results_split_df
    .loc[results_split_df["num_shop_imgs"] > 1]
    .sort_values(by="gain_1", ascending=False)
    .head(10)
)

Unnamed: 0,img_idxs,split,ap_1,ap_2,ap_3,num_shop_imgs,gain_1,gain_2,gain_3
176470,146712,test,0.868708,0.113121,0.067287,7,0.6883,-0.822873,-0.914542
153312,32735,test,0.75,0.196429,0.012499,2,0.541072,-0.56607,-0.93393
190312,218158,test,0.611111,0.049107,0.030833,2,0.531171,-0.592837,-0.629385
153144,31856,test,0.5625,0.010764,0.037211,2,0.514525,-0.588946,-0.536054
182809,179388,test,0.510309,0.003853,0.002602,2,0.503854,-0.509059,-0.51156
186881,200318,test,0.527027,0.010956,0.01808,2,0.497992,-0.534151,-0.519903
174173,135873,test,0.501229,0.002777,0.002429,2,0.496023,-0.500881,-0.501576
171097,120685,test,0.50033,0.008201,0.000598,2,0.491531,-0.492727,-0.507933
154071,36414,test,0.501178,0.007938,0.005241,2,0.487998,-0.498481,-0.503875
191680,225292,test,0.500953,0.005426,0.01187,2,0.483657,-0.507398,-0.494509


In [35]:
# Test rows with num_shop_imgs == 1, ranked by gain_2 (highest first), top 40.
(
    results_split_df
    .loc[results_split_df["num_shop_imgs"] == 1]
    .sort_values(by="gain_2", ascending=False)
    .head(40)
)

Unnamed: 0,img_idxs,split,ap_1,ap_2,ap_3,num_shop_imgs,gain_1,gain_2,gain_3
173231,131482,test,0.000903,1.0,0.002994,1,-1.002091,0.996103,-0.997909
192483,229438,test,0.000573,1.0,0.005747,1,-1.005174,0.993679,-0.994826
154612,39566,test,0.005405,1.0,0.001185,1,-0.995779,0.99341,-1.004221
178090,155155,test,0.003584,1.0,0.003802,1,-1.000218,0.992613,-0.999782
165217,92842,test,0.003534,1.0,0.004717,1,-1.001183,0.991749,-0.998817
186279,197203,test,0.000983,1.0,0.007692,1,-1.006709,0.991324,-0.993291
184606,189267,test,0.001323,1.0,0.007463,1,-1.00614,0.991215,-0.99386
162328,77581,test,0.002217,1.0,0.006623,1,-1.004405,0.99116,-0.995595
184538,188887,test,0.011905,1.0,0.00266,1,-0.990755,0.985436,-1.009245
180544,167468,test,0.003846,1.0,0.011236,1,-1.00739,0.984918,-0.99261


In [36]:
# Test rows with num_shop_imgs > 1, ranked by gain_2 (highest first), top 10.
(
    results_split_df
    .loc[results_split_df["num_shop_imgs"] > 1]
    .sort_values(by="gain_2", ascending=False)
    .head(10)
)

Unnamed: 0,img_idxs,split,ap_1,ap_2,ap_3,num_shop_imgs,gain_1,gain_2,gain_3
179313,161207,test,0.018904,1.0,0.018348,2,-0.999444,0.962748,-1.000556
187487,203725,test,0.014427,1.0,0.05125,2,-1.036823,0.934323,-0.963177
147729,4633,test,0.043844,1.0,0.033315,2,-0.989471,0.92284,-1.010529
180144,165114,test,0.037305,1.0,0.05756,2,-1.020255,0.905136,-0.979745
175973,144719,test,0.06403,1.0,0.049412,2,-0.985382,0.886558,-1.014618
193987,238289,test,0.096847,1.0,0.074728,2,-0.977881,0.828425,-1.022119
182629,178454,test,0.089286,1.0,0.090659,2,-1.001374,0.820055,-0.998626
147026,1203,test,0.089048,1.0,0.111111,2,-1.022063,0.799841,-0.977937
171492,122616,test,0.190909,1.0,0.013876,2,-0.822967,0.795215,-1.177033
151510,23480,test,0.009692,1.0,0.196429,2,-1.186737,0.79388,-0.813263


In [37]:
# Test rows with num_shop_imgs == 1, ranked by gain_3 (highest first), top 40.
(
    results_split_df
    .loc[results_split_df["num_shop_imgs"] == 1]
    .sort_values(by="gain_3", ascending=False)
    .head(40)
)

Unnamed: 0,img_idxs,split,ap_1,ap_2,ap_3,num_shop_imgs,gain_1,gain_2,gain_3
193573,236146,test,0.001838,0.001471,1.0,1,-0.999632,-1.000368,0.996691
148086,6714,test,0.001773,0.001835,1.0,1,-1.000062,-0.999938,0.996392
166449,98469,test,0.002994,0.001328,1.0,1,-0.998334,-1.001666,0.995678
178502,157365,test,0.003788,0.000649,1.0,1,-0.996861,-1.003139,0.995563
154609,39563,test,0.000507,0.004202,1.0,1,-1.003695,-0.996305,0.995292
177449,151380,test,0.001517,0.003226,1.0,1,-1.001708,-0.998292,0.995257
193795,237310,test,0.00289,0.001988,1.0,1,-0.999098,-1.000902,0.995122
147181,1923,test,0.004566,0.000716,1.0,1,-0.99615,-1.00385,0.994717
186257,197113,test,0.001488,0.003906,1.0,1,-1.002418,-0.997582,0.994606
166884,100461,test,0.002833,0.002695,1.0,1,-0.999863,-1.000137,0.994472


In [38]:
# Test rows with num_shop_imgs > 1, ranked by gain_3 (highest first), top 10.
(
    results_split_df
    .loc[results_split_df["num_shop_imgs"] > 1]
    .sort_values(by="gain_3", ascending=False)
    .head(10)
)

Unnamed: 0,img_idxs,split,ap_1,ap_2,ap_3,num_shop_imgs,gain_1,gain_2,gain_3
181135,170712,test,0.003652,0.02382,1.0,2,-1.020168,-0.979832,0.972528
173275,131734,test,0.032143,0.016314,1.0,2,-0.984171,-1.015829,0.951543
182602,178421,test,0.070481,0.004559,1.0,2,-0.934078,-1.065922,0.92496
193419,235428,test,0.0289,0.071795,1.0,2,-1.042895,-0.957105,0.899305
181969,175289,test,0.024184,0.105128,1.0,2,-1.080944,-0.919056,0.870688
161469,72260,test,0.094298,0.062271,1.0,2,-0.967973,-1.032027,0.843431
190723,220098,test,0.072505,0.085756,1.0,2,-1.013251,-0.986749,0.841739
149277,12825,test,0.078125,0.081169,1.0,2,-1.003044,-0.996956,0.840706
149278,12827,test,0.088972,0.085859,1.0,2,-0.996886,-1.003114,0.825169
193434,235445,test,0.068382,0.114925,1.0,2,-1.046543,-0.953457,0.816692


In [39]:
# How many images to pick per gain column: from the rows with exactly one
# shop image (num_cons_imgs_1) and from the rows with more than one
# (num_cons_imgs_2).
num_cons_imgs_1 = 40
num_cons_imgs_2 = 10


def _top_gain_img_idxs(split_df, gain_col, num_single, num_multi):
    """Pick the image indices with the highest values of one gain column.

    Rows are ranked by `gain_col` in descending order, separately for rows
    with `num_shop_imgs == 1` and rows with `num_shop_imgs > 1`.

    Args:
        split_df: DataFrame with "img_idxs", "num_shop_imgs" and `gain_col`
            columns.
        gain_col: Name of the column to rank by.
        num_single: Number of rows to take from the `num_shop_imgs == 1` group.
        num_multi: Number of rows to take from the `num_shop_imgs > 1` group.

    Returns:
        List of "img_idxs" values: the single-shop-image picks first, followed
        by the multi-shop-image picks, each group in descending gain order.
    """
    num_shop_imgs = split_df["num_shop_imgs"]
    picked_img_idxs = []
    for group_mask, group_size in (
        (num_shop_imgs == 1, num_single),
        (num_shop_imgs > 1, num_multi),
    ):
        ranked_df = split_df[group_mask].sort_values(by=gain_col, ascending=False)
        picked_img_idxs += ranked_df.head(group_size)["img_idxs"].tolist()
    return picked_img_idxs


# One list per run; each is printed as a `"desired_cons_img_idxs": [...]` line.
img_idxs_1 = _top_gain_img_idxs(results_split_df, "gain_1", num_cons_imgs_1, num_cons_imgs_2)
print("\"desired_cons_img_idxs\": " + str(img_idxs_1) + ",\n")

img_idxs_2 = _top_gain_img_idxs(results_split_df, "gain_2", num_cons_imgs_1, num_cons_imgs_2)
print("\"desired_cons_img_idxs\": " + str(img_idxs_2) + ",\n")

img_idxs_3 = _top_gain_img_idxs(results_split_df, "gain_3", num_cons_imgs_1, num_cons_imgs_2)
print("\"desired_cons_img_idxs\": " + str(img_idxs_3) + ",\n")

# Union of the three selections, in run order.
all_img_idxs = img_idxs_1 + img_idxs_2 + img_idxs_3

print("\"desired_cons_img_idxs\": " + str(all_img_idxs) + ",\n")

"desired_cons_img_idxs": [60703, 42683, 83315, 123017, 12285, 66533, 62672, 210449, 138311, 98482, 31652, 202993, 180690, 227943, 231916, 83325, 57661, 145913, 99301, 114769, 17732, 108645, 47667, 189941, 48149, 142027, 239113, 52371, 183687, 134161, 53230, 102295, 238155, 146309, 157965, 134146, 45454, 229550, 191279, 63971, 146712, 32735, 218158, 31856, 179388, 200318, 135873, 120685, 36414, 225292],

"desired_cons_img_idxs": [131482, 229438, 39566, 155155, 92842, 197203, 189267, 77581, 188887, 167468, 176691, 167467, 191105, 181319, 140558, 87496, 5694, 126917, 186085, 124477, 109766, 191946, 177258, 48964, 191870, 150803, 31091, 140031, 175947, 196623, 192287, 140010, 784, 33601, 96429, 41142, 7996, 41884, 31232, 119830, 161207, 203725, 4633, 165114, 144719, 238289, 178454, 1203, 122616, 23480],

"desired_cons_img_idxs": [236146, 6714, 98469, 157365, 39563, 151380, 237310, 1923, 197113, 100461, 96266, 133388, 13069, 77067, 192595, 19978, 162816, 64557, 117483, 107844, 98446, 210806

In [40]:
# Number of distinct selected images; it equals len(all_img_idxs) only when
# no image was picked for more than one run.
len({*all_img_idxs})

150