In [1]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt

NUMERICS = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']

In [None]:
sampled_path = "../../results/sampled/"
mirror_path = "../../results/mirror_true/"
passive_path = "../../results/sampled_passive/"

# Load the data
sample_rank = pd.read_csv(os.path.join(sampled_path, "rank.txt"), sep='\t', header=None, names=["entity", "type"])
mirror_rank = pd.read_csv(os.path.join(mirror_path, "rank.txt"), sep='\t', header=None, names=["entity", "type"])
passive_rank = pd.read_csv(os.path.join(passive_path, "rank.txt"), sep='\t', header=None, names=["entity", "type"])

sample_rank['rank'] = sample_rank['type'].apply(lambda x: int(x.split(' ')[1]))
mirror_rank['rank'] = mirror_rank['type'].apply(lambda x: int(x.split(' ')[1]))
passive_rank['rank'] = passive_rank['type'].apply(lambda x: int(x.split(' ')[1]))

sample_rank['type'] = sample_rank['type'].apply(lambda x: x.split(' ')[0])
mirror_rank['type'] = mirror_rank['type'].apply(lambda x: x.split(' ')[0])
passive_rank['type'] = passive_rank['type'].apply(lambda x: x.split(' ')[0])

sample_rank

Unnamed: 0,entity,type,rank
0,Andrey_Yeshchenko,wikicat_Russian_footballers,9
1,Daniel_Pendín,wikicat_Expatriate_footballers_in_Spain,8
2,Turkey,wikicat_Middle_Eastern_countries,5
3,Edinburgh,wordnet_administrative_district_108491826,3
4,Bangladesh,wikicat_Countries,1
...,...,...,...
5859,Roger_Milla,wikicat_1994_FIFA_World_Cup_players,33
5860,Gabri_García,wikicat_FC_Barcelona_B_footballers,1
5861,Doris_Lessing,wikicat_21st-century_dramatists_and_playwrights,7419
5862,Queen_Beatrix_International_Airport,wikicat_Airports_in_the_Netherlands,8320


In [32]:
def get_cluster_entity(entity):
    if len(entity.split('_')) > 1 and entity.split('_')[-1].isdigit() and len(entity.split('_')[-1]) == 1:
        return int(entity.split('_')[-1])
    
def get_entity(entity):
    if len(entity.split('_')) > 1 and entity.split('_')[-1].isdigit() and len(entity.split('_')[-1]) == 1:
        return '_'.join(entity.split('_')[:-1])
    else:
        return entity
    
mirror_rank['cluster'] = mirror_rank['entity'].apply(get_cluster_entity)
mirror_rank['entity'] = mirror_rank['entity'].apply(get_entity)
mirror_rank

Unnamed: 0,entity,type,rank,cluster
0,Andrey_Yeshchenko,wikicat_Russian_footballers,3,
1,Daniel_Pendín,wikicat_Expatriate_footballers_in_Spain,6,
2,Turkey,wikicat_Middle_Eastern_countries,3,1.0
3,Edinburgh,wordnet_administrative_district_108491826,2,
4,Bangladesh,wikicat_Countries,1,
...,...,...,...,...
5859,Roger_Milla,wikicat_1994_FIFA_World_Cup_players,2,1.0
5860,Gabri_García,wikicat_FC_Barcelona_B_footballers,1,
5861,Doris_Lessing,wikicat_21st-century_dramatists_and_playwrights,13968,
5862,Queen_Beatrix_International_Airport,wikicat_Airports_in_the_Netherlands,5526,


In [82]:
mirror_sample_rank_diff = sample_rank.merge(mirror_rank, on=['entity', 'type'], suffixes=('_sample', '_mirror'))
mirror_sample_rank_diff['rank_diff'] = mirror_sample_rank_diff['rank_sample'] - mirror_sample_rank_diff['rank_mirror']
mirror_sample_rank_diff['upgrade_score'] = (mirror_sample_rank_diff['rank_sample'] - mirror_sample_rank_diff['rank_mirror'] )/ np.minimum(mirror_sample_rank_diff['rank_mirror'], mirror_sample_rank_diff['rank_sample'])

mirror_sample_rank_diff

Unnamed: 0,entity,type,rank_sample,rank_mirror,cluster,rank_diff,upgrade_score
0,Andrey_Yeshchenko,wikicat_Russian_footballers,9,3,,6,2.000000
1,Daniel_Pendín,wikicat_Expatriate_footballers_in_Spain,8,6,,2,0.333333
2,Turkey,wikicat_Middle_Eastern_countries,5,3,1.0,2,0.666667
3,Edinburgh,wordnet_administrative_district_108491826,3,2,,1,0.500000
4,Bangladesh,wikicat_Countries,1,1,,0,0.000000
...,...,...,...,...,...,...,...
5859,Roger_Milla,wikicat_1994_FIFA_World_Cup_players,33,2,1.0,31,15.500000
5860,Gabri_García,wikicat_FC_Barcelona_B_footballers,1,1,,0,0.000000
5861,Doris_Lessing,wikicat_21st-century_dramatists_and_playwrights,7419,13968,,-6549,-0.882734
5862,Queen_Beatrix_International_Airport,wikicat_Airports_in_the_Netherlands,8320,5526,,2794,0.505610


In [83]:
mirror_sample_rank_diff[~mirror_sample_rank_diff['cluster'].isna()].describe()

Unnamed: 0,rank_sample,rank_mirror,cluster,rank_diff,upgrade_score
count,1894.0,1894.0,1894.0,1894.0,1894.0
mean,963.770855,1073.901267,0.900739,-110.130412,0.913965
std,2465.969244,3050.099149,1.023067,1532.615144,52.849779
min,1.0,1.0,0.0,-13990.0,-1271.0
25%,2.0,2.0,0.0,-15.0,-0.9868
50%,18.0,17.0,1.0,0.0,0.0
75%,168.75,111.5,1.0,23.0,1.5
max,15739.0,15614.0,9.0,15155.0,1140.0


In [None]:
def compute_mrr(ranks):
    """
    Compute Mean Reciprocal Rank (MRR) for the given ranks.
    """
    ranks = np.array(ranks)
    ranks = ranks[ranks > 0]  # éviter division par 0
    if len(ranks) == 0:
        return 0.0
    return np.mean(1.0 / ranks)

def compute_hit_rate(ranks, k):
	"""
	Compute Hit Rate at k for the given ranks.
	"""
	ranks = np.array(ranks)
	ranks = ranks[ranks > 0]  # éviter division par 0
	if len(ranks) == 0:
		return 0.0
	return np.mean(ranks <= k)

print('=========General stats=========')
print('average rank diff :', mirror_sample_rank_diff['rank_diff'].mean())
print('std rank diff :', mirror_sample_rank_diff['rank_diff'].std())
print('average upgrade score :', mirror_sample_rank_diff['upgrade_score'].mean())
print('Almost same rank diff :', len(mirror_sample_rank_diff['rank_diff'][mirror_sample_rank_diff['rank_diff'].abs() < 5]) / len(mirror_sample_rank_diff))
print('count upgrade :', len(mirror_sample_rank_diff[(mirror_sample_rank_diff['rank_diff'] > 5)]))
print('count downgrade :', len(mirror_sample_rank_diff[(mirror_sample_rank_diff['rank_diff'] < -5)]))

print('MRR mirror :', compute_mrr(mirror_sample_rank_diff['rank_mirror'].values))
print('MRR sampled :', compute_mrr(mirror_sample_rank_diff['rank_sample'].values))

print(f"hit@1 mirror : {compute_hit_rate(mirror_sample_rank_diff['rank_mirror'].values, 1):.3%}")
print(f"hit@1 sampled : {compute_hit_rate(mirror_sample_rank_diff['rank_sample'].values, 1):.3%}")
print(f"hit@3 mirror : {compute_hit_rate(mirror_sample_rank_diff['rank_mirror'].values, 3):.3%}")
print(f"hit@3 sampled : {compute_hit_rate(mirror_sample_rank_diff['rank_sample'].values, 3):.3%}")
print(f"hit@10 mirror : {compute_hit_rate(mirror_sample_rank_diff['rank_mirror'].values, 10):.3%}")
print(f"hit@10 sampled : {compute_hit_rate(mirror_sample_rank_diff['rank_sample'].values, 10):.3%}")


print('\n=========not clusterized stats=========')
print('average rank diff not clusterized :', mirror_sample_rank_diff[mirror_sample_rank_diff['cluster'].isna()]['rank_diff'].mean())
print('std rank diff not clusterized :', mirror_sample_rank_diff[mirror_sample_rank_diff['cluster'].isna()]['rank_diff'].std())
print('average upgrade score not clusterized :', mirror_sample_rank_diff[mirror_sample_rank_diff['cluster'].isna()]['upgrade_score'].mean())


print(f"\nMRR mirror not clusterized : {compute_mrr(mirror_sample_rank_diff[mirror_sample_rank_diff['cluster'].isna()]['rank_mirror'].values):.3f}")
print(f"MRR sampled not clusterized : {compute_mrr(mirror_sample_rank_diff[mirror_sample_rank_diff['cluster'].isna()]['rank_sample'].values):.3f}")

print('Almost same rank diff not clusterized :', len(mirror_sample_rank_diff[mirror_sample_rank_diff['cluster'].isna()]['rank_diff'][mirror_sample_rank_diff['rank_diff'].abs() < 5]) / len(mirror_sample_rank_diff[mirror_sample_rank_diff['cluster'].isna()]))

print('count upgrade not clusterized :', len(mirror_sample_rank_diff[(mirror_sample_rank_diff['cluster'].isna()) & (mirror_sample_rank_diff['rank_diff'] > 5)]))
print('count downgrade not clusterized :', len(mirror_sample_rank_diff[(mirror_sample_rank_diff['cluster'].isna()) & (mirror_sample_rank_diff['rank_diff'] < -5)]))

print(f"hit@1 mirror not clusterized : {compute_hit_rate(mirror_sample_rank_diff[mirror_sample_rank_diff['cluster'].isna()]['rank_mirror'].values, 1):.3%}")
print(f"hit@1 sampled not clusterized : {compute_hit_rate(mirror_sample_rank_diff[mirror_sample_rank_diff['cluster'].isna()]['rank_sample'].values, 1):.3%}")
print(f"hit@3 mirror not clusterized : {compute_hit_rate(mirror_sample_rank_diff[mirror_sample_rank_diff['cluster'].isna()]['rank_mirror'].values, 3):.3%}")
print(f"hit@3 sampled not clusterized : {compute_hit_rate(mirror_sample_rank_diff[mirror_sample_rank_diff['cluster'].isna()]['rank_sample'].values, 3):.3%}")
print(f"hit@10 mirror not clusterized : {compute_hit_rate(mirror_sample_rank_diff[mirror_sample_rank_diff['cluster'].isna()]['rank_mirror'].values, 10):.3%}")
print(f"hit@10 sampled not clusterized : {compute_hit_rate(mirror_sample_rank_diff[mirror_sample_rank_diff['cluster'].isna()]['rank_sample'].values, 10):.3%}")

print('\n=========clusterized stats=========')
print('average rank diff clusterized :', mirror_sample_rank_diff[~mirror_sample_rank_diff['cluster'].isna()]['rank_diff'].mean())
print('std rank diff clusterized :', mirror_sample_rank_diff[~mirror_sample_rank_diff['cluster'].isna()]['rank_diff'].std())
print('average upgrade score clusterized :', mirror_sample_rank_diff[~mirror_sample_rank_diff['cluster'].isna()]['upgrade_score'].mean())
print(f"\nMRR mirror clusterized : {compute_mrr(mirror_sample_rank_diff[~mirror_sample_rank_diff['cluster'].isna()]['rank_mirror'].values):.3f}")
print(f"MRR sampled clusterized : {compute_mrr(mirror_sample_rank_diff[~mirror_sample_rank_diff['cluster'].isna()]['rank_sample'].values):.3f}")

print('Almost same rank diff clusterized :', len(mirror_sample_rank_diff[~mirror_sample_rank_diff['cluster'].isna()]['rank_diff'][mirror_sample_rank_diff['rank_diff'].abs() < 5]) / len(mirror_sample_rank_diff[~mirror_sample_rank_diff['cluster'].isna()]))

print('count upgrade clusterized :', len(mirror_sample_rank_diff[(~mirror_sample_rank_diff['cluster'].isna()) & (mirror_sample_rank_diff['rank_diff'] > 5)]))
print('count downgrade clusterized :', len(mirror_sample_rank_diff[(~mirror_sample_rank_diff['cluster'].isna()) & (mirror_sample_rank_diff['rank_diff'] < -5)]))

print('count enter hit@10 clusterized :', len(mirror_sample_rank_diff[(~mirror_sample_rank_diff['cluster'].isna()) & (mirror_sample_rank_diff['rank_diff'] > 5) & (mirror_sample_rank_diff['rank_mirror'] <= 10) & (mirror_sample_rank_diff['rank_sample'] > 10)]))
print('count exit hit@10 clusterized :', len(mirror_sample_rank_diff[(~mirror_sample_rank_diff['cluster'].isna()) & (mirror_sample_rank_diff['rank_diff'] < -5) & (mirror_sample_rank_diff['rank_mirror'] > 10) & (mirror_sample_rank_diff['rank_sample'] <= 10)]))

print(f"hit@1 mirror clusterized : {compute_hit_rate(mirror_sample_rank_diff[~mirror_sample_rank_diff['cluster'].isna()]['rank_mirror'].values, 1):.3%}")
print(f"hit@1 sampled clusterized : {compute_hit_rate(mirror_sample_rank_diff[~mirror_sample_rank_diff['cluster'].isna()]['rank_sample'].values, 1):.3%}")
print(f"hit@3 mirror clusterized : {compute_hit_rate(mirror_sample_rank_diff[~mirror_sample_rank_diff['cluster'].isna()]['rank_mirror'].values, 3):.3%}")
print(f"hit@3 sampled clusterized : {compute_hit_rate(mirror_sample_rank_diff[~mirror_sample_rank_diff['cluster'].isna()]['rank_sample'].values, 3):.3%}")
print(f"hit@10 mirror clusterized : {compute_hit_rate(mirror_sample_rank_diff[~mirror_sample_rank_diff['cluster'].isna()]['rank_mirror'].values, 10):.3%}")
print(f"hit@10 sampled clusterized : {compute_hit_rate(mirror_sample_rank_diff[~mirror_sample_rank_diff['cluster'].isna()]['rank_sample'].values, 10):.3%}")

average rank diff : -181.24010914051843
std rank diff : 1711.3457102080733
average upgrade score : 0.10379685870688429
Almost same rank diff : 0.34021145975443384
count upgrade : 1791
count downgrade : 1965
MRR mirror : 0.29530070144331727
MRR sampled : 0.3013276190178945
hit@1 mirror : 22.374%
hit@1 sampled : 23.312%
hit@3 mirror : 31.719%
hit@3 sampled : 32.145%
hit@10 mirror : 43.537%
hit@10 sampled : 43.878%

average rank diff not clusterized : -215.16498740554155
std rank diff not clusterized : 1789.5275724223634
average upgrade score not clusterized : -0.28271644504015386

MRR mirror not clusterized : 0.294
MRR sampled not clusterized : 0.298
Almost same rank diff not clusterized : 0.3410579345088161
count upgrade not clusterized : 1163
count downgrade not clusterized : 1371
hit@1 mirror not clusterized : 22.393%
hit@1 sampled not clusterized : 22.922%
hit@3 mirror not clusterized : 31.385%
hit@3 sampled not clusterized : 31.990%
hit@10 mirror not clusterized : 43.526%
hit@10 sam

In [93]:
upgrade_mirror = mirror_sample_rank_diff[(~mirror_sample_rank_diff['cluster'].isna()) & (mirror_sample_rank_diff['rank_diff'] > 5)]
upgrade_mirror.describe()

Unnamed: 0,rank_sample,rank_mirror,cluster,rank_diff,upgrade_score
count,628.0,628.0,628.0,628.0,628.0
mean,1065.423567,462.318471,0.894904,603.105096,12.698211
std,2437.814753,1569.578689,1.017443,1590.744891,57.572429
min,7.0,1.0,0.0,6.0,0.016507
25%,40.0,7.0,0.0,23.0,0.921474
50%,123.0,32.5,1.0,68.5,2.819928
75%,561.75,129.75,1.0,335.5,7.825397
max,15739.0,10387.0,6.0,15155.0,1140.0


In [95]:
upgrade_mirror.sort_values(by='upgrade_score', ascending=False, inplace=True)
upgrade_mirror.head(20)

Unnamed: 0,entity,type,rank_sample,rank_mirror,cluster,rank_diff,upgrade_score
1350,Rachel_Weisz,wikicat_English_people_of_Hungarian-Jewish_des...,2282,2,5.0,2280,1140.0
4806,Greg_Bear,wikicat_20th-century_writers,3687,8,0.0,3679,459.875
3589,Julianne_Moore,wikicat_People_from_North_Carolina,13313,38,2.0,13275,349.342105
2224,Janet_Frame,wikicat_New_Zealand_short_story_writers,1338,4,0.0,1334,333.5
5217,Erwin_Schrödinger,"wikicat_Alumni_of_Magdalen_College,_Oxford",4958,16,1.0,4942,308.875
836,Kid_Rock,wikicat_21st-century_American_musicians,561,2,2.0,559,279.5
1779,John_Maynard_Keynes,wikicat_LGBT_people_from_England,526,2,3.0,524,262.0
4734,Alex_Nyarko,wikicat_Expatriate_footballers_in_Switzerland,173,1,2.0,172,172.0
5853,Ioan_Sabău,wikicat_Romanian_expatriates_in_the_Netherlands,2433,19,0.0,2414,127.052632
1837,Subrahmanyan_Chandrasekhar,wikicat_American_people,119,1,2.0,118,118.0


In [97]:
downgrade_mirror = mirror_sample_rank_diff[(~mirror_sample_rank_diff['cluster'].isna()) & (mirror_sample_rank_diff['rank_diff'] < -5)]
downgrade_mirror.describe()

Unnamed: 0,rank_sample,rank_mirror,cluster,rank_diff,upgrade_score
count,594.0,594.0,594.0,594.0,594.0
mean,1940.195286,2929.018519,0.897306,-988.823232,-10.540614
std,3334.943766,4685.727393,0.947826,1869.984779,71.626669
min,1.0,7.0,0.0,-13990.0,-1271.0
25%,6.0,30.0,0.0,-1085.25,-6.0
50%,39.0,114.5,1.0,-63.5,-1.688179
75%,2063.0,5030.25,1.0,-18.25,-0.510934
max,11719.0,15614.0,4.0,-6.0,-0.010106


In [None]:
downgrade_mirror.sort_values(by='upgrade_score', ascending=True, inplace=True)
downgrade_mirror.head(20)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  downgrade_mirror.sort_values(by='upgrade_score', ascending=True, inplace=True)


Unnamed: 0,entity,type,rank_sample,rank_mirror,cluster,rank_diff,upgrade_score
2641,Ever_Cantero,wikicat_Paraguayan_footballers,1,1272,0.0,-1271,-1271.0
292,Brasília,wikicat_States_of_Brazil,1,1096,0.0,-1095,-1095.0
2278,Cole_Porter,wikicat_LGBT_people,1,281,1.0,-280,-280.0
3546,Mounir_El_Hamdaoui,wikicat_Serie_A_footballers,1,269,4.0,-268,-268.0
3785,Daisuke_Matsui,wikicat_Bulgarian_A_Professional_Football_Grou...,1,238,1.0,-237,-237.0
3567,Benjamin_Fondane,wikicat_Surrealist_poets,1,154,0.0,-153,-153.0
736,GLAAD_Media_Award,wikicat_American_television_awards,2,198,0.0,-196,-98.0
64,Danny_Califf,wikicat_LA_Galaxy_players,33,2064,2.0,-2031,-61.545455
649,Marcos_Alonso_Peña,wikicat_Racing_de_Santander_managers,1,56,0.0,-55,-55.0
3968,Andrija_Kaluđerović,wikicat_Serbian_expatriates_in_Spain,1,55,0.0,-54,-54.0


In [103]:
cluster_mirror = mirror_sample_rank_diff[~mirror_sample_rank_diff['cluster'].isna()]
cluster_mirror_grp = cluster_mirror.groupby('entity').agg({
    'rank_diff': ['mean', 'std'],
	'rank_mirror': ['mean', 'std'],
	'rank_sample': ['mean', 'std'],
	'upgrade_score': ['mean', 'std'],
	'cluster': ['nunique']
}).reset_index()
cluster_mirror_grp.columns = ['_'.join(col).strip() for col in cluster_mirror_grp.columns.values]
cluster_mirror_grp

Unnamed: 0,entity_,rank_diff_mean,rank_diff_std,rank_mirror_mean,rank_mirror_std,rank_sample_mean,rank_sample_std,upgrade_score_mean,upgrade_score_std,cluster_nunique
0,50_Cent,-756.333333,1040.872262,4771.166667,5234.516001,4014.833333,4448.253519,-0.107085,0.610165,2
1,Aaran_Lines,-3080.000000,3915.957354,3307.500000,3837.468501,227.500000,78.488853,-17.552377,23.268674,1
2,Aaron_Brown_(footballer_born_1980),-9.000000,,12.000000,,3.000000,,-3.000000,,1
3,Aarón_Ñíguez,-26.500000,37.476659,31.500000,43.133514,5.000000,5.656854,-2.944444,4.164073,2
4,Abdel_Sattar_Sabry,52.000000,,12.000000,,64.000000,,4.333333,,1
...,...,...,...,...,...,...,...,...,...,...
979,Émile_Mpenza,-13.000000,44.788391,24.333333,44.477710,11.333333,16.573071,2.875000,19.740662,2
980,Óscar_Alcides_Mena,41.000000,,57.000000,,98.000000,,0.719298,,1
981,Özden_Öngün,-4.000000,5.656854,105.000000,130.107648,101.000000,124.450793,-0.021164,0.029930,2
982,İbrahim_Aydemir,-481.000000,,6865.000000,,6384.000000,,-0.075345,,1


In [106]:
mirror_sample_rank_diff[mirror_sample_rank_diff['entity'] == 'Rachel_Weisz'].head(20)

Unnamed: 0,entity,type,rank_sample,rank_mirror,cluster,rank_diff,upgrade_score
819,Rachel_Weisz,wikicat_People_of_Hungarian_descent,8630,9440,,-810,-0.093859
1350,Rachel_Weisz,wikicat_English_people_of_Hungarian-Jewish_des...,2282,2,5.0,2280,1140.0
2059,Rachel_Weisz,wikicat_English_female_models,11353,4723,,6630,1.403769
2370,Rachel_Weisz,wikicat_Jewish_actors,16,14,0.0,2,0.142857


In [72]:
passive_sample_rank_diff = sample_rank.merge(passive_rank, on=['entity', 'type'], suffixes=('_sample', '_passive'))
passive_sample_rank_diff['rank_diff'] = passive_sample_rank_diff['rank_sample'] - passive_sample_rank_diff['rank_passive']
passive_sample_rank_diff.describe()

Unnamed: 0,rank_sample,rank_passive,rank_diff
count,5864.0,5864.0,5864.0
mean,1130.29792,1193.439291,-63.141371
std,2663.692132,2838.703443,1483.560601
min,1.0,1.0,-14040.0
25%,2.0,2.0,-13.0
50%,18.0,17.0,0.0
75%,244.25,226.25,13.0
max,17897.0,16474.0,14362.0


In [78]:
print('average rank diff not clusterized :', passive_sample_rank_diff['rank_diff'].mean())
print('std rank diff not clusterized :', passive_sample_rank_diff['rank_diff'].std())

print('Almost same rank diff :', len(passive_sample_rank_diff['rank_diff'][passive_sample_rank_diff['rank_diff'].abs() < 5]) / len(passive_sample_rank_diff))
print('count upgrade :', len(passive_sample_rank_diff[(passive_sample_rank_diff['rank_diff'] > 5)]))
print('count downgrade :', len(passive_sample_rank_diff[(passive_sample_rank_diff['rank_diff'] < -5)]))

print('MRR passive :', compute_mrr(passive_sample_rank_diff['rank_passive'].values))
print('MRR sampled :', compute_mrr(passive_sample_rank_diff['rank_sample'].values))

print(f"hit@1 passive : {compute_hit_rate(passive_sample_rank_diff['rank_passive'].values, 1):.3%}")
print(f"hit@1 sampled : {compute_hit_rate(passive_sample_rank_diff['rank_sample'].values, 1):.3%}")
print(f"hit@3 passive : {compute_hit_rate(passive_sample_rank_diff['rank_passive'].values, 3):.3%}")
print(f"hit@3 sampled : {compute_hit_rate(passive_sample_rank_diff['rank_sample'].values, 3):.3%}")
print(f"hit@10 passive : {compute_hit_rate(passive_sample_rank_diff['rank_passive'].values, 10):.3%}")
print(f"hit@10 sampled : {compute_hit_rate(passive_sample_rank_diff['rank_sample'].values, 10):.3%}")




average rank diff not clusterized : -63.14137107776262
std rank diff not clusterized : 1483.5606008018822
Almost same rank diff : 0.38693724420190995
count upgrade : 1749
count downgrade : 1735
MRR passive : 0.31222532787615753
MRR sampled : 0.3013276190178945
hit@1 passive : 24.318%
hit@1 sampled : 23.312%
hit@3 passive : 33.237%
hit@3 sampled : 32.145%
hit@10 passive : 44.901%
hit@10 sampled : 43.878%
