In [2]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt

# Names of the numeric NumPy/pandas dtypes. Not referenced by any cell visible
# here; presumably intended for DataFrame.select_dtypes(include=NUMERICS) — TODO confirm.
NUMERICS = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']

# Passive

In [3]:
def read_rank_file(file_path):
	"""Load a rank file into a DataFrame with columns entity, type and rank.

	The file is read as headerless, tab-separated text with two fields per
	line. The second field is split on single spaces: its first token becomes
	the ``type`` column and its second token, converted with ``int``, becomes
	the ``rank`` column.
	"""
	def _tokens(label):
		return label.split(' ')

	raw = pd.read_csv(file_path, sep='\t', header=None, names=['entity', 'type'])
	labels = raw['type']
	# 'type' is overwritten in place and 'rank' is appended, giving the column
	# order entity, type, rank.
	return raw.assign(
		rank=labels.apply(lambda label: int(_tokens(label)[1])),
		type=labels.apply(lambda label: _tokens(label)[0]),
	)

# Load the rank files produced by the two runs that are compared below.
rank_passive = read_rank_file('../results/passive/rank.txt')
rank = read_rank_file('../results/sampling-2-nopre/rank.txt')

# Summary statistics of the ranks from the sampling run.
rank.describe()

Unnamed: 0,rank
count,5864.0
mean,1712.82998
std,4397.984323
min,1.0
25%,1.0
50%,11.5
75%,155.0
max,17982.0


In [4]:
# Summary statistics of the ranks from the passive run, for comparison.
rank_passive.describe()

Unnamed: 0,rank
count,5864.0
mean,1684.912858
std,4357.998231
min,1.0
25%,2.0
50%,13.0
75%,168.0
max,17806.0


In [5]:
# Pair the two runs on (entity, type). rank_diff is sampling minus passive, so
# a positive value means the sampling rank is numerically larger.
df_rank_diff = rank.merge(
    rank_passive,
    on=['entity', 'type'],
    suffixes=('_sampling', '_passive'),
)
df_rank_diff = df_rank_diff.assign(
    rank_diff=df_rank_diff['rank_sampling'].sub(df_rank_diff['rank_passive'])
)
df_rank_diff.describe()

Unnamed: 0,rank_sampling,rank_passive,rank_diff
count,5864.0,5864.0,5864.0
mean,1712.82998,1684.912858,27.917121
std,4397.984323,4357.998231,1363.120152
min,1.0,1.0,-11669.0
25%,1.0,2.0,-9.0
50%,11.5,13.0,0.0
75%,155.0,168.0,8.0
max,17982.0,17806.0,16606.0


In [6]:
def _report_top_k_crossings(df):
    """Print how many predictions sit on opposite sides of the top-10/3/1 cut-offs.

    "entrant" lines count rows whose passive rank is within the top k while the
    sampling rank is not; "sortant" lines count the reverse. The two "Total"
    lines count rows whose passive rank is within / outside the top 10.
    """
    diff = df["rank_diff"]
    passive = df['rank_passive']
    sampling = df['rank_sampling']

    def n_rows(mask):
        # Non-null count of the first column among the selected rows.
        return df[mask].count().values[0]

    print('Nombre de prédiction entrant dans le top 10 :', n_rows((diff > 0) & (passive <= 10) & (sampling > 10)))
    print('Nombre de prédiction entrant dans le top 3  :', n_rows((diff > 0) & (passive <= 3) & (sampling > 3)))
    print('Nombre de prédiction entrant dans le top 1  :', n_rows((diff > 0) & (passive <= 1) & (sampling > 1)))
    print('Total entrant dans le top 10     :', n_rows(passive <= 10))
    print()
    print('Nombre de prédiction sortant du top 10 :', n_rows((diff < 0) & (passive > 10) & (sampling <= 10)))
    print('Nombre de prédiction sortant du top 3  :', n_rows((diff < 0) & (passive > 3) & (sampling <= 3)))
    print('Nombre de prédiction sortant du top 1  :', n_rows((diff < 0) & (passive > 1) & (sampling <= 1)))
    print('Total sortant du top 10        :', n_rows(passive > 10))


_report_top_k_crossings(df_rank_diff)


Nombre de prédiction entrant dans le top 10 : 299
Nombre de prédiction entrant dans le top 3  : 309
Nombre de prédiction entrant dans le top 1  : 253
Total entrant dans le top 10     : 2803

Nombre de prédiction sortant du top 10 : 369
Nombre de prédiction sortant du top 3  : 356
Nombre de prédiction sortant du top 1  : 286
Total sortant du top 10        : 3061


In [19]:
# upgrade_score: rank gap (sampling - passive) divided by the smaller of the
# two ranks, so a move near the top of the list weighs more than the same move
# far down. Rows are then ordered from the largest score to the smallest.
df_rank_diff = (
    df_rank_diff
    .assign(
        upgrade_score=lambda d: (d['rank_sampling'] - d['rank_passive'])
        / np.minimum(d['rank_passive'], d['rank_sampling'])
    )
    .sort_values(by='upgrade_score', ascending=False)
)
df_rank_diff.describe()

Unnamed: 0,rank_sampling,rank_passive,rank_diff,upgrade_score
count,5864.0,5864.0,5864.0,5864.0
mean,1712.82998,1684.912858,27.917121,3.017743
std,4397.984323,4357.998231,1363.120152,225.320453
min,1.0,1.0,-11669.0,-1643.333333
25%,1.0,2.0,-9.0,-0.500611
50%,11.5,13.0,0.0,0.0
75%,155.0,168.0,8.0,0.5
max,17982.0,17806.0,16606.0,16328.0


In [21]:
# Split the scores by sign (zero counts as an upgrade) and report the total
# and the average of each side.
all_scores = df_rank_diff['upgrade_score']
scores_up = all_scores[all_scores >= 0]
scores_down = all_scores[all_scores < 0]

print('sum upgrade :', scores_up.sum())
print('sum downgrade :', scores_down.sum())
print('mean upgrade :', scores_up.mean())
print('mean downgrade :', scores_down.mean())

sum upgrade : 29972.389034743574
sum downgrade : -12276.345628350897
mean upgrade : 8.30950624750307
mean downgrade : -5.439231558861717


In [17]:
# Persist the comparison table as a TSV file, largest upgrade_score first.
df_rank_diff = df_rank_diff.sort_values(by="upgrade_score", ascending=False)
df_rank_diff.to_csv('rank_diff_ordered.tsv', sep='\t', index=False, header=True)

In [22]:
def _count_per_relation(kg, role, other):
    """Per (``role`` entity, relation) pair, count the non-null ``other`` values.

    The result has the columns entity, relation, count.
    """
    return (
        kg.groupby([role, 'relation'])
        .count()
        .reset_index()
        .rename(columns={role: 'entity', other: 'count'})
    )


df_kg = pd.read_csv(
    '../data/YAGO_sampled/KG_train.txt',
    sep='\t',
    header=None,
    names=['head', 'relation', 'tail'],
)

# An entity is counted for a relation both when it is the head and when it is
# the tail of a triple.
df_head_count_rel = _count_per_relation(df_kg, 'head', 'tail')
df_tail_count_rel = _count_per_relation(df_kg, 'tail', 'head')

# Wide table: one row per entity, one column per relation, 0 where absent.
df_count_rel = (
    pd.concat([df_head_count_rel, df_tail_count_rel], axis=0)
    .groupby(['entity', 'relation'])
    .sum()
    .reset_index()
    .pivot(index='entity', columns='relation', values='count')
    .fillna(0)
    .reset_index()
)

# Same table with every row divided by its own total, turning counts into
# per-entity relation shares.
relation_counts = df_count_rel.iloc[:, 1:]
df_count_rel_norm = df_count_rel.copy()
df_count_rel_norm.iloc[:, 1:] = relation_counts.div(relation_counts.sum(axis=1), axis=0)
df_count_rel_norm

relation,entity,actedIn,created,dealsWith,diedIn,directed,exports,graduatedFrom,happenedIn,hasAcademicAdvisor,...,isLocatedIn,isMarriedTo,isPoliticianOf,livesIn,owns,participatedIn,playsFor,wasBornIn,worksAt,wroteMusicFor
0,1._FC_Bocholt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.0,0.0,0.0
1,1._FC_Cologne,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
2,1._FC_Kaiserslautern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,0.0,0.0,0.0
3,1._FC_Köln,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.0,0.0,0.0
4,1._FC_Lok_Stendal,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6706,Šiauliai,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
6707,Šumadija,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
6708,Žakanje,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
6709,Žilina_Region,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0


In [23]:
def show_hit_acc(df):
	"""Print hit@10, hit@3 and hit@1 for the sampling and the passive ranks.

	Each value is the number of rows whose rank is at most k divided by the
	number of rows of ``df``; both numbers are taken as the non-null count of
	the first column of ``df``. Nothing is returned.
	"""
	total = df.count().values[0]
	for k in (10, 3, 1):
		for label, column in (('sampled', 'rank_sampling'), ('passive', 'rank_passive')):
			hits = df[df[column] <= k].count().values[0]
			print(f'hit@{k} {label} :', hits / total)

# Hit@k of both runs over every merged prediction.
show_hit_acc(df_rank_diff)

hit@10 sampled : 0.48993860845839016
hit@10 passive : 0.4780013642564802
hit@3 sampled : 0.3659618008185539
hit@3 passive : 0.3579467939972715
hit@1 sampled : 0.2508526603001364
hit@1 passive : 0.24522510231923603


In [25]:
# Summary statistics restricted to rows whose type name contains 'Counties'
# (str.contains defaults: case-sensitive, pattern treated as a regex).
df_rank_diff[df_rank_diff['type'].str.contains('Counties')].describe()

Unnamed: 0,rank_sampling,rank_passive,rank_diff,upgrade_score
count,22.0,22.0,22.0,22.0
mean,811.636364,949.136364,-137.5,-2.387197
std,3610.182485,3223.363436,878.561148,4.709408
min,1.0,1.0,-2743.0,-16.0
25%,1.0,2.0,-13.75,-3.8125
50%,2.5,7.5,-0.5,-0.25
75%,36.5,24.75,0.0,0.0
max,16968.0,14898.0,2070.0,2.066667


In [26]:
# Keywords to look for in the type names.
# Fix: 'Provinces' used to be listed twice; the second entry is now the
# singular 'Province', in line with the Cities/City, Countries/Country,
# States/State and Districts/District pairs.
search_list = ['Counties', 'Regions', 'Cities', 'City', 'Countries', 'Country', 'Provinces', 'Province', 'States', 'State', 'Districts', 'District']

# Single regular expression matching any of the keywords (alternation).
pattern = '|'.join(search_list)

# Keep the rows whose type matches any keyword (case-insensitive; a missing
# type counts as no match) and summarise them.
df_rank_diff[df_rank_diff['type'].str.contains(pattern, case=False, na=False)].describe()

Unnamed: 0,rank_sampling,rank_passive,rank_diff,upgrade_score
count,895.0,895.0,895.0,895.0
mean,1102.422346,1110.896089,-8.473743,22.202265
std,3584.844901,3643.637519,1421.740758,572.550932
min,1.0,1.0,-9799.0,-1031.0
25%,1.0,1.0,-5.0,-0.664352
50%,3.0,4.0,0.0,0.0
75%,32.5,42.5,1.0,0.208701
max,17461.0,17369.0,16606.0,16328.0


In [27]:
# Hit@k for the rows whose type matches `pattern`, then for all other rows.
type_matches = df_rank_diff['type'].str.contains(pattern, case=False, na=False)
show_hit_acc(df_rank_diff[type_matches])
print()
show_hit_acc(df_rank_diff[~type_matches])

hit@10 sampled : 0.6435754189944134
hit@10 passive : 0.6100558659217877
hit@3 sampled : 0.511731843575419
hit@3 passive : 0.49162011173184356
hit@1 sampled : 0.3888268156424581
hit@1 passive : 0.3687150837988827

hit@10 sampled : 0.46226604950694306
hit@10 passive : 0.45421614006842426
hit@3 sampled : 0.33970617830549404
hit@3 passive : 0.33386999396256795
hit@1 sampled : 0.22600120748641578
hit@1 passive : 0.22298249144697121


In [31]:
# Per-type aggregates of the rank comparison, ordered by the mean upgrade
# score (largest first). Named aggregation yields the flat column names
# directly, in the order listed here.
df_rank_diff_type_grp = (
    df_rank_diff
    .groupby(['type'])
    .agg(
        rank_diff_mean=('rank_diff', 'mean'),
        rank_diff_std=('rank_diff', 'std'),
        entity_count=('entity', 'count'),
        upgrade_score_mean=('upgrade_score', 'mean'),
    )
    .reset_index()
    .sort_values(by='upgrade_score_mean', ascending=False)
)
df_rank_diff_type_grp.head(50)

Unnamed: 0,type,rank_diff_mean,rank_diff_std,entity_count,upgrade_score_mean
1582,wikicat_Football_clubs_in_Rio_de_Janeiro_(city),4022.0,,1,804.4
1551,wikicat_Football_clubs_in_Denmark,306.0,,1,153.0
3378,wordnet_administrative_district_108491826,184.320261,1838.630915,153,128.531883
1008,wikicat_Club_Bolívar_players,5213.0,,1,94.781818
2123,wikicat_Monarcas_Morelia_footballers,401.0,,1,80.2
525,wikicat_Association_football_clubs_established...,3494.0,,1,69.88
1561,wikicat_Football_clubs_in_Hungary,11358.0,,1,66.034884
531,wikicat_Association_football_clubs_established...,14829.0,,1,65.906667
483,wikicat_Association_football_clubs_established...,8312.0,,1,62.029851
1487,wikicat_Fan-owned_English_League_football_clubs,44.0,,1,44.0


In [32]:
# Predictions ranked first by the sampling run but not by the passive run,
# most negative rank_diff first (at most 50 rows shown).
(
    df_rank_diff
    .loc[(df_rank_diff['rank_sampling'] == 1) & (df_rank_diff['rank_passive'] != 1)]
    .sort_values(by="rank_diff", ascending=True)
    .head(50)
)

Unnamed: 0,entity,type,rank_sampling,rank_passive,rank_diff,upgrade_score
5443,Umeå,wordnet_administrative_district_108491826,1,1032,-1031,-1031.0
2187,"Canonsburg,_Pennsylvania",wikicat_Cities_in_Pennsylvania,1,505,-504,-504.0
5318,Don_Hutchison,wikicat_People_from_Gateshead,1,299,-298,-298.0
1077,Order_of_the_Nile,wikicat_Orders_of_knighthood_awarded_to_heads_...,1,118,-117,-117.0
2735,Robert_Fogel,wikicat_American_historians,1,115,-114,-114.0
4808,Kangerlussuaq_Airport,wordnet_airport_102692232,1,83,-82,-82.0
1671,Épinal,wikicat_Prefectures_in_France,1,65,-64,-64.0
4083,Jerry_Siegel,wordnet_person_100007846,1,63,-62,-62.0
3383,"Inglewood,_California",wordnet_administrative_district_108491826,1,61,-60,-60.0
4572,Amica_Wronki,wordnet_club_108227214,1,49,-48,-48.0


In [36]:
# Attach each entity's normalised relation columns to the rank comparison.
# how='left' keeps every row of df_rank_diff even when the entity has no row
# in df_count_rel_norm.
df_tmp = pd.merge(df_rank_diff, df_count_rel_norm, how='left', on='entity')
df_tmp

Unnamed: 0,entity,type,rank_sampling,rank_passive,rank_diff,upgrade_score,actedIn,created,dealsWith,diedIn,...,isLocatedIn,isMarriedTo,isPoliticianOf,livesIn,owns,participatedIn,playsFor,wasBornIn,worksAt,wroteMusicFor
0,"Division_No._11,_Alberta",wordnet_administrative_district_108491826,16329,1,16328,16328.000000,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Gmina_Tarnawatka,wordnet_administrative_district_108491826,4982,1,4981,4981.000000,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Fluminense_F.C.,wikicat_Football_clubs_in_Rio_de_Janeiro_(city),4027,5,4022,804.400000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Orange_County_(Florida),wordnet_county_108546183,696,1,695,695.000000,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Unión_Española,wordnet_club_108227214,384,1,383,383.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5859,Nobel_Prize_in_Chemistry,wikicat_Swedish_awards,3,1367,-1364,-454.666667,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5860,S.V._Zulte_Waregem,wikicat_Football_clubs_in_Belgium,2,923,-921,-460.500000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
5861,"Canonsburg,_Pennsylvania",wikicat_Cities_in_Pennsylvania,1,505,-504,-504.000000,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5862,Umeå,wordnet_administrative_district_108491826,1,1032,-1031,-1031.000000,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [54]:
# Per-relation summary of the rank comparison. A row of df_tmp is attributed
# to a relation when that relation's normalised value exceeds 0.5.
#
# Relation columns are identified by name (every column of df_tmp that does
# not come from df_rank_diff) rather than by the positional slice columns[6:],
# which silently picks the wrong columns as soon as df_rank_diff gains or
# loses a column. Column order is preserved.
relation_columns = [col for col in df_tmp.columns if col not in df_rank_diff.columns]

rel_rows = []
for rel in relation_columns:
    df_tmp_rel = df_tmp[df_tmp[rel] > 0.5]

    rank_diff = df_tmp_rel['rank_diff']
    passive_rank = df_tmp_rel['rank_passive']
    sampling_rank = df_tmp_rel['rank_sampling']
    # Computed once and reused for the relative improvement below.
    sampling_mean = sampling_rank.mean()
    passive_mean = passive_rank.mean()

    rel_rows.append({
        'relation': rel,
        'rank_diff_std': rank_diff.std(),
        'entity_count': df_tmp_rel['entity'].count(),
        'rank_diff_mean': rank_diff.mean(),
        'rank_sampling_mean': sampling_mean,
        'rank_passive_mean': passive_mean,
        'relative_improvement': (sampling_mean - passive_mean) / sampling_mean,
        # "better"/"worse" follow the sign of rank_diff (sampling - passive).
        'count_better': int((rank_diff > 0).sum()),
        'count_worse': int((rank_diff < 0).sum()),
        # Passive rank inside the top 10 while the sampling rank is outside, and the reverse.
        'count_in_top_10': int(((passive_rank <= 10) & (sampling_rank > 10)).sum()),
        'count_out_top_10': int(((passive_rank > 10) & (sampling_rank <= 10)).sum()),
        'min_rank_diff': rank_diff.min(),
        'max_rank_diff': rank_diff.max(),
        'mean_upgrade_score': df_tmp_rel['upgrade_score'].mean(),
    })

df_rel = pd.DataFrame(rel_rows).sort_values(by='rank_diff_mean', ascending=False)
df_rel

Unnamed: 0,relation,rank_diff_std,entity_count,rank_diff_mean,rank_sampling_mean,rank_passive_mean,relative_improvement,count_better,count_worse,count_in_top_10,count_out_top_10,min_rank_diff,max_rank_diff,mean_upgrade_score
4,directed,966.61497,2,671.5,7840.5,7169.0,0.085645,1,1,0,1,-12.0,1355.0,-1.452695
13,hasNeighbor,676.63417,5,302.6,2330.4,2027.8,0.129849,1,0,0,0,0.0,1513.0,0.029857
17,influences,1269.376142,195,284.994872,2340.805128,2055.810256,0.121751,90,80,7,10,-3426.0,7057.0,0.200118
9,hasCapital,2160.468661,52,283.096154,2052.423077,1769.326923,0.137933,18,19,6,2,-7682.0,7616.0,-1.227499
6,graduatedFrom,2483.806841,56,204.392857,3289.303571,3084.910714,0.062139,12,24,0,4,-6620.0,11308.0,-1.154808
32,wroteMusicFor,31.819805,2,113.5,196.5,83.0,0.577608,2,0,0,0,91.0,136.0,4.575073
1,created,544.417234,8,108.875,2186.625,2077.75,0.049791,3,4,1,0,-302.0,1419.0,-0.722925
11,hasGender,1171.269334,55,68.872727,3400.327273,3331.454545,0.020255,23,21,3,2,-5634.0,4582.0,0.083629
29,playsFor,1540.90214,240,54.508333,899.241667,844.733333,0.060616,97,77,22,17,-9527.0,8679.0,0.333888
18,isAffiliatedTo,1608.234368,298,48.550336,1375.348993,1326.798658,0.0353,109,99,23,22,-5598.0,14829.0,-1.900826


In [60]:
# Split the relations by the sign of their mean upgrade score (zero counts as
# "good"). good_inverted stays a Series; bad_inverted is a plain list.
score_is_nonneg = df_rel['mean_upgrade_score'] >= 0
score_is_neg = df_rel['mean_upgrade_score'] < 0
good_inverted = df_rel.loc[score_is_nonneg, 'relation']
bad_inverted = df_rel.loc[score_is_neg, 'relation'].tolist()
print(good_inverted)
# One relation name per line, no header and no index.
good_inverted.to_csv('relation_list.csv', sep='\t', index=False, header=False)

13           hasNeighbor
17            influences
32         wroteMusicFor
11             hasGender
29              playsFor
28        participatedIn
24           isMarriedTo
26               livesIn
19           isCitizenOf
23           isLocatedIn
27                  owns
7             happenedIn
3                 diedIn
8     hasAcademicAdvisor
0                actedIn
12        hasMusicalRole
Name: relation, dtype: object


In [57]:
# Summary statistics over the rows whose values in the good_inverted relation
# columns add up to more than 0.5.
df_tmp[(df_tmp[good_inverted].sum(axis=1) > 0.5) ].describe()

Unnamed: 0,rank_sampling,rank_passive,rank_diff,upgrade_score,actedIn,created,dealsWith,diedIn,directed,exports,...,isLocatedIn,isMarriedTo,isPoliticianOf,livesIn,owns,participatedIn,playsFor,wasBornIn,worksAt,wroteMusicFor
count,2823.0,2823.0,2823.0,2823.0,2823.0,2823.0,2823.0,2823.0,2823.0,2823.0,...,2823.0,2823.0,2823.0,2823.0,2823.0,2823.0,2823.0,2823.0,2823.0,2823.0
mean,1804.550832,1784.559334,19.991498,7.21993,0.014187,0.000708,0.002953,0.04344,0.0,0.001046,...,0.241619,0.047148,0.001474,0.007329,0.002443,0.017081,0.175029,0.024087,0.000863,0.00124
std,4452.833726,4429.535922,1449.401463,322.596051,0.098424,0.012971,0.022618,0.144891,0.0,0.009599,...,0.417259,0.197996,0.01505,0.053287,0.040661,0.121787,0.27847,0.073847,0.0105,0.028321
min,1.0,1.0,-11669.0,-1031.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,2.0,-9.0,-0.5,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,12.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,180.0,183.5,8.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,...,0.333333,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0
max,17982.0,17806.0,16328.0,16328.0,1.0,0.333333,0.4,1.0,0.0,0.142857,...,1.0,1.0,0.25,1.0,1.0,1.0,1.0,0.428571,0.2,1.0


In [58]:
# Rows whose values in the good_inverted relation columns add up to more than 0.5.
good_profile = df_tmp[good_inverted].sum(axis=1) > 0.5
passive_rank = df_tmp['rank_passive']
sampling_rank = df_tmp['rank_sampling']

# Passive rank inside the top k while the sampling rank is outside (k = 10, 3, 1).
for top_k in (10, 3, 1):
    print(df_tmp[good_profile & (passive_rank <= top_k) & (sampling_rank > top_k)].count().values[0])

# The reverse: passive rank outside the top k while the sampling rank is inside.
for top_k in (10, 3, 1):
    print(df_tmp[good_profile & (passive_rank > top_k) & (sampling_rank <= top_k)].count().values[0])

137
161
132
173
151
127
