In [1]:
import numpy as np
from scipy .stats import kendalltau
from scipy .stats import rankdata
from itertools import permutations
import pandas as pd

# Aggregating the results of different metrics

## Kemeny consensus

Each metric provides a ranking of the different algorithms, which can be represented as a permutation of [1:N]. Since these rankings may differ from one metric to another, we need a way to generate a new permutation that is close to each of the permutations produced by the benchmark metrics. The first approach, called the Kemeny consensus (spelled "Kameny" in the function names of this notebook), is based on a distance (the Kendall tau distance) that measures the dissimilarity between two permutations.
SciPy has a function that computes the Kendall correlation (not to be confused with the Kendall tau distance).
Our first goal is to write a function that evaluates the Kendall distance between two permutations.

In [2]:
# scipy's kendalltau returns a rank *correlation* together with a p-value
# (correlation is 1.0 for two identical rankings), not a distance.
kendalltau([1,2,3], [1,2,3])
# Not what we are looking for: we need a dissimilarity that is 0 for identical rankings.

KendalltauResult(correlation=1.0, pvalue=0.3333333333333333)

In [3]:
def is_negative(n):
    """Return 1 when ``n`` is strictly negative and 0 otherwise.

    Used as an indicator function: zero and positive values both map to 0.
    """
    return 1 if n < 0 else 0

def Kendall_distance(l1, l2):
    """Return the normalized Kendall tau distance between two rankings.

    The distance is the fraction of index pairs ``(i, j)`` that the two
    rankings order in opposite directions (discordant pairs). A pair that is
    tied in either ranking has a zero product of differences and is therefore
    not counted as discordant.

    Parameters
    ----------
    l1, l2 : sequence of numbers
        Two rankings of the same length; ``l1[k]`` and ``l2[k]`` are the
        ranks given to the same item.

    Returns
    -------
    float
        A value in [0, 1]: 0.0 when no pair is discordant, 1.0 when every
        pair is discordant (e.g. one ranking is the reverse of the other).
        Rankings with fewer than two items have no pair to compare and
        yield 0.0.

    Raises
    ------
    ValueError
        If the two rankings do not have the same length.
    """
    n = len(l1)
    if n != len(l2):
        raise ValueError("Les permutations n'ont pas la même longueur")
    if n < 2:
        # No pair exists; returning early avoids a 0/0 in the normalization.
        return 0.0
    discordant = 0
    # Visit each unordered pair once (i < j) instead of every ordered pair.
    for i in range(n - 1):
        for j in range(i + 1, n):
            # Opposite signs of the two differences <=> the pair is discordant.
            if (l1[i] - l1[j]) * (l2[i] - l2[j]) < 0:
                discordant += 1
    # n*(n-1)/2 unordered pairs in total, so the ratio lies in [0, 1].
    return discordant / (n * (n - 1) // 2)

In [4]:
# Sanity checks against [0, 1, 2]: identical ranking, fully reversed ranking,
# and a ranking with a single swapped pair (one value printed per line).
print(
    *(
        Kendall_distance([0, 1, 2], other)
        for other in ([0, 1, 2], [2, 1, 0], [1, 0, 2])
    ),
    sep="\n",
)

0.0
1.0
0.3333333333333333


In [5]:

def Kameny_consensus_step1(M, perm):
    """Sum the Kendall distances between ``perm`` and every row of ``M``.

    Each row ``M[i]`` is compared to ``perm`` as one complete ranking, i.e.
    ``M[i, j]`` is the rank of algorithm ``j`` under test ``i``.

    Parameters
    ----------
    M : 2-D array
        One ranking per row; must expose ``shape`` and row indexing ``M[i]``.
    perm : sequence
        Candidate ranking, same length as a row of ``M``.

    Returns
    -------
    The accumulated (unweighted) distance over all rows.
    """
    total = 0
    n_rankings = M.shape[0]
    # Plain left-to-right accumulation, one Kendall distance per row.
    for row_idx in range(n_rankings):
        row_distance = Kendall_distance(perm, M[row_idx])
        total += row_distance
    return total

def Kameny_consensus(M, tol=1e-9):
    """Find, by exhaustive search, the rankings closest to all rows of ``M``.

    Every permutation of the values of ``M[0]`` is scored with
    ``Kameny_consensus_step1`` (sum of Kendall distances to the rows of
    ``M``); the permutations reaching the smallest score are returned.

    Parameters
    ----------
    M : 2-D numpy array
        One ranking per row.
    tol : float, optional
        Two scores that differ by at most ``tol`` are treated as tied. The
        scores are sums of floating-point fractions, so equal totals can
        differ by round-off; an exact comparison would drop such ties.
        ``tol=0`` gives a strict floating-point comparison.

    Returns
    -------
    (list of numpy.ndarray, float)
        All tied best permutations, and the best score found.

    Notes
    -----
    The search visits n! candidates for rankings of length n.
    """
    best_perms = []
    best_dist = Kameny_consensus_step1(M, M[0])
    # Iterate lazily; building a list of all n! tuples first is unnecessary.
    for candidate in permutations(M[0]):
        candidate_dist = Kameny_consensus_step1(M, candidate)
        if candidate_dist < best_dist - tol:
            # Strictly better than anything seen so far: restart the tie list.
            best_perms = [np.array(candidate)]
            best_dist = candidate_dist
        elif abs(candidate_dist - best_dist) <= tol:
            best_perms.append(np.array(candidate))
    return best_perms, best_dist

In [6]:
# Toy example: four rankings (one per row) of four algorithms.
M = np.array([
    [3, 2, 1, 0],
    [1, 2, 3, 0],
    [1, 3, 2, 0],
    [0, 1, 3, 2],
])
perm1 = [0, 1, 2, 3]
perm2 = [3, 2, 1, 0]
# Total Kendall distance from each candidate ranking to the rows of M.
print(Kameny_consensus_step1(M, perm1))
print(Kameny_consensus_step1(M, perm2))


2.333333333333333
1.6666666666666665


In [7]:
# Exhaustive search over the permutations of M[0]; returns the list of tied
# best rankings together with their total distance to the rows of M.
Kameny_consensus(M)

([array([1, 3, 2, 0]), array([1, 2, 3, 0])], 1.0)

## First improvement of the Kemeny consensus

All the rankings produced by the different metrics have exactly the same weight. This may be a problem if some metrics are more relevant than others, or if some metrics usually produce very similar rankings. We can therefore improve the Kemeny consensus by giving a higher weight to the more relevant metrics.

In [8]:
def Weighted_Gameny_consensus_step1(M, perm, Weights):
    """Weighted sum of the Kendall distances between ``perm`` and the rows of ``M``.

    Parameters
    ----------
    M : 2-D array
        ``M[i, j]`` is the rank of algorithm ``j`` under test ``i``; each row
        is one complete ranking.
    perm : sequence
        Candidate ranking, same length as a row of ``M``.
    Weights : sequence of numbers
        ``Weights[i]`` is the weight applied to the distance to row ``i``.

    Returns
    -------
    The accumulated weighted distance over all rows.
    """
    total = 0
    n_tests = M.shape[0]
    for test_idx in range(n_tests):
        distance = Kendall_distance(perm, M[test_idx])
        total += distance * Weights[test_idx]
    return total

def Weighted_Kameny_consensus(M, Weights, tol=1e-9):
    """Find, by exhaustive search, the rankings minimizing the weighted distance to ``M``.

    Every permutation of the values of ``M[0]`` is scored with
    ``Weighted_Gameny_consensus_step1`` (weighted sum of Kendall distances
    to the rows of ``M``); the permutations reaching the smallest score are
    returned.

    Parameters
    ----------
    M : 2-D numpy array
        One ranking per row.
    Weights : sequence of numbers
        ``Weights[i]`` is the weight of row ``i`` of ``M``.
    tol : float, optional
        Two scores that differ by at most ``tol`` are treated as tied. The
        scores are sums of floating-point products, so equal totals can
        differ by round-off; an exact comparison would drop such ties.
        ``tol=0`` gives a strict floating-point comparison.

    Returns
    -------
    (list of numpy.ndarray, float)
        All tied best permutations, and the best score found.

    Notes
    -----
    The search visits n! candidates for rankings of length n.
    """
    best_perms = []
    best_dist = Weighted_Gameny_consensus_step1(M, M[0], Weights)
    # Iterate lazily; building a list of all n! tuples first is unnecessary.
    for candidate in permutations(M[0]):
        candidate_dist = Weighted_Gameny_consensus_step1(M, candidate, Weights)
        if candidate_dist < best_dist - tol:
            # Strictly better than anything seen so far: restart the tie list.
            best_perms = [np.array(candidate)]
            best_dist = candidate_dist
        elif abs(candidate_dist - best_dist) <= tol:
            best_perms.append(np.array(candidate))
    return best_perms, best_dist

In [9]:
# One weight per row of M; the first ranking gets most of the weight.
Weights=[0.7, 0.1, 0.1, 0.1]
# Returns the tied best rankings and their weighted total distance.
Weighted_Kameny_consensus(M, Weights)

([array([3, 2, 1, 0])], 0.16666666666666669)

In [10]:
def using_indexed_assignment(x):
    """Rank the entries of ``x`` in descending order (rank 0 = largest value).

    Parameters
    ----------
    x : array-like of numbers
        Scores to rank (numpy array, pandas Series, list, ...).

    Returns
    -------
    numpy.ndarray of int
        ``result[k]`` is the rank of ``x[k]``: 0 for the largest value,
        ``len(x) - 1`` for the smallest. Equal values receive consecutive
        ranks in their order of appearance.
    """
    values = np.asarray(x)
    # Sorting the negated values ascending gives descending order; a stable
    # sort makes the relative rank of tied scores deterministic.
    order = np.argsort(-values, kind="stable")
    result = np.empty(len(values), dtype=int)
    # Invert the permutation: the item at sorted position p gets rank p.
    result[order] = np.arange(len(values))
    return result

def get_ranking(df):
    """Return a new DataFrame in which every column of ``df`` is replaced by ranks.

    Each column is ranked independently with ``using_indexed_assignment``.
    The input frame is left untouched: the ranks are written into a copy,
    so the caller's scores are not overwritten.

    Parameters
    ----------
    df : pandas.DataFrame
        One column per set of scores to rank.

    Returns
    -------
    pandas.DataFrame
        Same index and columns as ``df``, holding the ranks.
    """
    ranking = df.copy()
    for col in df.columns:
        ranking[col] = using_indexed_assignment(df[col])
    return ranking

In [11]:
# The largest value (5) gets rank 0 and the smallest (1) gets rank 3.
using_indexed_assignment(np.array([1, 5, 2, 3]))

array([3, 0, 2, 1])

In [12]:
# Small score table: one row per algorithm, one column per test.
df = pd.DataFrame(
    [
        [1.2, 1.7, 2],
        [2, 1.7, 1.2],
    ],
    index=['Algo1', 'Algo2'],
    columns=['Test1', 'Test2', 'Test3'],
)

In [13]:
# Rank the algorithms within each test column, then transpose so that each
# row holds one test's ranking of the algorithms.
ranking = get_ranking(df).transpose()

In [14]:
# Display the transposed rank table (rows: tests, columns: algorithms).
ranking

Unnamed: 0,Algo1,Algo2
Test1,1,0
Test2,0,1
Test3,0,1


In [15]:
def _read_abs_corr(csv_path):
    """Read a correlation CSV, index it by its 'Unnamed: 0' column and take absolute values."""
    table = pd.read_csv(csv_path)
    return table.set_index('Unnamed: 0').abs()


# One table per (correlation type, language pair).
df_Kendall_cs_en = _read_abs_corr('corr_kendall_2016_cs-en.csv')
df_Pearson_cs_en = _read_abs_corr('corr_pearson_2016_cs-en.csv')
df_Spearman_cs_en = _read_abs_corr('corr_spearman_2016_cs-en.csv')

df_Kendall_de_en = _read_abs_corr('corr_kendall_2016_de-en.csv')
df_Pearson_de_en = _read_abs_corr('corr_pearson_2016_de-en.csv')
df_Spearman_de_en = _read_abs_corr('corr_spearman_2016_de-en.csv')

df_Kendall_fi_en = _read_abs_corr('corr_kendall_2016_fi-en.csv')
df_Pearson_fi_en = _read_abs_corr('corr_pearson_2016_fi-en.csv')
df_Spearman_fi_en = _read_abs_corr('corr_spearman_2016_fi-en.csv')

df_Kendall_ro_en = _read_abs_corr('corr_kendall_2016_ro-en.csv')
df_Pearson_ro_en = _read_abs_corr('corr_pearson_2016_ro-en.csv')
df_Spearman_ro_en = _read_abs_corr('corr_spearman_2016_ro-en.csv')

df_Kendall_ru_en = _read_abs_corr('corr_kendall_2016_ru-en.csv')
df_Pearson_ru_en = _read_abs_corr('corr_pearson_2016_ru-en.csv')
df_Spearman_ru_en = _read_abs_corr('corr_spearman_2016_ru-en.csv')

In [16]:
def _human_score_ranking(corr_df, label):
    """Rank the 'human_scores' column of ``corr_df`` and return it as a single row named ``label``."""
    human_col = corr_df.loc[:, ['human_scores']].rename(columns={'human_scores': label})
    return get_ranking(human_col).transpose()


# One single-row ranking per (correlation type, language pair).
ranking_Kendall_cs = _human_score_ranking(df_Kendall_cs_en, 'Kendall_cs')
ranking_Pearson_cs = _human_score_ranking(df_Pearson_cs_en, 'Pearson_cs')
ranking_Spearman_cs = _human_score_ranking(df_Spearman_cs_en, 'Spearman_cs')

ranking_Kendall_de = _human_score_ranking(df_Kendall_de_en, 'Kendall_de')
ranking_Pearson_de = _human_score_ranking(df_Pearson_de_en, 'Pearson_de')
ranking_Spearman_de = _human_score_ranking(df_Spearman_de_en, 'Spearman_de')

ranking_Kendall_fi = _human_score_ranking(df_Kendall_fi_en, 'Kendall_fi')
ranking_Pearson_fi = _human_score_ranking(df_Pearson_fi_en, 'Pearson_fi')
ranking_Spearman_fi = _human_score_ranking(df_Spearman_fi_en, 'Spearman_fi')

ranking_Kendall_ro = _human_score_ranking(df_Kendall_ro_en, 'Kendall_ro')
ranking_Pearson_ro = _human_score_ranking(df_Pearson_ro_en, 'Pearson_ro')
ranking_Spearman_ro = _human_score_ranking(df_Spearman_ro_en, 'Spearman_ro')

ranking_Kendall_ru = _human_score_ranking(df_Kendall_ru_en, 'Kendall_ru')
ranking_Pearson_ru = _human_score_ranking(df_Pearson_ru_en, 'Pearson_ru')
ranking_Spearman_ru = _human_score_ranking(df_Spearman_ru_en, 'Spearman_ru')

In [17]:
# Only the last bare expression of a cell is rendered, so just the Spearman
# ranking is displayed; the first two lines produce no output.
ranking_Kendall_ro
ranking_Pearson_ro
ranking_Spearman_ro

Unnamed: 0,human_scores,bary,bertscore,bleu,chrf,depth,meteor,rouge1,rouge2,rougeL,sacrebleu,ter
Spearman_ro,0,2,1,10,3,11,4,7,6,5,8,9


In [18]:
# Stack the per-language rankings into one table per correlation type
# (one row per language pair). A single table with all 15 rankings is
# deliberately not built in this cell.
Kendall_scores = pd.concat(
    [
        ranking_Kendall_cs,
        ranking_Kendall_de,
        ranking_Kendall_fi,
        ranking_Kendall_ro,
        ranking_Kendall_ru,
    ],
    axis=0,
)

Pearson_scores = pd.concat(
    [
        ranking_Pearson_cs,
        ranking_Pearson_de,
        ranking_Pearson_fi,
        ranking_Pearson_ro,
        ranking_Pearson_ru,
    ],
    axis=0,
)

Spearman_scores = pd.concat(
    [
        ranking_Spearman_cs,
        ranking_Spearman_de,
        ranking_Spearman_fi,
        ranking_Spearman_ro,
        ranking_Spearman_ru,
    ],
    axis=0,
)

In [19]:
# Consensus over the Spearman-based rankings, ignoring the 'depth', 'rouge2'
# and 'human_scores' columns. Only the relative order of the remaining rank
# values matters to the Kendall distance. Exhaustive search over 9! permutations.
Kameny_consensus(Spearman_scores.drop(['depth', 'rouge2', 'human_scores'], axis=1).to_numpy())

([array([ 2,  1, 10,  5,  4,  6,  3,  9,  8])], 0.2222222222222222)

In [20]:
# Consensus over the Pearson-based rankings, ignoring the 'depth', 'rouge2'
# and 'human_scores' columns. Exhaustive search over 9! permutations.
Kameny_consensus(Pearson_scores.drop(['depth', 'rouge2', 'human_scores'], axis=1).to_numpy())

([array([ 2,  1, 10,  4,  5,  6,  3,  8,  9])], 0.2222222222222222)

In [21]:
# Consensus over the Kendall-based rankings, ignoring the 'depth', 'rouge2'
# and 'human_scores' columns. Exhaustive search over 9! permutations.
Kameny_consensus(Kendall_scores.drop(['depth', 'rouge2', 'human_scores'], axis=1).to_numpy())

([array([ 2,  1, 10,  5,  4,  6,  3,  9,  8])], 0.3055555555555556)

In [22]:
# Full rank table: one row per language pair, one column per metric
# (rank 0 = highest absolute correlation; 'human_scores' is always 0).
Kendall_scores


Unnamed: 0,human_scores,bary,bertscore,bleu,chrf,depth,meteor,rouge1,rouge2,rougeL,sacrebleu,ter
Kendall_cs,0,2,1,10,4,11,6,5,7,3,9,8
Kendall_de,0,2,1,10,6,11,4,7,8,3,9,5
Kendall_fi,0,2,1,11,5,10,4,6,7,3,8,9
Kendall_ro,0,2,1,10,3,11,4,7,6,5,8,9
Kendall_ru,0,2,1,10,4,11,3,6,7,5,9,8


In [23]:
# Same table without 'depth', 'rouge2' and 'human_scores': this is the frame
# converted with .to_numpy() and passed to Kameny_consensus.
Kendall_scores.drop(['depth', 'rouge2', 'human_scores'], axis=1)

Unnamed: 0,bary,bertscore,bleu,chrf,meteor,rouge1,rougeL,sacrebleu,ter
Kendall_cs,2,1,10,4,6,5,3,9,8
Kendall_de,2,1,10,6,4,7,3,9,5
Kendall_fi,2,1,11,5,4,6,3,8,9
Kendall_ro,2,1,10,3,4,7,5,8,9
Kendall_ru,2,1,10,4,3,6,5,9,8
