In [1]:
from scipy.stats import pearsonr, spearmanr
import pandas as pd

In [2]:
models = ['bloom-560m', 'bloom-1b1', 'bloom-1b7']
layers = ['inter-layer-13' , 'inter-layer-17']

# variable_list = ['ovlp-rate', 'f1-score', 'cos-similarity-nl', 'cos-similarity-code', 'xnli_acc']
langs_cos = ['ar', 'es', 'eu', 'fr', 'hi', 'pt', 'ta', 'ur', 'vi']
langs_f1 = ['ar', 'en', 'es', 'eu', 'fr', 'hi', 'mr', 'pt', 'ta', 'ur', 'vi', 'zh']
langs_ovlp = ['ar', 'eu', 'ca', 'zh', 'en', 'fr', 'hi', 'mr', 'pt', 'es', 'ta', 'ur', 'vi']
langs_acc = ['ar', 'bg', 'de', 'el', 'en', 'es', 'fr', 'hi', 'ru', 'sw', 'th', 'tr', 'ur', 'vi', 'zh']

In [7]:
def read_data_by_idx(variable, model, layer, avg=False, remove_best=False):

    if variable == 'ovlp-rate':

        if avg:  # a dict will be returned: {ckpt: avg_ovlp_rate}
            dict = {}
            with open(f'csv_files/{model}_{layer}_avg-ovlp-rate-by-ckpt.txt', 'r') as f:
                lines = f.readlines()
                for line in lines:
                    list = line.split(',')
                    dict[list[0]] = float(list[1])
                if remove_best:
                    del dict['best']
            return dict
        else:
            # in pairwise setting we cannot use multiple categories because there is a mismatch of category number between languages
            # so we use ovlp-rate for POS only
            df = pd.read_csv(f'csv_files/{model}_{layer}_ovlp-rate_POS.csv', index_col=0)
            if remove_best:
                df = df.drop(columns=['best'])
            return df 
        
    elif variable == 'f1-score':
        df = pd.read_csv(f'csv_files/{model}_f1-score.csv', index_col=0)
        if remove_best:
            df = df.drop(columns=['best'])
        return df
    
    elif variable == 'cos-similarity-nl':
        df = pd.read_csv(f'csv_files/{model}_{layer}_cos-similarity.csv', index_col=0)
        df = df.drop(index=('code'))
        if remove_best:
            df = df.drop(columns=['best'])
        return df
    
    elif variable == 'cos-similarity-code':
        df = pd.read_csv(f'csv_files/{model}_{layer}_cos-similarity.csv', index_col=0)
        dict = df.loc['code'].to_dict()
        if remove_best:
            del dict['best']
        return dict
    elif variable == 'xnli_acc':
        df = pd.read_csv(f'csv_files/{model}_xnli_acc.csv', index_col=0)
        return df

def intersection(lang_list_1, lang_list_2):
    return list(set(lang_list_1) & set(lang_list_2))

def print_pearson(variable_list_1, variable_list_2):
    pearson_corr, pearson_p_value = pearsonr(variable_list_1, variable_list_2)
    print('Pearsons correlation: %.3f' % pearson_corr)
    print('Pearsons P value: %.3e' % pearson_p_value)
    return pearson_corr, pearson_p_value

def print_spearman(variable_list_1, variable_list_2):
    spearman_corr, spearman_p_value = spearmanr(variable_list_1, variable_list_2)
    print('Spearmans correlation: %.3f' % spearman_corr)
    print('Spearmans P value: %.3e' % spearman_p_value)
    return spearman_corr, spearman_p_value


def compute_df_avg(df, lang_list):
    '''
    this function will return two lists of data extracted from df.

    returns:
        element list: each element in df, appended by columns
        avg_list: the avg element value of df through columns
    '''
    element_list = []
    avg_list = []

    ckpts = df.columns
    langs = lang_list

    for ckpt in ckpts:
        temp_list = []
        for lang in langs:
            temp_list.append(float(df[ckpt][lang]))
        
        element_list += temp_list
        avg_list.append(sum(temp_list)/len(temp_list))
    
    return element_list, avg_list


In [8]:
all_avg_1 = []
all_pairwise_1 = []
all_avg_2 = []
all_pairwise_2 = []
# variable_list = ['ovlp-rate', 'f1-score', 'cos-similarity-nl', 'cos-similarity-code']
# 'last-layer'

for model in models:
    for layer in layers:
        print("find the intersection of two language list: ")
        langs_list = intersection(langs_ovlp, langs_acc)
        print(langs_list)

        df_1 = read_data_by_idx('ovlp-rate', model, layer, remove_best=True)
        print(df_1)
        all_avg_1 += read_data_by_idx('ovlp-rate', model, layer, avg=True, remove_best=True).values()
        all_pairwise_1 += compute_df_avg(df_1, langs_list)[0]
        print(all_avg_1)
        print(all_pairwise_1)

        df_2 = read_data_by_idx('xnli_acc', model, layer)
        print(df_2)
        all_avg_2 += compute_df_avg(df_2, langs_list)[1]
        all_pairwise_2 += compute_df_avg(df_2, langs_list)[0]
        print(all_avg_2)
        print(all_pairwise_2)
        
        # df_1 = read_data_by_idx('cos-similarity-nl', model, layer)
        # print(df_1)
        # all_avg_1 += compute_df_avg(df_1, langs_cos)[1]
        # all_pairwise_1 += compute_df_avg(df_1, langs_cos)[0]

        # df_2 = read_data_by_idx('cos-similarity-nl', model, 'last-layer')
        # print(df_2)
        # all_avg_2 += compute_df_avg(df_2, langs_cos)[1]
        # all_pairwise_2 += compute_df_avg(df_2, langs_cos)[0]

        # list_2 = read_data_by_idx('cos-similarity-code', model, 'last-layer').values()
        # print(list_2)
        # all_avg_2 += list_2
        # # no pairwise for code   

        # df_2 = read_data_by_idx('f1-score', model, layer)
        # print(df_2)
        # all_avg_2 += compute_df_avg(df_2, langs_f1)[1]
        # all_pairwise_2 += compute_df_avg(df_2, langs_f1)[0]

print("Average:")
print_pearson(all_avg_1, all_avg_2)
print_spearman(all_avg_1, all_avg_2)
print("Pairwise:")
print_pearson(all_pairwise_1, all_pairwise_2)
print_spearman(all_pairwise_1, all_pairwise_2)

find the intersection of two language list: 
['ur', 'zh', 'fr', 'en', 'es', 'ar', 'vi', 'hi']
    1000  100000  200000  300000  400000  600000
ar  0.06    0.06    0.14    0.12    0.20    0.04
ca  0.00    0.16    0.18    0.18    0.22    0.04
en  1.00    1.00    1.00    1.00    1.00    1.00
eu  0.04    0.08    0.06    0.14    0.04    0.02
fr  0.02    0.18    0.28    0.22    0.26    0.06
hi  0.08    0.04    0.10    0.10    0.12    0.02
mr  0.00    0.02    0.04    0.02    0.06    0.08
pt  0.06    0.14    0.20    0.20    0.26    0.06
es  0.12    0.16    0.16    0.18    0.26    0.06
ta  0.06    0.08    0.10    0.16    0.14    0.02
ur  0.10    0.08    0.12    0.10    0.18    0.04
vi  0.08    0.04    0.12    0.14    0.20    0.04
zh  0.04    0.10    0.18    0.22    0.16    0.00
[0.0681104451104451, 0.11838006438006438, 0.1383046953046953, 0.15514496614496612, 0.16551159951159952, 0.05090653790653791]
[0.1, 0.04, 0.02, 1.0, 0.12, 0.06, 0.08, 0.08, 0.08, 0.1, 0.18, 1.0, 0.16, 0.06, 0.04, 0.04, 0.

(0.5663634126424916, 7.866533491738558e-26)