In [1]:
from scipy.stats import pearsonr, spearmanr
import pandas as pd

In [2]:
# Model names; each one is used as the file-name prefix of the CSVs under csv_files/.
models = [
    'bloom-560m',
    'bloom-1b1',
    'bloom-1b7',
]
# Intermediate-layer tags compared against 'last-layer' in the analysis cell.
layers = [
    'inter-layer-13',
    'inter-layer-17',
]

# Variable names accepted by read_data_by_idx:
#   'ovlp-rate', 'f1-score', 'cos-similarity-nl', 'cos-similarity-code'
# Row labels (language codes) selected from the cosine-similarity tables.
langs_cos = 'ar es eu fr hi pt ta ur vi'.split()
# Presumably the languages present in the f1-score tables (only referenced in
# commented-out analysis code) -- TODO confirm.
langs_f1 = 'ar en es eu fr hi mr pt ta ur vi zh'.split()
# Not referenced in the visible cells; presumably languages for an accuracy metric -- TODO confirm.
langs_acc = 'ar bg de el en es fr hi ru sw th tr ur vi zh'.split()

In [3]:
def read_data_by_idx(variable, model, layer, avg=False):
    """Load one stored measurement for a model (and layer) from ``csv_files/``.

    Args:
        variable: which measurement to load; one of 'ovlp-rate', 'f1-score',
            'cos-similarity-nl' or 'cos-similarity-code'.
        model: model name, used as the file-name prefix.
        layer: layer tag, used in the file name (not used for 'f1-score').
        avg: only consulted for 'ovlp-rate'. When True the per-checkpoint
            average file is read instead of the pairwise POS table.

    Returns:
        * 'ovlp-rate' with ``avg=True``: dict ``{ckpt: avg_ovlp_rate}`` built from
          the first two comma-separated fields of each line (key kept as str,
          value converted to float).
        * 'ovlp-rate' with ``avg=False``: DataFrame read from the
          ``*_ovlp-rate_POS.csv`` file.
        * 'f1-score': DataFrame read from ``{model}_f1-score.csv``.
        * 'cos-similarity-nl': the cosine-similarity DataFrame without its
          'code' row.
        * 'cos-similarity-code': the 'code' row of the cosine-similarity table
          as a dict ``{column: value}``.

    Raises:
        ValueError: if ``variable`` is not one of the supported names (instead
            of silently returning None and failing later in the caller).
    """
    if variable == 'ovlp-rate':
        if avg:
            avg_by_ckpt = {}
            with open(f'csv_files/{model}_{layer}_avg-ovlp-rate-by-ckpt.txt', 'r') as f:
                for line in f:
                    # Tolerate blank lines (e.g. a trailing newline at end of file).
                    if not line.strip():
                        continue
                    fields = line.split(',')
                    avg_by_ckpt[fields[0]] = float(fields[1])
            return avg_by_ckpt

        # in pairwise setting we cannot use multiple categories because there is a
        # mismatch of category number between languages, so we use ovlp-rate for POS only
        return pd.read_csv(f'csv_files/{model}_{layer}_ovlp-rate_POS.csv', index_col=0)

    if variable == 'f1-score':
        return pd.read_csv(f'csv_files/{model}_f1-score.csv', index_col=0)

    if variable == 'cos-similarity-nl':
        df = pd.read_csv(f'csv_files/{model}_{layer}_cos-similarity.csv', index_col=0)
        # The 'code' row is served separately by 'cos-similarity-code'.
        return df.drop(index='code')

    if variable == 'cos-similarity-code':
        df = pd.read_csv(f'csv_files/{model}_{layer}_cos-similarity.csv', index_col=0)
        return df.loc['code'].to_dict()

    raise ValueError(f'unknown variable: {variable!r}')


def print_pearson(variable_list_1, variable_list_2):
    """Print and return the Pearson correlation between two equally long sequences.

    Returns:
        (correlation, p_value) as produced by ``scipy.stats.pearsonr``.
    """
    corr, p_value = pearsonr(variable_list_1, variable_list_2)
    report = (
        ('Pearsons correlation: %.3f', corr),
        ('Pearsons P value: %.3e', p_value),
    )
    for template, value in report:
        print(template % value)
    return corr, p_value

def print_spearman(variable_list_1, variable_list_2):
    """Print and return the Spearman rank correlation between two equally long sequences.

    Returns:
        (correlation, p_value) as produced by ``scipy.stats.spearmanr``.
    """
    corr, p_value = spearmanr(variable_list_1, variable_list_2)
    report = (
        ('Spearmans correlation: %.3f', corr),
        ('Spearmans P value: %.3e', p_value),
    )
    for template, value in report:
        print(template % value)
    return corr, p_value


def compute_df_avg(df, lang_list):
    '''
    Flatten selected rows of df column by column and average each column.

    For every column of df (in column order) the cells at the row labels in
    lang_list are read (in list order) and converted to float.

    returns:
        element_list: those cell values, concatenated over all columns
        avg_list: one mean per column, taken over the selected rows only
    '''
    flattened = []
    column_means = []

    for ckpt in df.columns:
        column = df[ckpt]
        values = [float(column[lang]) for lang in lang_list]
        flattened.extend(values)
        column_means.append(sum(values) / len(values))

    return flattened, column_means


In [4]:
# Correlate the intermediate-layer cosine similarity (series 1) with the
# last-layer cosine similarity (series 2), pooled over every (model, layer) pair.
#   *_avg_*      : one per-column mean for each column of each table
#   *_pairwise_* : every individual (column, language) cell of each table
all_avg_1 = []
all_pairwise_1 = []
all_avg_2 = []
all_pairwise_2 = []

# Other variable pairs can be swapped in through read_data_by_idx, e.g.
#   'ovlp-rate'  (avg=True returns {ckpt: avg}; avg=False returns the POS table),
#   'f1-score'   (select rows with langs_f1),
#   'cos-similarity-code' (returns a dict, so there is no pairwise series for it).

for model in models:
    for layer in layers:
        df_1 = read_data_by_idx('cos-similarity-nl', model, layer)
        print(df_1)
        # compute_df_avg returns (elements, per-column averages); one call per
        # table is enough to obtain both.
        pairwise_1, avg_1 = compute_df_avg(df_1, langs_cos)
        all_avg_1 += avg_1
        all_pairwise_1 += pairwise_1

        # Read inside the layer loop so series 2 receives one table for every
        # table appended to series 1.
        df_2 = read_data_by_idx('cos-similarity-nl', model, 'last-layer')
        print(df_2)
        pairwise_2, avg_2 = compute_df_avg(df_2, langs_cos)
        all_avg_2 += avg_2
        all_pairwise_2 += pairwise_2

print("Average:")
print_pearson(all_avg_1, all_avg_2)
print_spearman(all_avg_1, all_avg_2)
print("Pairwise:")
print_pearson(all_pairwise_1, all_pairwise_2)
# Kept as the last expression so the notebook also displays the returned tuple.
print_spearman(all_pairwise_1, all_pairwise_2)

        best      1000     10000    100000    200000    300000    400000  \
ar  0.748548  0.576550  0.759758  0.741250  0.792347  0.850337  0.814171   
es  0.900085  0.826456  0.863101  0.876039  0.877547  0.932560  0.915948   
eu  0.847554  0.789216  0.828222  0.842276  0.822413  0.911603  0.888746   
fr  0.892216  0.828664  0.820374  0.822413  0.817134  0.929041  0.912058   
hi  0.839903  0.774214  0.914283  0.941447  0.926125  0.907810  0.878478   
pt  0.887483  0.851780  0.859532  0.890726  0.885060  0.924713  0.910442   
ta  0.763544  0.732989  0.901410  0.920799  0.821304  0.878144  0.849418   
ur  0.754028  0.599727  0.852203  0.820301  0.865578  0.850300  0.819323   
vi  0.884882  0.816436  0.887017  0.919977  0.917397  0.928600  0.910204   

      500000    600000  
ar  0.759758  0.324517  
es  0.863101  0.765517  
eu  0.828222  0.785242  
fr  0.820374  0.770745  
hi  0.914283  0.785880  
pt  0.859532  0.801973  
ta  0.901410  0.800879  
ur  0.852203  0.564095  
vi  0.887017  

(0.4138903854999044, 4.731630544228817e-20)