In [None]:
import json
import re

# Eval Dataset

In [None]:
# Evaluation configuration. The entity types, country slugs and model names are
# interpolated into the data file paths below; the language names are the keys
# used inside each question record.
entity_types = ['war', 'leader', 'national_park', 'monuments']
countries = [
    'china', 'germany', 'india',
    'japan', 'mexico', 'russia',
    'spain', 'uk', 'us',
]
languages = [
    'Hindi', 'English', 'Spanish', 'Mandarin',
    'Japanese', 'Russian', 'German',
]
model_names = [
    'bloomz-7b1',
    'Llama-2-7-b-chat-hf',
    'Llama-2-13-b-chat-hf',
    'Mistral-7B-Instruct',
    'Meta-Llama-3-8b-Instruct',
    'Aya',
    'gpt-4',
    'Mixtral-8x7B',
]
def uniform_options(pred):
    """Normalise a raw model generation so option labels read ``A.``/``B.``/...

    Steps, in order:
      1. drop parentheses, the ``</s>`` end-of-sequence marker and the
         katakana middle dot (U+30FB);
      2. strip surrounding whitespace;
      3. make sure a non-empty prediction ends with a full stop;
      4. rewrite a stand-alone ``A``/``B``/``C``/``D`` followed by a space as
         ``A. ``/``B. ``/...

    Parameters
    ----------
    pred : str
        Raw text produced by a model.

    Returns
    -------
    str
        The normalised prediction (the empty string stays empty).
    """
    # The middle dot is removed both as the real character and as the mojibake
    # it turns into when its UTF-8 bytes (E3 83 BB) are decoded as cp1252.
    for noise in ('(', ')', '</s>', '\u30fb', '\u00e3\u0192\u00bb'):
        pred = pred.replace(noise, '')
    pred = pred.strip()
    if pred and not pred.endswith('.'):
        pred += '.'
    # Only treat the letter as an option label when it is not the tail of a
    # Latin word: "USA is" must not become "USA. is", which would later be
    # mistaken for option A. Non-Latin text directly before the letter (e.g.
    # CJK, which has no word spacing) still counts as a label.
    return re.sub(r'(?<![A-Za-z])([ABCD]) ', r'\1. ', pred)

def extract_option(pred,options):
    """Map a normalised prediction to the option label it selects.

    Resolution order:
      1. the first option whose text occurs in ``pred`` wins and its label
         (``A.``..``D.``, by position) is returned;
      2. otherwise the first of ``A.``, ``B.``, ``C.``, ``D.`` that appears in
         ``pred`` as a stand-alone label is returned;
      3. otherwise ``pred`` itself is returned with leading/trailing dots
         removed.

    Parameters
    ----------
    pred : str
        Prediction text, normally already passed through ``uniform_options``.
    options : list of str
        Option texts in label order; only the first four are considered.

    Returns
    -------
    str
        ``'A.'``..``'D.'`` when an option is recognised, else the stripped text.
    """
    options_index=['A.','B.','C.','D.']
    for label, option in zip(options_index, options):
        # A blank option is a substring of every prediction, so it must never
        # be allowed to match.
        if option.strip() and option in pred:
            return label
    for label in options_index:
        # Reject labels glued to the end of a Latin word ("USA." is not "A.").
        if re.search(r'(?<![A-Za-z])' + re.escape(label), pred):
            return label
    return pred.strip('.')

def get_options(prompt):
    """Return the option texts listed at the end of a prompt.

    The four lines that precede the final line of ``prompt`` are taken as the
    option lines; the first whitespace-separated token of each (the label) is
    dropped and the remaining tokens are re-joined with single spaces.
    """
    option_lines = prompt.split('\n')[-5:-1]
    texts = []
    for line in option_lines:
        tokens = line.split()
        texts.append(' '.join(tokens[1:]))
    return texts

In [None]:
# For every entity type, update the processed QA files of all countries in place:
#   phase 1 stores each model's raw generations under data['output'][model],
#   phase 2 stores the extracted option labels under data['score'][model][lang].
# Files are opened explicitly as UTF-8 so the multilingual text is not decoded
# with a platform-dependent default codec.
for entity_type in entity_types:
    # Phase 1: add the model outputs to the processed files.
    for country in countries:
        qa_path = f'data/processed/{entity_type}_qa_{country}.json'
        with open(qa_path, encoding='utf-8') as data_file:
            data_loaded = json.load(data_file)
        for model_name in model_names:
            output_path = f'data/output/{entity_type}/{country}_{entity_type}_{model_name}.json'
            with open(output_path, encoding='utf-8') as data_file:
                data_output = json.load(data_file)

            for data in data_loaded:
                # The output file is indexed by the question id.
                data.setdefault('output', {})[model_name] = data_output[data['id']]
        with open(qa_path, 'w', encoding='utf-8') as f:
            json.dump(data_loaded, f)

    # Phase 2: turn the generations into option labels that can be scored.
    for country in countries:
        qa_path = f'data/processed/{entity_type}_qa_{country}.json'
        with open(qa_path, encoding='utf-8') as data_file:
            data_loaded = json.load(data_file)
        for model_name in model_names:
            for data in data_loaded:
                data_answers = {}
                for lang in languages:
                    outputs = data['output'][model_name][lang]
                    # The options are read from the first prompt of the language
                    # and reused for every generation in `outputs`.
                    options = get_options(data['prompts'][lang][0])
                    data_answers[lang] = [
                        extract_option(uniform_options(output), options)
                        for output in outputs
                    ]
                data.setdefault('score', {})[model_name] = data_answers

        with open(qa_path, 'w', encoding='utf-8') as f:
            json.dump(data_loaded, f)

# Figures

In [None]:
import json

In [None]:
# Configuration for the tables and figures. `languages` is ordered
# position-by-position like the ISO codes in `languages_iso`.
countries = [
    'china', 'germany', 'india',
    'japan', 'mexico', 'russia',
    'spain', 'uk', 'us',
]
languages = [
    'English', 'German', 'Spanish', 'Hindi',
    'Russian', 'Japanese', 'Mandarin',
]
model_names = [
    'bloomz-7b1',
    'Llama-2-7-b-chat-hf',
    'Mistral-7B-Instruct',
    'Meta-Llama-3-8b-Instruct',
    'Llama-2-13-b-chat-hf',
    'Aya',
    'gpt-4',
    'Mixtral-8x7B',
]

In [None]:
# Entity categories covered by the figures, in reporting order.
entity_types = ['monuments', 'leader', 'war', 'national_park']

In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Capitalised country names. Despite the name this is a list, and its order
# differs from `countries`; it is not referenced by the cells visible here.
countries_dict = [
    'China', 'India', 'Japan',
    'Mexico', 'Russia', 'Spain',
    'UK', 'US', 'Germany',
]

In [None]:
# Build one record per (entity type, country, model, question, language): the
# fraction of extracted answers that appear in the reference answers.
data_to_save = {
    'model': [],
    'languages': [],
    'score': [],
    'country': [],
    'id': [],
    'entity_type': [],
}
for entity in entity_types:
    for country in countries:
        with open(f'data/processed/{entity}_qa_{country}.json') as data_file:
            data_loaded = json.load(data_file)
        for model_name in model_names:
            for data in data_loaded:
                for lang in languages:
                    outputs = data['score'][model_name][lang]
                    true_labels = data['prompt_ans'][lang]
                    # NOTE(review): presumably `true_labels` is a collection of
                    # accepted labels; if it is a plain string, `in` is a
                    # substring test - confirm against the data files.
                    hits = [1 if output in true_labels else 0 for output in outputs]
                    score = sum(hits) / len(hits)

                    data_to_save['model'].append(model_name)
                    data_to_save['languages'].append(lang)
                    data_to_save['score'].append(score)
                    data_to_save['id'].append(data['id'])
                    data_to_save['country'].append(country)
                    data_to_save['entity_type'].append(entity)


In [None]:
import pandas as pd
# Long-format table of the records collected in `data_to_save`; `score` is a
# fraction in [0, 1] (share of correct answers for one question/language/model).
df=pd.DataFrame(data_to_save)

In [None]:
# LaTeX rows of "mean+/-sd" per country for every model and entity type. The
# mean and the (population) standard deviation are taken over the per-language
# accuracies, expressed in percent.
for entity_type in entity_types:
    print(entity_type)
    countries_list = ['us','china','india','uk','japan','germany', 'russia','mexico', 'spain']
    dfe = df[df['entity_type'] == entity_type]
    for model_name in model_names:
        df_model = dfe[dfe['model'] == model_name]
        acc_languages_countries = []
        sd_languages_countries = []
        for country in countries_list:
            df_country = df_model[df_model['country'] == country]
            accuracies_language = []
            for lang in languages:
                df_lang = df_country[df_country['languages'] == lang]
                accuracies_language.append(sum(df_lang['score']) / len(df_lang) * 100)
            mean = sum(accuracies_language) / len(accuracies_language)
            squared_deviations = [(x - mean) ** 2 for x in accuracies_language]
            variance = sum(squared_deviations) / len(accuracies_language)
            acc_languages_countries.append(mean)
            sd_languages_countries.append(variance ** 0.5)
        cells = [
            '%.2f' % y + '+/-' + '%.2f' % x
            for y, x in zip(acc_languages_countries, sd_languages_countries)
        ]
        print(model_name + ' &$' + '$&$'.join(cells) + '$\\\\')


In [None]:
# Two-letter codes used as axis/legend labels; position i labels languages[i]
# (English, German, Spanish, Hindi, Russian, Japanese, Mandarin).
languages_iso=['EN','DE','ES','HI','RU','JA','ZH']

In [None]:
# TC: for every model, the overlap (intersection over union, in percent) of the
# per-language sets stored in aligned[model], computed for four language groups
# and appended to ans[model] in this order: all languages, non-Hindi, European,
# non-European.
aligned = {model: {} for model in model_names}
ans = {model: [] for model in model_names}
U_set = set()  # not used by the scoring below; kept so the name stays defined

# NOTE(review): nothing in this notebook fills `aligned`. Populate
# aligned[model][language] here before scoring (presumably with the ids of the
# questions the model answers correctly in that language - TODO confirm).


def _tc_overlap(coverage, langs=None):
    """Return 100 * |intersection| / |union| of the sets coverage[lang].

    Parameters
    ----------
    coverage : dict
        Maps a language name to an iterable of hashable items.
    langs : iterable of str, optional
        Languages to compare; every language in ``coverage`` when omitted.

    Returns
    -------
    float
        Overlap in percent; 0.0 when every selected set is empty.

    Raises
    ------
    ValueError
        If there is no language to compare (``coverage`` was never filled).
    KeyError
        If a requested language is missing from ``coverage``.
    """
    if langs is None:
        langs = list(coverage)
    sets = [set(coverage[lang]) for lang in langs]
    if not sets:
        raise ValueError('no per-language sets to compare: fill `aligned` before computing TC')
    # Both sets are rebuilt from scratch on every call, so nothing leaks from
    # one model or language group into the next.
    union = set().union(*sets)
    if not union:
        return 0.0
    intersect = set.intersection(*sets)
    return len(intersect) / len(union) * 100


non_hindi=['English','German','Spanish','Russian',"Japanese",'Mandarin']
europian=['English','German','Spanish','Russian']
non_europian=['English','Hindi',"Japanese",'Mandarin']

for model in model_names:
    for group in (None, non_hindi, europian, non_europian):
        ans[model].append(_tc_overlap(aligned[model], group))

In [None]:
# One LaTeX table row per model: the scores stored in `ans`, two decimals each.
for model, scores in ans.items():
    row = '&'.join('%.2f' % x for x in scores)
    print(model, '&', row + '\\\\')

In [None]:
# pairwise alignment
# model_names=['Meta-Llama-3-8b-Instruct']
import numpy as np 
import seaborn as sn 
import matplotlib.pyplot as plt 
# One lower-triangular heatmap per model: pairwise overlap (intersection over
# union, in percent) between the per-language sets in aligned[model].
plt.rcParams["figure.figsize"] = (8,6)
plt.rcParams.update({'font.size': 15})
for model_name in model_names:
    print(model_name)
    coverage_model = aligned[model_name]
    overlap_rows = []
    for lang1 in languages:
        coverage_l1 = set(coverage_model[lang1])
        overlap_row = []
        for lang2 in languages:
            coverage_l2 = set(coverage_model[lang2])
            union_size = len(coverage_l1 | coverage_l2)
            # Two empty sets share nothing: report 0 instead of dividing by zero.
            Tc = len(coverage_l1 & coverage_l2) / union_size if union_size else 0.0
            # Round to the nearest percent; int() alone would truncate
            # (e.g. 0.29 * 100 == 28.999... would be shown as 28).
            overlap_row.append(int(round(Tc * 100)))
        overlap_rows.append(overlap_row)
    data = np.array(overlap_rows)
    # The matrix is symmetric, so everything above the diagonal is hidden.
    mask = np.triu(np.ones_like(data), k=1)
    hm = sn.heatmap(
        data=data,
        xticklabels=languages_iso,
        cmap="YlGnBu",
        mask=mask,
        annot=True,
        fmt='g',
        yticklabels=languages_iso)
    plt.xticks(rotation=0)
    plt.yticks(rotation=0)
    # Same colour scale (0-100) for every model.
    plt.gca().collections[0].set_clim(0,100)
    plt.savefig(f'plots/cover_{model_name}.png', bbox_inches="tight")
    plt.show()

In [None]:
# Group every score in `df` as save[model][language] -> list of scores.
save = {}
for _, row in df.iterrows():
    per_language = save.setdefault(row['model'], {})
    per_language.setdefault(row['languages'], []).append(row['score'])

In [None]:
# Replace each list of scores in `save` by its mean, in place. The cell is not
# re-runnable on its own: rebuild `save` before running it again.
for per_language in save.values():
    for lang in per_language:
        lang_scores = per_language[lang]
        per_language[lang] = sum(lang_scores) / len(lang_scores)

In [None]:
import numpy as np 
import matplotlib.pyplot as plt 

In [None]:
# Models to report, in table order (an alias of `model_names`, not a copy).
plot_models=model_names
# Language order as stored in `save` for the first model.
plot_languages=list(save['bloomz-7b1'].keys())
# Bar width and one x position per language; neither is read by the cells
# visible in this notebook.
barWidth = 0.10
r=np.arange(len(plot_languages))
# Global matplotlib default: wide, short figures from here on.
plt.rcParams["figure.figsize"] = (15,3)

In [None]:
# Display names for the tables; entry i is the label of model_names[i].
Model_names = [
    'Bloomz-7b1',
    'LLaMA-2-7-b-chat',
    'Mistral-7B-Instruct',
    'Meta-LLaMA-3-8b-Instruct',
    'LLaMA-2-13-b-chat',
    'Aya',
    'GPT-4',
    'Mixtral-8x7B',
]

In [None]:
plot_languages

In [None]:
# Table: for every entity type, one LaTeX row per model with the accuracy (%)
# per language, the average over all languages and the average over the four
# languages listed in `western_languages`.
countries_list=['us','china','india','uk','japan','germany', 'russia','mexico', 'spain']
# Languages are selected by name so the second average does not depend on the
# order in which languages happen to appear in the data.
western_languages = ('English', 'German', 'Spanish', 'Russian')
for entity_type in entity_types:
    print(entity_type)
    dfe = df[df['entity_type'] == entity_type]
    # save[model][language] -> list of scores, then reduced to the mean.
    save = {}
    for _, row in dfe.iterrows():
        save.setdefault(row['model'], {}).setdefault(row['languages'], []).append(row['score'])
    for per_language in save.values():
        for lang, scores in per_language.items():
            per_language[lang] = sum(scores) / len(scores)
    for model, Model_name in zip(plot_models, Model_names):
        percentages = {lang: acc * 100 for lang, acc in save[model].items()}
        values_f = list(percentages.values())
        values = ['%.2f' % p for p in values_f]
        avg = sum(values_f) / len(values_f)
        avg_w = sum(percentages[lang] for lang in western_languages) / len(western_languages)
        values_o = '&'.join(values) + '&' + '%.2f' % avg + '&' + '%.2f' % avg_w + '\\\\'
        print(Model_name + '&' + values_o)

In [None]:
# Mean accuracy over languages for every model in `save`.
# NOTE(review): `save` is whatever the previous cell left behind, i.e. the
# values of the last entity type it looped over - confirm this is intended.
avg = {
    model: sum(lang_acc.values()) / len(lang_acc)
    for model, lang_acc in save.items()
}

In [None]:
avg

In [None]:
df

In [None]:
# Global matplotlib defaults for the following figures (figure size in inches,
# base font size in points).
plt.rcParams["figure.figsize"] = (9,7)
plt.rcParams.update({'font.size': 13})

In [None]:
df.head()

In [None]:
# Display label for every model identifier (used as plot titles and legend
# entries). The '-hf' repository suffix is dropped from both Llama-2 labels.
model_names_dic = {
    'bloomz-7b1': 'Bloomz-7b1',
    'Llama-2-7-b-chat-hf': 'Llama-2-7-b-chat',
    'Mistral-7B-Instruct': 'Mistral-7B-Instruct',
    'Meta-Llama-3-8b-Instruct': 'Meta-Llama-3-8b-Instruct',
    'Llama-2-13-b-chat-hf': 'Llama-2-13-b-chat',
    'Aya': 'Aya',
    'gpt-4': 'GPT-4',
    'Mixtral-8x7B': 'Mixtral-8x7B',
}

In [None]:
df.head()

In [None]:
languages

In [None]:
# Two-letter codes used as axis/legend labels; position i labels languages[i]
# (English, German, Spanish, Hindi, Russian, Japanese, Mandarin).
languages_iso=['EN','DE','ES','HI','RU','JA','ZH']

In [None]:
import numpy as np

import matplotlib.pyplot as plt
from matplotlib.patches import Circle, RegularPolygon
from matplotlib.path import Path
from matplotlib.projections.polar import PolarAxes
from matplotlib.projections import register_projection
from matplotlib.spines import Spine
from matplotlib.transforms import Affine2D


def radar_factory(num_vars, frame='circle'):
    """Create a radar chart with `num_vars` axes.

    This function creates a RadarAxes projection and registers it under the
    name ``'radar'``.

    Parameters
    ----------
    num_vars : int
        Number of variables for radar chart.
    frame : {'circle' | 'polygon'}
        Shape of frame surrounding axes.

    Returns
    -------
    theta : numpy.ndarray
        The `num_vars` evenly spaced axis angles in radians, starting at 0.

    Notes
    -----
    An unknown `frame` value is only detected when an axes of this projection
    is created; a ``ValueError`` is raised at that point.
    """
    # calculate evenly-spaced axis angles
    theta = np.linspace(0, 2*np.pi, num_vars, endpoint=False)

    class RadarTransform(PolarAxes.PolarTransform):
        def transform_path_non_affine(self, path):
            # Paths with non-unit interpolation steps correspond to gridlines,
            # in which case we force interpolation (to defeat PolarTransform's
            # autoconversion to circular arcs).
            if path._interpolation_steps > 1:
                path = path.interpolated(num_vars)
            return Path(self.transform(path.vertices), path.codes)

    class RadarAxes(PolarAxes):

        name = 'radar'

        PolarTransform = RadarTransform

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # rotate plot such that the first axis is at the top
            self.set_theta_zero_location('N')

        def plot(self, *args, **kwargs):
            """Override plot so that line is closed by default.

            Returns the list of lines created by the parent ``plot``, so
            callers can keep using the usual ``lines = ax.plot(...)`` idiom.
            """
            lines = super().plot(*args, **kwargs)
            for line in lines:
                self._close_line(line)
            return lines

        def _close_line(self, line):
            """Append the first point to the end of `line` unless already closed."""
            x, y = line.get_data()
            # FIXME: markers at x[0], y[0] get doubled-up
            if x[0] != x[-1]:
                x = np.concatenate((x, [x[0]]))
                y = np.concatenate((y, [y[0]]))
                line.set_data(x, y)

        def set_varlabels(self, labels):
            """Put one label on each of the `num_vars` spokes."""
            self.set_thetagrids(np.degrees(theta), labels)

        def _gen_axes_patch(self):
            # The Axes patch must be centered at (0.5, 0.5) and of radius 0.5
            # in axes coordinates.
            if frame == 'circle':
                return Circle((0.5, 0.5), 0.5)
            elif frame == 'polygon':
                return RegularPolygon((0.5, 0.5), num_vars,
                                      radius=.5, edgecolor="k")
            else:
                raise ValueError("unknown value for 'frame': %s" % frame)

        def draw(self, renderer):
            """ Draw. If frame is polygon, make gridlines polygon-shaped """
            if frame == 'polygon':
                gridlines = self.yaxis.get_gridlines()
                for gl in gridlines:
                    gl.get_path()._interpolation_steps = num_vars
            super().draw(renderer)

        def _gen_axes_spines(self):
            if frame == 'circle':
                return super()._gen_axes_spines()
            elif frame == 'polygon':
                # spine_type must be 'left'/'right'/'top'/'bottom'/'circle'.
                spine = Spine(axes=self,
                              spine_type='circle',
                              path=Path.unit_regular_polygon(num_vars))
                # unit_regular_polygon gives a polygon of radius 1 centered at
                # (0, 0) but we want a polygon of radius 0.5 centered at (0.5,
                # 0.5) in axes coordinates.
                spine.set_transform(Affine2D().scale(.5).translate(.5, .5)
                                    + self.transAxes)
                return {'polar': spine}
            else:
                raise ValueError("unknown value for 'frame': %s" % frame)

    register_projection(RadarAxes)
    return theta

import numpy as np 
import seaborn as sn 
import matplotlib.pyplot as plt
# Radar charts: one figure per entity type with one panel per model. Each panel
# has one spoke per country and one closed line per language showing the
# accuracy (0-1) of that language on that country's questions.

# The projection only depends on the number of spokes, so it is registered once.
N = len(countries)
theta = radar_factory(N, frame='polygon')
# Spoke labels are listed in the same order as `countries`.
spoke_labels=['China', 'Germany ', 'India  ', 'Japan ', 'Mexico', 'Russia', '  Spain', 'UK', 'US']
# One colour per language, in the order of `languages`.
colors = ['b', 'r', 'g', 'm', 'y','c','k']
labels = languages_iso

for entity_type in entity_types:
    print(entity_type)
    dfe = df[df['entity_type']==entity_type]

    # radar_plot_values[model] -> one array per language holding the accuracy
    # for every country (in the order of `countries`).
    radar_plot_values = {}
    for model_name in model_names:
        df1 = dfe[dfe['model']==model_name]
        accuracies_languages_countries = []
        for country in countries:
            df11 = df1[df1['country']==country]
            accuracies_language = []
            for lang in languages:
                df111 = df11[df11['languages']==lang]
                accuracies_language.append(sum(df111['score'])/len(df111))
            accuracies_languages_countries.append(accuracies_language)
        # Rows are countries and columns are languages; transposing yields one
        # row per language.
        data = np.array(accuracies_languages_countries)
        radar_plot_values[model_name] = list(data.T)

    fig,axs=plt.subplots(figsize=(10, 15), nrows=4, ncols=2,subplot_kw=dict(projection='radar'))
    fig.subplots_adjust(wspace=0.1, hspace=0.3, top=1, bottom=0)
    for ax,(title,case_data) in zip(axs.flat,radar_plot_values.items()):
        title_name=model_names_dic[title]
        ax.set_rgrids([0.2, 0.4, 0.6, 0.8])
        ax.set_title(title_name, weight='bold', size='medium', position=(0.5, 1.1),
                        horizontalalignment='center', verticalalignment='center')
        for d, color in zip(case_data, colors):
            ax.plot(theta, d, color=color)
        ax.set_varlabels(spoke_labels)
    fig.legend(labels=labels,bbox_to_anchor=(0.93,-0.03),
                ncol=7)
    plt.savefig(f'plots/radial_plots/{entity_type}_country.pdf', bbox_inches="tight")
    plt.show()
    # Release the figure so the large multi-panel figures do not pile up.
    plt.close(fig)
    

In [None]:
# Country display labels, in the same order as `countries`; used as the row
# labels of the heatmaps below. The padding spaces are part of the labels.
spoke_labels=['China', 'Germany ', 'India  ', 'Japan ', 'Mexico', 'Russia', '  Spain', 'UK', 'US']

In [None]:
import numpy as np 
import seaborn as sn 
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt 
# Country x language accuracy heatmaps (percent), one per model, for a subset
# of the entity types.
plt.rcParams["figure.figsize"] = (8,6)
plt.rcParams.update({'font.size': 15})
# A separate name is used so the notebook-wide `entity_types` is not
# overwritten for cells that are (re-)run afterwards.
heatmap_entity_types = ['monuments','national_park']

for entity_type in heatmap_entity_types:
    print(entity_type)
    dfe = df[df['entity_type']==entity_type]
    for model_name in model_names:
        df1 = dfe[dfe['model']==model_name]
        accuracies_languages_countries = []
        for country in countries:
            df11 = df1[df1['country']==country]
            accuracies_language = []
            for lang in languages:
                df111 = df11[df11['languages']==lang]
                # Round to the nearest percent; int() alone would truncate
                # (e.g. 0.29 * 100 == 28.999... would be shown as 28).
                acc = int(round(sum(df111['score'])/len(df111)*100))
                accuracies_language.append(acc)
            accuracies_languages_countries.append(accuracies_language)
        data = np.array(accuracies_languages_countries)

        hm = sn.heatmap(
            data=data,
            xticklabels=languages_iso,
            cmap="YlGnBu",
            annot=True,
            fmt='g',
            yticklabels=spoke_labels)
        # Without a title the figures of different models cannot be told apart.
        plt.title(f'{model_names_dic[model_name]} ({entity_type})')
        plt.xticks(rotation=0)
        plt.yticks(rotation=0)
        # Same colour scale (0-100) for every heatmap.
        plt.gca().collections[0].set_clim(0,100)
        plt.show()

In [None]:
import numpy as np 
import seaborn as sn 
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt 
# plot_info[model] -> array of shape (number of countries, 1) holding the
# accuracy (%) of the model on each country's questions, averaged over the
# languages and computed over all entity types together.
plt.rcParams["figure.figsize"] = (8,6)
plt.rcParams.update({'font.size': 15})
plot_info = {}
countries_list = ['us','china','india','uk','japan','germany', 'russia','mexico', 'spain']
for model_name in model_names:
    df1 = df[df['model']==model_name]
    accuracies_languages_countries = []
    for country in countries_list:
        df11 = df1[df1['country']==country]
        accuracies_language = []
        for lang in languages:
            df111 = df11[df11['languages']==lang]
            # Percentages stay as floats: truncating each one to an integer
            # before averaging would bias the mean downwards.
            accuracies_language.append(sum(df111['score'])/len(df111)*100)
        accuracies_languages_countries.append(accuracies_language)
    data = np.array(accuracies_languages_countries)
    # Average over the language axis, keeping one row per country.
    data = np.mean(data,axis=1,keepdims=True)

    plot_info[model_name] = data

In [None]:
# Tick labels for the bar chart, in the same order as the lower-case country
# keys used to build `plot_info`.
countries_list=['US','China','India','UK','Japan','Germany', 'Russia','Mexico', 'Spain']

In [None]:
# Display label for every model identifier (used as legend entries). The '-hf'
# repository suffix is dropped from both Llama-2 labels.
model_names_dic = {
    'bloomz-7b1': 'Bloomz-7b1',
    'Llama-2-7-b-chat-hf': 'Llama-2-7-b-chat',
    'Mistral-7B-Instruct': 'Mistral-7B-Instruct',
    'Meta-Llama-3-8b-Instruct': 'Meta-Llama-3-8b-Instruct',
    'Llama-2-13-b-chat-hf': 'Llama-2-13-b-chat',
    'Aya': 'Aya',
    'gpt-4': 'GPT-4',
    'Mixtral-8x7B': 'Mixtral-8x7B',
}

In [None]:
import numpy as np 
import matplotlib.pyplot as plt
# Grouped bar chart: one group per country, one bar per model, showing the
# per-country accuracy stored in `plot_info`.
plt.rcParams["figure.figsize"] = (20,5)
plt.rcParams.update({'font.size': 20})
N = len(plot_info)                      # number of bars in each group
width = 0.1
ind = np.arange(len(countries_list))    # left edge position of each group
for i,(model_name,acc) in enumerate(plot_info.items()):
    plt.bar(ind+width*i, acc.flatten(), width, label=model_names_dic[model_name])
# Centre each tick under its group of N bars.
plt.xticks(ind + width*(N-1)/2, countries_list)
plt.xlabel("Countries")
plt.ylabel("Accuracy")
# The legend has to exist before the figure is written, otherwise the saved
# PDF has no legend; bbox_inches="tight" keeps the legend placed below the axes.
plt.legend(ncol=4,loc=(0,-0.42))
plt.savefig(f'plots/country_performance.pdf', bbox_inches="tight")
plt.show()
