In [43]:
import pandas as pd
import numpy as np
import scipy.stats
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from statsmodels.stats.multitest import multipletests

In [44]:
models = ['country_based','topic based','random','removed topics']

In [45]:
index = 'token_pairs'

## Evaluating on PEW

In [59]:
list_rows = []
for model in models:
    pew_gpt2 = pd.read_csv(f'../data/pew_gpt2_{model}_on_wvs_{index}.csv')
    pew_gpt2_cultural = pew_gpt2.loc[pew_gpt2['country'] != 'universal']
    r, p = scipy.stats.pearsonr(pew_gpt2_cultural['pew_score'], pew_gpt2_cultural['log prob difference'])
    row = {'model':'gpt2','train_data' : 'WVS', 'eval_data': 'PEW',
          'strategy': model, 'r': r, 'p': p, 'n': len(pew_gpt2_cultural)}
    list_rows.append(row)


In [60]:
df = pd.DataFrame(list_rows)
df['p'] = multipletests(df['p'], method = 'bonferroni', alpha = 0.5)[1]
df

Unnamed: 0,model,train_data,eval_data,strategy,r,p,n
0,gpt2,WVS,PEW,country_based,0.419315,4.093773e-14,312
1,gpt2,WVS,PEW,topic based,0.297012,3.589974e-07,312
2,gpt2,WVS,PEW,random,0.510297,1.7400039999999998e-21,312
3,gpt2,WVS,PEW,removed topics,0.211972,0.0006475481,312


## Evaluating on WVS

We remove the seen topic-country pairs from evaluation set

In [61]:
def included_function(pairs):
    def func(row):
        return (row['country'], row['topic']) in pairs
    return func

In [62]:
all_eval_pairs = pickle.load(open('../data/wvs_eval_pairs.p', 'rb'))
list_rows = []
for model in models:
    eval_pairs = all_eval_pairs[model]
    if model == 'topic based':
        model_refined = 'topic_based'
    elif model == 'removed topics':
        model_refined = 'removed_topic'
    else:
        model_refined = model
    wvs_gpt2 = pd.read_csv(f'../data/wvs_w7_gpt2_{model}_on_wvs_{index}.csv')
    wvs_gpt2_cultural = wvs_gpt2.loc[wvs_gpt2['country'] != 'universal']
    wvs_gpt2_cultural = wvs_gpt2_cultural.loc[~pd.isna(wvs_gpt2_cultural['wvs_score'])]
    
    wvs_gpt2_cultural['in_eval'] = wvs_gpt2_cultural.apply(included_function(eval_pairs), axis = 1)
    wvs_gpt2_cultural = wvs_gpt2_cultural.loc[wvs_gpt2_cultural.in_eval == True]
    
    
    r, p = scipy.stats.pearsonr(wvs_gpt2_cultural['wvs_score'], wvs_gpt2_cultural['log prob difference'])
    row = {'model':'gpt2','train_data' : 'WVS', 'eval_data': 'WVS',
          'strategy': model, 'r': r, 'p': p, 'n': len(wvs_gpt2_cultural)}
    list_rows.append(row)



In [63]:
df = pd.DataFrame(list_rows)
df['p'] = multipletests(df['p'], method = 'bonferroni', alpha = 0.5)[1]
df

Unnamed: 0,model,train_data,eval_data,strategy,r,p,n
0,gpt2,WVS,WVS,country_based,0.75879,1.7067819999999998e-38,202
1,gpt2,WVS,WVS,topic based,0.508033,5.67377e-15,216
2,gpt2,WVS,WVS,random,0.831959,1.727711e-53,206
3,gpt2,WVS,WVS,removed topics,0.664373,9.198057e-28,212


## Variation study

### WVS

In [64]:
topics = ['claiming government benefits to which you are not entitled',
 'avoiding a fare on public transport',
 'stealing property',
 'cheating on taxes',
 'someone accepting a bribe in the course of their duties',
 'homosexuality',
 'prostitution',
 'abortion',
 'divorce',
 'sex before marriage',
 'suicide',
 'euthanasia',
 'for a man to beat his wife',
 'parents beating children',
 'violence against other people',
 'terrorism as a political, ideological or religious mean',
 'having casual sex',
 'political violence',
 'death penalty']

In [65]:
variation_rows = []

for i, model in enumerate(models):

    wvs_gpt2 = pd.read_csv(f'../data/wvs_w7_gpt2_{model}_on_wvs_{index}.csv')
    wvs_gpt2_cultural = wvs_gpt2.loc[wvs_gpt2['country'] != 'universal']
    df = wvs_gpt2_cultural.loc[~pd.isna(wvs_gpt2_cultural['wvs_score'])]

    for t in topics:

        pew_gpt2_t = df.loc[df.topic == t]
        user_var = np.var(pew_gpt2_t['wvs_score'])
        model_var = np.var(pew_gpt2_t['log prob difference'])
        row = {'model': model, 'user variation': user_var, 'model variation': model_var, 'topic': t}
        variation_rows.append(row)

df = pd.DataFrame(variation_rows)

   


        

In [66]:
list_rows = []
for i, model in enumerate(models):
    
    model_df = df.loc[df.model == model] 
    r, p = scipy.stats.pearsonr(model_df['model variation'],model_df['user variation'])
    
    row = {'strategy': model, 'r': r, 'p':p,
          'n': len(model_df)}
    list_rows.append(row)

In [67]:
df = pd.DataFrame(list_rows)
df['p'] = multipletests(df['p'], method = 'bonferroni', alpha = 0.5)[1]
df

Unnamed: 0,strategy,r,p,n
0,country_based,0.894113,9.875328e-07,19
1,topic based,0.835123,3.453098e-05,19
2,random,0.892575,1.110276e-06,19
3,removed topics,0.744049,0.001038551,19


## PEW

In [68]:
pew_topics = {'using contraceptives':['using contraceptives', 'use contraceptives'],
           'getting a divorce': ['getting a divorce','get a divorce'], 
           'having an abortion': ['having an abortion','have an abortion'],
           'homosexuality': ['homosexuality','be homosexual'], 
           'drinking alcohol': ['drinking alcohol','drink alcohol'],
           'married people having an affair' : ['married people having an affair', 'have an extramarital affair'],
            'gambling': ['gambling','gamble'],
       'sex between unmarried adults': [ 'sex between unmarried adults','have sex between unmarried adults']
                         }

pew_topics_list = list(pew_topics.keys())

variation_rows = []

for i, model in enumerate(models):

    pew_gpt2 = pd.read_csv(f'../data/pew_gpt2_{model}_on_wvs_{index}.csv')
    pew_gpt2_cultural = pew_gpt2.loc[wvs_gpt2['country'] != 'universal']
    df = pew_gpt2_cultural.loc[~pd.isna(pew_gpt2_cultural['pew_score'])]

    for t in pew_topics:

        pew_gpt2_t = df.loc[df.topic.isin(pew_topics[t])]
        user_var = np.var(pew_gpt2_t['pew_score'])
        model_var = np.var(pew_gpt2_t['log prob difference'])
        row = {'model': model, 'user variation': user_var, 'model variation': model_var, 'topic': t}
        variation_rows.append(row)

df = pd.DataFrame(variation_rows)

   



In [69]:
list_rows = [ ]
for i, model in enumerate(models):
    
    model_df = df.loc[df.model == model]
    
    r, p = scipy.stats.pearsonr(model_df['model variation'],model_df['user variation'])
    
    row = {'strategy': model, 'r': r, 'p':p,
          'n': len(model_df)}
    list_rows.append(row)


In [70]:
df = pd.DataFrame(list_rows)
df['p'] = multipletests(df['p'], method = 'bonferroni', alpha = 0.5)[1]
df

Unnamed: 0,strategy,r,p,n
0,country_based,0.570803,0.557971,8
1,topic based,0.395831,1.0,8
2,random,0.62965,0.377324,8
3,removed topics,-0.092149,1.0,8
