In [45]:
import pandas as pd
import numpy as np
import scipy.stats
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from statsmodels.stats.multitest import multipletests


In [46]:
models = ['country_based','topic based','random','removed topics']


In [47]:
index = 'token_pairs'

## Evaluating on WVS

In [69]:
list_rows = []
for model in models:
    pew_gpt2 = pd.read_csv(f'../data/wvs_w7_gpt2_{model}_on_pew_{index}.csv')
    pew_gpt2_cultural = pew_gpt2.loc[pew_gpt2['country'] != 'universal']
    pew_gpt2_cultural = pew_gpt2_cultural.loc[~pd.isna(pew_gpt2_cultural['wvs_score'])]
    r, p = (scipy.stats.pearsonr(pew_gpt2_cultural['wvs_score'], pew_gpt2_cultural['log prob difference']))
    row = {'model':'gpt2','train_data' : 'PEW', 'eval_data': 'WVS',
          'strategy': model, 'r': r, 'p': p, 'n': len(pew_gpt2_cultural)}
    list_rows.append(row)


In [70]:
df = pd.DataFrame(list_rows)
df['p'] = multipletests(df['p'], method = 'bonferroni', alpha = 0.5)[1]
df

Unnamed: 0,model,train_data,eval_data,strategy,r,p,n
0,gpt2,PEW,WVS,country_based,0.429615,8.0349260000000005e-47,1028
1,gpt2,PEW,WVS,topic based,0.345824,1.19861e-29,1028
2,gpt2,PEW,WVS,random,0.409723,2.7604199999999998e-42,1028
3,gpt2,PEW,WVS,removed topics,0.408505,5.116956e-42,1028


## Evaluating on PEW
we have to remove seen pairs from evaluation set

In [71]:
pew_topics = {'use contraceptives':'using contraceptives',
           'get a divorce':'getting a divorce', 
            'have an abortion': 'having an abortion',
            'be homosexual': 'homosexuality', 
           'drink alcohol': 'drinking alcohol',
           'have an extramarital affair': 'married people having an affair' ,
             'gamble': 'gambling',
       'have sex between unmarried adults':'sex between unmarried adults'
                         }
pew_topics_list = list(pew_topics.keys())



In [72]:
def included_function(pairs):
    def func(row):
        return (row['country'], pew_topics[row['topic']]) in pairs
    return func

In [76]:
all_eval_pairs = pickle.load(open('../data/pew_eval_pairs.p', 'rb'))
list_rows = []
for model in models:
    eval_pairs = all_eval_pairs[model]
    pew_gpt2 = pd.read_csv(f'../data/pew_gpt2_{model}_on_pew_{index}.csv')
    pew_gpt2_cultural = pew_gpt2.loc[pew_gpt2['country'] != 'universal']
    
    pew_gpt2_cultural = pew_gpt2_cultural.loc[~pd.isna(pew_gpt2_cultural['pew_score'])]
    pew_gpt2_cultural['in_eval'] = pew_gpt2_cultural.apply(included_function(eval_pairs), axis = 1)
    pew_gpt2_cultural = pew_gpt2_cultural.loc[pew_gpt2_cultural.in_eval == True]

    r, p = scipy.stats.pearsonr(pew_gpt2_cultural['pew_score'], pew_gpt2_cultural['log prob difference'])
    row = {'model':'gpt2','train_data' : 'PEW', 'eval_data': 'PEW',
          'strategy': model, 'r': r, 'p': p, 'n': len(pew_gpt2_cultural)}
    list_rows.append(row)

In [77]:
df = pd.DataFrame(list_rows)
df['p'] = multipletests(df['p'], method = 'bonferroni', alpha = 0.5)[1]
df

Unnamed: 0,model,train_data,eval_data,strategy,r,p,n
0,gpt2,PEW,PEW,country_based,0.764227,8.10267e-13,64
1,gpt2,PEW,PEW,topic based,0.732871,9.571141e-14,78
2,gpt2,PEW,PEW,random,0.817717,1.153172e-15,63
3,gpt2,PEW,PEW,removed topics,0.7493,1.026196e-21,117


# Variation study

### WVS

In [78]:
topics = ['claiming government benefits to which you are not entitled',
 'avoiding a fare on public transport',
 'stealing property',
 'cheating on taxes',
 'someone accepting a bribe in the course of their duties',
 'homosexuality',
 'prostitution',
 'abortion',
 'divorce',
 'sex before marriage',
 'suicide',
 'euthanasia',
 'for a man to beat his wife',
 'parents beating children',
 'violence against other people',
 'terrorism as a political, ideological or religious mean',
 'having casual sex',
 'political violence',
 'death penalty']

In [79]:
variation_rows = []

for i, model in enumerate(models):
    if model == 'topic based':
        model_refined = 'topic_based'
    elif model == 'removed topics':
        model_refined = 'removed_topic'
    else:
        model_refined = model
    wvs_gpt2 = pd.read_csv(f'../data/wvs_w7_gpt2_{model}_on_pew_{index}.csv')
    wvs_gpt2_cultural = wvs_gpt2.loc[wvs_gpt2['country'] != 'universal']
    df = wvs_gpt2_cultural.loc[~pd.isna(wvs_gpt2_cultural['wvs_score'])]

    for t in topics:

        pew_gpt2_t = df.loc[df.topic == t]
        user_var = np.var(pew_gpt2_t['wvs_score'])
        model_var = np.var(pew_gpt2_t['log prob difference'])
        row = {'model': model, 'user variation': user_var, 'model variation': model_var, 'topic': t}
        variation_rows.append(row)

df = pd.DataFrame(variation_rows)

   


In [80]:
list_rows = []
for i, model in enumerate(models):
    
    model_df = df.loc[df.model == model] 
    r, p = scipy.stats.pearsonr(model_df['model variation'],model_df['user variation'])
    
    row = {'strategy': model, 'r': r, 'p':p,
          'n': len(model_df)}
    list_rows.append(row)

In [81]:
df = pd.DataFrame(list_rows)
df['p'] = multipletests(df['p'], method = 'bonferroni', alpha = 0.5)[1]
df

Unnamed: 0,strategy,r,p,n
0,country_based,0.893124,1e-06,19
1,topic based,0.835409,3.4e-05,19
2,random,0.869576,5e-06,19
3,removed topics,0.553394,0.0559,19


## PEW

In [82]:
pew_topics = {'using contraceptives':['using contraceptives', 'use contraceptives'],
           'getting a divorce': ['getting a divorce','get a divorce'], 
           'having an abortion': ['having an abortion','have an abortion'],
           'homosexuality': ['homosexuality','be homosexual'], 
           'drinking alcohol': ['drinking alcohol','drink alcohol'],
           'married people having an affair' : ['married people having an affair', 'have an extramarital affair'],
            'gambling': ['gambling','gamble'],
       'sex between unmarried adults': [ 'sex between unmarried adults','have sex between unmarried adults']
                         }

pew_topics_list = list(pew_topics.keys())

variation_rows = []

for i, model in enumerate(models):
    if model == 'topic based':
        model_refined = 'topic_based'
    elif model == 'removed topics':
        model_refined = 'removed_topic'
    else:
        model_refined = model
    pew_gpt2 = pd.read_csv(f'../data/pew_gpt2_{model}_on_pew_{index}.csv')
    pew_gpt2_cultural = pew_gpt2.loc[wvs_gpt2['country'] != 'universal']
    df = pew_gpt2_cultural.loc[~pd.isna(pew_gpt2_cultural['pew_score'])]

    for t in pew_topics:

        pew_gpt2_t = df.loc[df.topic.isin(pew_topics[t])]
        user_var = np.var(pew_gpt2_t['pew_score'])
        model_var = np.var(pew_gpt2_t['log prob difference'])
        row = {'model': model, 'user variation': user_var, 'model variation': model_var, 'topic': t}
        variation_rows.append(row)

df = pd.DataFrame(variation_rows)

   




In [83]:
list_rows = []
for i, model in enumerate(models):
    
    model_df = df.loc[df.model == model]
    

    slope, intercept, r, p, std_err =\
    scipy.stats.linregress(model_df['model variation'],model_df['user variation'])
    
    
    row = {'strategy': model, 'r': r, 'p':p,'slope':slope,
          'n': len(model_df)}
    list_rows.append(row)
    


In [84]:
df = pd.DataFrame(list_rows)
df['p'] = multipletests(df['p'], method = 'bonferroni', alpha = 0.5)[1]
df

Unnamed: 0,strategy,r,p,slope,n
0,country_based,0.839375,0.036609,0.482747,8
1,topic based,0.953592,0.000965,0.693236,8
2,random,0.94457,0.001633,0.517255,8
3,removed topics,0.283979,1.0,0.283432,8
