In [36]:
import pandas as pd
import numpy as np
import scipy.stats
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from statsmodels.stats.multitest import multipletests



In [2]:
# Survey wave number; interpolated into the WVS MCM data file name loaded below.
wave = 7

In [3]:
# Load the WVS tables for the configured wave. All three paths are derived
# from `wave` so the inputs stay consistent when the wave is changed in one
# place (previously only the MCM file followed `wave`; the other two paths
# hard-coded "w7").
wvs_mcm = pd.read_csv(f'../data/wvs_w{wave}_mcm.csv')
wvs_gpt3 = pd.read_csv(f'../data/wvs_w{wave}_gpt3.csv')
wvs_gpt2 = pd.read_csv(f'../data/wvs_w{wave}_gpt2_token_pairs.csv')

In [4]:
# GPT-2 variant names. Only 'gpt2' appears in the result rows built by the
# evaluation cells visible in this section.
gpt2_models = [
    'gpt2',
    'gpt2-medium',
    'gpt2-large',
]

In [5]:
# Rows of the GPT-2 table whose country is the literal string 'universal'.
is_universal = wvs_gpt2['country'] == 'universal'
wvs_gpt2_universal = wvs_gpt2.loc[is_universal]

In [25]:
# Names of the evaluation splits iterated over by the evaluation loops; each
# one is used verbatim as a key into the loaded evaluation pairs.
# NOTE(review): separators are inconsistent ('country_based' vs 'topic based');
# keep them exactly as written because they are dictionary keys.
test_types = [
    'random',
    'country_based',
    'topic based',
    'removed topics',
]

In [11]:
# Load the WVS evaluation pairs (indexed by test type in the evaluation loop).
# The context manager closes the file handle deterministically; the previous
# bare open() left it open until garbage collection.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open('../data/wvs_eval_pairs.p', 'rb') as pairs_file:
    all_eval_pairs = pickle.load(pairs_file)

In [18]:
# Reload the GPT-2 token-pair scores for the configured wave.
# The path now interpolates `wave`; the original f-string had no placeholder
# and silently hard-coded "w7".
wvs_gpt2 = pd.read_csv(f'../data/wvs_w{wave}_gpt2_token_pairs.csv')  # PRE-TRAINED MODEL

# Keep only country-specific rows (drop the 'universal' rows), then discard
# rows without a WVS score, since that column is correlated against later.
wvs_gpt2_cultural = wvs_gpt2.loc[wvs_gpt2['country'] != 'universal']
wvs_gpt2_cultural = wvs_gpt2_cultural.loc[wvs_gpt2_cultural['wvs_score'].notna()]

In [19]:
# Maps verb-phrase topic wording to a noun-phrase wording of the same topic.
# NOTE(review): not referenced by any cell visible in this section; presumably
# the values follow the PEW survey wording -- confirm before relying on it.
pew_topics = {
    'use contraceptives': 'using contraceptives',
    'get a divorce': 'getting a divorce',
    'have an abortion': 'having an abortion',
    'be homosexual': 'homosexuality',
    'drink alcohol': 'drinking alcohol',
    'have an extramarital affair': 'married people having an affair',
    'gamble': 'gambling',
    'have sex between unmarried adults': 'sex between unmarried adults',
}

In [59]:
def included_function(pairs, data):
    """Build a row predicate that tests (country, topic) membership in `pairs`.

    Args:
        pairs: Container of (country, topic) tuples; membership is tested
            with the `in` operator.
        data: Accepted for the callers' convenience but not used here.

    Returns:
        A one-argument callable. Given a row supporting ``row['country']``
        and ``row['topic']``, it returns whether that (country, topic) tuple
        is contained in `pairs`.
    """
    def is_included(row):
        key = (row['country'], row['topic'])
        return key in pairs

    return is_included

## Evaluating on WVS

In [33]:
# One result row per evaluation split: Pearson correlation between the WVS
# score and the GPT-2 'log prob difference' over the rows of that split.
list_rows = []
for test_type in test_types:
    eval_pairs = all_eval_pairs[test_type]
    is_eval_row = included_function(eval_pairs, 'wvs')

    # Flag rows on a copy so the shared frame never gains the 'included' column.
    wvs_gpt2_cultural_copy = wvs_gpt2_cultural.copy()
    wvs_gpt2_cultural_copy['included'] = wvs_gpt2_cultural_copy.apply(is_eval_row, axis=1)
    included_mask = wvs_gpt2_cultural_copy['included'] == True
    wvs_gpt2_cultural_test = wvs_gpt2_cultural_copy.loc[included_mask]

    r, p = scipy.stats.pearsonr(
        wvs_gpt2_cultural_test['wvs_score'],
        wvs_gpt2_cultural_test['log prob difference'],
    )
    row = {
        'model': 'gpt2',
        'test type': test_type,
        'r': r,
        'p': p,
        'n': len(wvs_gpt2_cultural_test),
    }
    list_rows.append(row)



In [43]:
df = pd.DataFrame(list_rows)
# Overwrite the raw p-values with Bonferroni-adjusted ones (element [1] of the
# multipletests result). alpha affects the reject flags and corrected alpha
# levels returned alongside, not the adjusted p-values, so the table is
# unchanged by it; it was 0.5, which looks like a typo for the conventional
# 0.05 and would mislead anyone who later uses the reject flags.
df['p'] = multipletests(df['p'], method='bonferroni', alpha=0.05)[1]
df

Unnamed: 0,model,test type,r,p,n
0,gpt2,random,0.271075,0.000325,206
1,gpt2,country_based,0.225309,0.005058,202
2,gpt2,topic based,0.285583,8.1e-05,216
3,gpt2,removed topics,0.27392,0.000212,212


## Evaluating on PEW

In [56]:
# Load the PEW MCM and GPT-3 CSV tables.
pew_mcm = pd.read_csv('../data/pew_mcm.csv')
pew_gpt3 = pd.read_csv('../data/pew_gpt3.csv')
# Renames topic labels; applied to the 'topic' column of the PEW GPT-2 table
# in the PEW evaluation cell.
# NOTE(review): 'using contraceptices' (sic) and the capitalised
# 'Homosexuality' differ from the spellings in `pew_topics`. They presumably
# have to match the topic strings stored in the PEW evaluation pairs, so they
# are reproduced verbatim -- confirm against the data before "fixing" them.
gpt2_topic_mapping = {
    'use contraceptives': 'using contraceptices',
    'get a divorce': 'getting a divorce',
    'have an abortion': 'having an abortion',
    'be homosexual': 'Homosexuality',
    'drink alcohol': 'drinking alcohol',
    'have an extramarital affair': 'married people having an affair',
    'gamble': 'gambling',
    'have sex between unmarried adults': 'sex between unmarried adults',
}

In [61]:
list_rows = []

# Load the PEW evaluation pairs. The context manager closes the file handle
# deterministically; the previous bare open() left it open until garbage
# collection.
# NOTE: this rebinds `all_eval_pairs`, replacing the WVS pairs loaded earlier;
# re-running the WVS evaluation cell after this one would index the PEW pairs.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open('../data/pew_eval_pairs.p', 'rb') as pairs_file:
    all_eval_pairs = pickle.load(pairs_file)

pew_gpt2 = pd.read_csv('../data/pew_gpt2_token_pairs.csv')
# Rename topics through gpt2_topic_mapping; a topic that is missing from the
# mapping raises KeyError rather than being passed through silently.
pew_gpt2['topic'] = pew_gpt2['topic'].apply(lambda t: gpt2_topic_mapping[t])
# Keep only country-specific rows (drop the 'universal' rows).
# NOTE(review): unlike the WVS cell, rows with a missing 'pew_score' are not
# filtered out here -- confirm the PEW table has none.
pew_gpt2_cultural = pew_gpt2.loc[pew_gpt2['country'] != 'universal']

# One result row per evaluation split: Pearson correlation between the PEW
# score and the GPT-2 'log prob difference' over the rows of that split.
for test_type in test_types:
    eval_pairs = all_eval_pairs[test_type]

    # Flag rows on a copy so the shared frame never gains the 'included' column.
    pew_gpt2_cultural_copy = pew_gpt2_cultural.copy()
    pew_gpt2_cultural_copy['included'] = pew_gpt2_cultural_copy.apply(included_function(eval_pairs, 'pew2'), axis=1)
    pew_gpt2_cultural_test = pew_gpt2_cultural_copy.loc[pew_gpt2_cultural_copy.included == True]

    r, p = scipy.stats.pearsonr(pew_gpt2_cultural_test['pew_score'], pew_gpt2_cultural_test['log prob difference'])
    row = {'model': 'gpt2', 'test type': test_type, 'r': r, 'p': p, 'n': len(pew_gpt2_cultural_test)}
    list_rows.append(row)

    
    

In [62]:
df = pd.DataFrame(list_rows)
# Overwrite the raw p-values with Bonferroni-adjusted ones (element [1] of the
# multipletests result). alpha affects the reject flags and corrected alpha
# levels returned alongside, not the adjusted p-values, so the table is
# unchanged by it; it was 0.5, which looks like a typo for the conventional
# 0.05 and would mislead anyone who later uses the reject flags.
df['p'] = multipletests(df['p'], method='bonferroni', alpha=0.05)[1]
df

Unnamed: 0,model,test type,r,p,n
0,gpt2,random,0.203729,0.760286,43
1,gpt2,country_based,0.054602,1.0,48
2,gpt2,topic based,-0.145769,0.811463,78
3,gpt2,removed topics,-0.108845,1.0,78
