In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from scipy.stats import spearmanr, pearsonr
import statsmodels.api as sm
import pickle
import warnings
import statsmodels.api as sm
import statsmodels.formula.api as smf
# NOTE(review): this silences every warning category for the rest of the
# session, so pandas/statsmodels warnings raised by later cells are hidden too.
# Consider narrowing the filter to the specific warning that is noisy.
warnings.filterwarnings('ignore')

In [2]:
# Load the NYT time-series data, then average outputs_z so that there is a
# single value per (words, year, property) combination.
nyt_ts_raw = pd.read_csv('./data/SWOW_prediction/eval/time_series/nyt_ts_df.csv')
ts_df = (
    nyt_ts_raw
    .groupby(['words', 'year', 'property'])['outputs_z']
    .mean()
    .reset_index()
)

### Loading Gallup moral issues


In [3]:
# Gallup issue identifiers; each one is also the stem of its CSV file name
# under ./data/Gallup/ and a key of token_mapping.
issues = [
    'abortion',
    'animal_cloning',
    'human_cloning',
    'animal_medical_testing',
    'sex',
    'birth_control',
    'death_penalty',
    'divorce',
    'samesex',
    'fur',
    'polygamy',
    'pornography',
    'stem_cell',
    'suicide',
    'gambling',
    'euthanasia',
    'affair',
]

In [4]:
# One raw Gallup table per issue, keyed by the issue identifier.
dfs = dict(
    (issue, pd.read_csv(f'./data/Gallup/{issue}.csv'))
    for issue in issues
)

In [5]:
# Turn the raw Gallup percentage strings into two scores per survey row:
#   polarity_vote = (acceptable - wrong) / total_vote
#   morality_vote = (acceptable + wrong) / total_vote
# where total_vote also includes the "Not a moral issue" share.
columns = ['Morally acceptable', 'Morally wrong', 'Not a moral issue (vol.)']
signs = [1, -1, 0]

for issue, df in dfs.items():
    df = df.fillna("0%")
    # .copy() gives an independent frame, so the column assignments below do
    # not write into a filtered view of the previous frame.
    df = df.loc[df.Geography != '0%'].copy()  # Removing empty lines

    for column in columns:
        # Going through str first keeps this cell re-runnable: on a second run
        # the values are already floats and have no .replace method.
        df[column] = (
            df[column]
            .astype(str)
            .str.replace('%', '', regex=False)
            .astype(float)
        )

    df['total_vote'] = df[columns].sum(axis=1)

    # Column-wise arithmetic instead of a Python loop over rows.
    signed_sum = sum(df[column] * sign for column, sign in zip(columns, signs))
    moral_sum = sum(df[column] * abs(sign) for column, sign in zip(columns, signs))

    # A row whose three shares sum to zero gets NaN scores instead of a
    # division-by-zero error or an infinite value.
    safe_total = df['total_vote'].where(df['total_vote'] != 0)

    df['polarity_vote'] = signed_sum / safe_total
    df['morality_vote'] = moral_sum / safe_total
    dfs[issue] = df
    
        

In [6]:
# Maps each Gallup issue identifier to the word(s) looked up in the
# `words` column of the NYT time series.
token_mapping = {
    'abortion': ['abortion'],
    'animal_cloning': ['animal cloning'],
    'human_cloning': ['human cloning'],
    'birth_control': ['birth control', 'contraceptive', 'contraception'],
    'death_penalty': ['death penalty'],
    'animal_medical_testing': ['animal testing'],
    'divorce': ['divorce'],
    'samesex': ['homosexuality', 'homosexual'],
    'fur': ['fur'],
    'sex': ['sex'],
    'polygamy': ['polygamy'],
    'pornography': ['pornography', 'porn'],
    'stem_cell': ['stem cell'],
    'suicide': ['suicide'],
    'gambling': ['gambling'],
    'euthanasia': ['euthanasia'],
    'affair': ['adultery'],
}


In [7]:
# Attach the Gallup scores to every NYT token row of each issue by matching the
# token's year against the survey's Time column. Token rows whose year has no
# survey entry get None for both scores.
issue_frames = []
for issue in issues:
    issue_df = dfs[issue]
    tokens = token_mapping[issue]
    # reset_index() without drop keeps the ts_df row label in an 'index' column.
    token_df = ts_df.loc[ts_df.words.isin(tokens)].reset_index()

    # Build the year -> (morality_vote, polarity_vote) lookup once per issue
    # instead of scanning issue_df for every token row. setdefault keeps the
    # first survey row of a year, i.e. the row the scan would have picked.
    votes_by_year = {}
    for survey_year, morality, polarity in zip(
        issue_df['Time'], issue_df['morality_vote'], issue_df['polarity_vote']
    ):
        votes_by_year.setdefault(survey_year, (morality, polarity))

    matched = [votes_by_year.get(year, (None, None)) for year in token_df['year']]
    token_df['morality_vote'] = [morality for morality, _ in matched]
    token_df['polarity_vote'] = [polarity for _, polarity in matched]
    token_df['issue'] = [issue] * len(token_df)
    issue_frames.append(token_df)

# Concatenate once at the end; growing a DataFrame inside the loop re-copies
# all previously collected rows on every iteration.
total_df = pd.concat(issue_frames, ignore_index=True) if issue_frames else pd.DataFrame()
        
    

In [8]:
# Keep only the token rows that received a survey score.
has_survey_score = total_df['morality_vote'].notna()
total_df = total_df[has_survey_score]

In [9]:
# Inspect the token rows that were matched to a survey year.
total_df

Unnamed: 0,index,words,year,property,outputs_z,morality_vote,polarity_vote,issue
28,824,abortion,2001,polarity,-0.102111,0.988764,-0.044944,abortion
29,825,abortion,2001,previous_link,1.097170,0.988764,-0.044944,abortion
30,826,abortion,2002,polarity,-0.087773,1.000000,-0.155556,abortion
31,827,abortion,2002,previous_link,1.002303,1.000000,-0.155556,abortion
32,828,abortion,2003,polarity,0.021050,1.000000,-0.177778,abortion
...,...,...,...,...,...,...,...,...
637,7909,adultery,2005,previous_link,3.113356,1.000000,-0.897959,affair
638,7910,adultery,2006,polarity,-1.404861,1.000000,-0.917526,affair
639,7911,adultery,2006,previous_link,2.745052,1.000000,-0.917526,affair
640,7912,adultery,2007,polarity,-1.624830,1.000000,-0.876289,affair


In [10]:
# Average the model outputs and the survey scores per
# (issue, year, property, words) combination.
group_keys = ['issue', 'year', 'property', 'words']
value_columns = ['outputs_z', 'morality_vote', 'polarity_vote']
total_df = (
    total_df
    .groupby(group_keys)[value_columns]
    .mean()
    .reset_index()
)

In [11]:
# Inspect the per-(issue, year, property, words) averages.
total_df

Unnamed: 0,issue,year,property,words,outputs_z,morality_vote,polarity_vote
0,abortion,2001,polarity,abortion,-0.102111,0.988764,-0.044944
1,abortion,2001,previous_link,abortion,1.097170,0.988764,-0.044944
2,abortion,2002,polarity,abortion,-0.087773,1.000000,-0.155556
3,abortion,2002,previous_link,abortion,1.002303,1.000000,-0.155556
4,abortion,2003,polarity,abortion,0.021050,1.000000,-0.177778
...,...,...,...,...,...,...,...
125,suicide,2005,previous_link,suicide,2.139112,1.000000,-0.726316
126,suicide,2006,polarity,suicide,-2.806628,0.989362,-0.670213
127,suicide,2006,previous_link,suicide,1.887492,0.989362,-0.670213
128,suicide,2007,polarity,suicide,-2.652657,0.989474,-0.652632


In [13]:
# Saving moral scores and empirical ratings for every two consecutive years.
# For each row, look up the rows of the same issue and property in the
# following year (not filtered by word) and record both values plus their
# difference.
issues = total_df.issue.unique()
list_rows = []
for _, current in total_df.iterrows():
    issue = current['issue']
    year = current['year']
    p = current['property']
    word = current['words']

    same_issue_and_property = (total_df.issue == issue) & (total_df.property == p)
    following = total_df.loc[same_issue_and_property & (total_df.year == year + 1)]
    if len(following) == 0:
        # No data for the following year: this row cannot start a pair.
        continue

    p1 = current['outputs_z']
    p2 = following.outputs_z.mean()

    pol_vote_1 = current['polarity_vote']
    pol_vote_2 = following.polarity_vote.mean()

    morality_vote_1 = current['morality_vote']
    morality_vote_2 = following.morality_vote.mean()

    record = {
        'words': word,
        'property': p,
        'p1': p1,
        'p2': p2,
        'p_change': p2 - p1,
        'pol_vote_1': pol_vote_1,
        'pol_vote_2': pol_vote_2,
        'pol_vote_change': (pol_vote_2 - pol_vote_1),
        'morality_vote_1': morality_vote_1,
        'morality_vote_2': morality_vote_2,
        'morality_vote_change': (morality_vote_2 - morality_vote_1),
        'year1': year,
        'year2': year + 1,
        'issue': issue,
    }
    list_rows.append(record)

rate_df = pd.DataFrame(list_rows)

In [14]:

def _zscore(values):
    """Return `values` centred on its mean and scaled by its standard deviation."""
    return (values - values.mean()) / values.std()


# Standardised copies of the change columns, stored under a `_z` suffix.
for change_column in ('p_change', 'pol_vote_change', 'morality_vote_change'):
    rate_df[change_column + '_z'] = _zscore(rate_df[change_column])

# Normalize morality_vote and polarity_vote (these columns are overwritten).
for level_column in ('pol_vote_1', 'pol_vote_2', 'morality_vote_1', 'morality_vote_2'):
    rate_df[level_column] = _zscore(rate_df[level_column])


In [15]:
# Discard year pairs with any missing value and renumber the rows from 0.
rate_df = (
    rate_df
    .dropna()
    .reset_index(drop=True)
)

In [16]:
# Split the long frame by property, then place the two halves side by side:
# rows are aligned on the shared word / vote / issue / year columns, and the
# remaining columns get a `_relevance` or `_polarity` suffix.
is_relevance = rate_df.property == 'previous_link'
is_polarity = rate_df.property == 'polarity'
rate_df_relevance = rate_df.loc[is_relevance]
rate_df_polarity = rate_df.loc[is_polarity]

indices = [
    'words',
    'pol_vote_1',
    'pol_vote_2',
    'pol_vote_change_z',
    'pol_vote_change',
    'morality_vote_1',
    'morality_vote_2',
    'morality_vote_change_z',
    'morality_vote_change',
    'issue',
    'year1',
    'year2',
]

relevance_indexed = rate_df_relevance.set_index(indices)
polarity_indexed = rate_df_polarity.set_index(indices)
rate_df = relevance_indexed.join(
    polarity_indexed,
    lsuffix='_relevance',
    rsuffix='_polarity',
)



In [17]:
# Move the join keys from the index back into ordinary columns. Reassigning
# the result (rather than inplace=True) leaves the joined frame object itself
# unmodified.
rate_df = rate_df.reset_index()

In [18]:
# Inspect the wide frame with the relevance and polarity columns side by side.
rate_df

Unnamed: 0,words,pol_vote_1,pol_vote_2,pol_vote_change_z,pol_vote_change,morality_vote_1,morality_vote_2,morality_vote_change_z,morality_vote_change,issue,...,property_relevance,p1_relevance,p2_relevance,p_change_relevance,p_change_z_relevance,property_polarity,p1_polarity,p2_polarity,p_change_polarity,p_change_z_polarity
0,abortion,-0.096147,-0.351897,-2.161673,-0.110612,-0.772897,0.760075,1.292182,0.011236,abortion,...,previous_link,1.09717,1.002303,-0.094867,-0.738329,polarity,-0.102111,-0.087773,0.014338,0.078405
1,abortion,-0.338576,-0.400256,-0.540207,-0.022222,0.724535,0.760075,-0.051166,0.0,abortion,...,previous_link,1.002303,0.893652,-0.108651,-0.841413,polarity,-0.087773,0.02105,0.108823,0.785045
2,abortion,-0.387281,-0.255179,1.090419,0.066667,0.724535,0.760075,-0.051166,0.0,abortion,...,previous_link,0.893652,0.981614,0.087962,0.629031,polarity,0.02105,-0.070877,-0.091927,-0.716337
3,abortion,-0.241167,-0.276435,-0.311741,-0.009768,0.724535,0.760075,-0.051166,0.0,abortion,...,previous_link,0.981614,0.956254,-0.02536,-0.218495,polarity,-0.070877,-0.091917,-0.02104,-0.186185
4,abortion,-0.262575,-0.038397,1.874065,0.109385,0.724535,0.760075,-0.051166,0.0,abortion,...,previous_link,0.956254,0.958333,0.002078,-0.013284,polarity,-0.091917,-0.062178,0.029739,0.193583
5,abortion,-0.022835,-0.276435,-2.139166,-0.109385,0.724535,0.760075,-0.051166,0.0,abortion,...,previous_link,0.958333,0.885232,-0.0731,-0.575538,polarity,-0.062178,-0.08452,-0.022342,-0.195921
6,adultery,-1.869734,-1.763284,0.785464,0.050043,0.724535,-0.804406,-1.283722,-0.010309,affair,...,previous_link,2.817441,2.754854,-0.062588,-0.496915,polarity,-1.651861,-1.391914,0.259947,1.915286
7,adultery,-1.760054,-1.925765,-1.502231,-0.074664,-0.649397,0.760075,1.18139,0.010309,affair,...,previous_link,2.754854,2.742232,-0.012621,-0.12322,polarity,-1.391914,-1.055634,0.33628,2.486173
8,adultery,-1.923697,-1.859821,0.423344,0.030303,0.724535,-0.7728,-1.258822,-0.010101,affair,...,previous_link,2.742232,2.820829,0.078597,0.558987,polarity,-1.055634,-0.938565,0.117069,0.846715
9,adultery,-1.857281,-1.967485,-1.040135,-0.049474,-0.621641,0.760075,1.15649,0.010101,affair,...,previous_link,2.820829,3.113356,0.292526,2.158942,polarity,-0.938565,-1.190634,-0.252069,-1.914023


In [19]:
# OLS of the year-over-year change in polarity vote on the start year, issue
# fixed effects, the starting polarity vote, and the two language-derived
# change scores (relevance and polarity).
formula = 'pol_vote_change ~ year1  + C(issue) + p_change_relevance + p_change_polarity + pol_vote_1'
ols_spec = smf.ols(formula=formula, data=rate_df)
model = ols_spec.fit()
model.summary()


0,1,2,3
Dep. Variable:,pol_vote_change,R-squared:,0.485
Model:,OLS,Adj. R-squared:,0.317
Method:,Least Squares,F-statistic:,2.896
Date:,"Sat, 19 Apr 2025",Prob (F-statistic):,0.00491
Time:,14:29:29,Log-Likelihood:,98.64
No. Observations:,54,AIC:,-169.3
Df Residuals:,40,BIC:,-141.4
Df Model:,13,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-22.6298,9.213,-2.456,0.018,-41.251,-4.009
C(issue)[T.affair],-0.6186,0.120,-5.159,0.000,-0.861,-0.376
C(issue)[T.death_penalty],0.4521,0.085,5.339,0.000,0.281,0.623
C(issue)[T.divorce],0.4504,0.083,5.415,0.000,0.282,0.619
C(issue)[T.fur],0.3286,0.067,4.935,0.000,0.194,0.463
C(issue)[T.gambling],0.3584,0.069,5.185,0.000,0.219,0.498
C(issue)[T.samesex],0.0492,0.033,1.488,0.145,-0.018,0.116
C(issue)[T.sex],0.2645,0.050,5.267,0.000,0.163,0.366
C(issue)[T.stem_cell],0.3267,0.057,5.753,0.000,0.212,0.441

0,1,2,3
Omnibus:,0.967,Durbin-Watson:,2.402
Prob(Omnibus):,0.617,Jarque-Bera (JB):,0.374
Skew:,-0.137,Prob(JB):,0.829
Kurtosis:,3.302,Cond. No.,3000000.0


In [20]:
# Baseline OLS: the same response and controls (start year, issue fixed
# effects, starting polarity vote) without the language-derived change scores.
formula = 'pol_vote_change ~ year1  + C(issue) + pol_vote_1'
ols_spec = smf.ols(formula=formula, data=rate_df)
model = ols_spec.fit()
model.summary()


0,1,2,3
Dep. Variable:,pol_vote_change,R-squared:,0.419
Model:,OLS,Adj. R-squared:,0.267
Method:,Least Squares,F-statistic:,2.754
Date:,"Sat, 19 Apr 2025",Prob (F-statistic):,0.00885
Time:,14:29:35,Log-Likelihood:,95.397
No. Observations:,54,AIC:,-166.8
Df Residuals:,42,BIC:,-142.9
Df Model:,11,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-16.4875,9.108,-1.810,0.077,-34.867,1.892
C(issue)[T.affair],-0.5593,0.121,-4.635,0.000,-0.803,-0.316
C(issue)[T.death_penalty],0.4071,0.085,4.777,0.000,0.235,0.579
C(issue)[T.divorce],0.4092,0.084,4.869,0.000,0.240,0.579
C(issue)[T.fur],0.2954,0.067,4.392,0.000,0.160,0.431
C(issue)[T.gambling],0.3248,0.070,4.647,0.000,0.184,0.466
C(issue)[T.samesex],0.0397,0.034,1.168,0.250,-0.029,0.108
C(issue)[T.sex],0.2373,0.051,4.698,0.000,0.135,0.339
C(issue)[T.stem_cell],0.2984,0.057,5.200,0.000,0.183,0.414

0,1,2,3
Omnibus:,0.095,Durbin-Watson:,2.32
Prob(Omnibus):,0.954,Jarque-Bera (JB):,0.19
Skew:,0.093,Prob(JB):,0.909
Kurtosis:,2.777,Cond. No.,2860000.0
