In [None]:
# author: Jana Lasser & Almog Simchon

In [1]:
import pandas as pd
import numpy as np
from os.path import join
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.stattools import durbin_watson
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
import matplotlib.pyplot as plt

In [2]:
# Hex colour codes, one per party label (Democrat, Republican, independent).
demblue, repred, indgrey = "#0015BC", "#FF0000", "#696969"

In [8]:
# Directory and file name of the per-account table; both names are reused
# (and reassigned) by later cells.
src, fname = "../../data/users", "US_politician_accounts_2010-11-06_to_2022-03-16.csv"
users_path = join(src, fname)
users = pd.read_csv(users_path)

# NewsGuard

## OLS regression Score

In [10]:
# Columns of interest for accounts labelled Democrat or Republican.
# NOTE(review): `cols` and `subset` are reassigned by the following cell
# before either is read, so this selection currently has no effect.
cols = [
    "NG_score_mean", "accuracy_mean", "transparency_mean",
    "party", "followers_count", "tweet_count", "belief_share", "truth_share",
]
is_major_party = users["party"].isin(["Democrat", "Republican"])
subset = users.loc[is_major_party, cols]

In [11]:
# OLS of NG_score_mean on log follower/tweet counts, the belief and truth
# shares, party, and the party x share interactions (Democrats and
# Republicans only).
cols = [
    "NG_score_mean", "followers_count", "tweet_count",
    "belief_share", "truth_share", "party",
]
is_major_party = users["party"].isin(["Democrat", "Republican"])
subset = users.loc[is_major_party, cols]
formula = (
    "NG_score_mean ~ np.log(followers_count) + np.log(tweet_count)"
    " + belief_share + truth_share + party"
    " + party * belief_share + party * truth_share"
)
mod = smf.ols(formula=formula, data=subset)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,NG_score_mean,R-squared:,0.49
Model:,OLS,Adj. R-squared:,0.487
Method:,Least Squares,F-statistic:,137.4
Date:,"Wed, 15 Jun 2022",Prob (F-statistic):,1.3399999999999999e-141
Time:,12:32:47,Log-Likelihood:,-2791.8
No. Observations:,1008,AIC:,5600.0
Df Residuals:,1000,BIC:,5639.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,91.3625,1.063,85.980,0.000,89.277,93.448
party[T.Republican],-1.8266,0.885,-2.063,0.039,-3.564,-0.089
np.log(followers_count),-0.2258,0.101,-2.230,0.026,-0.425,-0.027
np.log(tweet_count),0.4210,0.158,2.672,0.008,0.112,0.730
belief_share,2.2577,5.740,0.393,0.694,-9.007,13.522
party[T.Republican]:belief_share,-67.1910,8.942,-7.514,0.000,-84.738,-49.644
truth_share,11.9210,4.688,2.543,0.011,2.722,21.120
party[T.Republican]:truth_share,-3.2489,6.329,-0.513,0.608,-15.669,9.171

0,1,2,3
Omnibus:,213.745,Durbin-Watson:,1.949
Prob(Omnibus):,0.0,Jarque-Bera (JB):,953.685
Skew:,-0.919,Prob(JB):,8.13e-208
Kurtosis:,7.396,Cond. No.,1140.0


In [12]:
# LaTeX scaffolding wrapped around every regression table.
# Raw strings keep each backslash literal, so sequences such as "\e", "\c"
# and "\l" are no longer parsed as (invalid) Python escape sequences.
table_header = r'''
\begin{table}[]
    \footnotesize
    \centering
    \begin{tabular}{l|c|c|c|c|c|c}
            & coef. & std. err. & $t$ & $P>\vert t \vert$ & [0.025 & 0.975] \\
            \toprule
'''
table_footer = r'''
            \bottomrule
    \end{tabular}
    \caption{TODO}
    \label{tab:TODO}
\end{table}
'''
# Maps the regression term names (as reported by the fitted model) to the
# row labels printed in the LaTeX tables.
# Raw strings are required for the LaTeX commands: in a normal string
# "\times" starts with "\t", which Python turns into a TAB character.
name_map = {
    "Intercept": "Intercept",
    "party[T.Republican]": "Republican",
    "np.log(followers_count)": "log(followers)",
    "np.log(tweet_count)": r"log(\# tweets)",
    "belief_share": "belief-speaking",
    "party[T.Republican]:belief_share": r"Rep. $\times$ belief-speaking",
    "truth_share": "truth-seeking",
    "party[T.Republican]:truth_share": r"Rep. $\times$ truth-seeking",
}
def make_regression_table(res, data, dep_col, name_map, decimals=3):
    """Render a fitted regression as a LaTeX table string.

    The result is ``table_header`` + one row per model term + a rule + six
    rows of summary statistics + ``table_footer``.

    Parameters
    ----------
    res : fitted regression results
        Must provide ``conf_int()``, ``params``, ``bse``, ``tvalues``,
        ``pvalues``, ``rsquared``, ``rsquared_adj``, ``mse_model``, ``ssr``,
        ``aic``, ``bic``, ``llf``, ``fvalue``, ``f_pvalue`` and ``resid``.
    data : DataFrame
        Frame holding the dependent variable; only ``data[dep_col]`` is read,
        to report its mean and standard deviation.
        NOTE(review): these are computed over all rows of ``data``, which may
        differ from the rows the model was actually fitted on - confirm.
    dep_col : str
        Column name of the dependent variable in ``data``.
    name_map : dict
        Maps every term name in ``res`` to its printed row label.
    decimals : int, default 3
        Number of decimal places used for every number in the table.

    Returns
    -------
    str
        The complete LaTeX table.

    Raises
    ------
    KeyError
        If a model term has no entry in ``name_map``.
    """
    def fmt(value):
        # fixed-point formatting with the requested number of decimals
        return f"{value:1.{decimals}f}"

    tab = table_header

    # one row per model term: coef, std err, t, p-value, CI bounds
    conf_int = res.conf_int()
    for var in conf_int.index:
        cells = [
            res.params[var],
            res.bse[var],
            res.tvalues[var],
            res.pvalues[var],
            conf_int.loc[var, 0],  # lower confidence bound
            conf_int.loc[var, 1],  # upper confidence bound
        ]
        tab += f"\t\t{name_map[var]} & " + " & ".join(fmt(c) for c in cells) + " \\\\ \n"

    tab += "\t\t\\bottomrule \n"

    # (left label, left value, right label, right value) per summary row
    summary_rows = [
        ("R-squared", res.rsquared, "Mean dependent var", data[dep_col].mean()),
        ("Adjusted R-squared", res.rsquared_adj, "S.D. dependent var", data[dep_col].std()),
        ("Model MSE", res.mse_model, "AIC", res.aic),
        ("Sum squared resid", res.ssr, "BIC", res.bic),
        ("Log-likelihood", res.llf, "F-statistic", res.fvalue),
        ("Durbin-Watson stat", durbin_watson(res.resid), "Prob(F-statistic)", res.f_pvalue),
    ]
    for left_label, left_value, right_label, right_value in summary_rows:
        # the value is wrapped in braces so that it is the third
        # \multicolumn argument as a whole, not just its first character
        tab += (
            f"\t\t\\multicolumn{{2}}{{l}}{{{left_label}}} & "
            f"\\multicolumn{{1}}{{r}}{{{fmt(left_value)}}} & "
            f"\\multicolumn{{2}}{{l}}{{{right_label}}} & "
            f"\\multicolumn{{2}}{{r}}{{{fmt(right_value)}}} \\\\ \n"
        )

    tab += table_footer

    return tab

In [13]:
# Render the current model (`res`) as LaTeX and store it in the tables folder.
dst = "../../tables"
fname = "OLS_table_NG_score.txt"
tab = make_regression_table(res, subset, "NG_score_mean", name_map)
with open(join(dst, fname), "w") as table_file:
    table_file.write(tab)

## Prediction

In [14]:
# Democrats and Republicans only; follower and tweet counts are overwritten
# with their column mean so they act as constants.
is_major_party = users["party"].isin(["Democrat", "Republican"])
subset = users[is_major_party].copy()
mean_cols = ["followers_count", "tweet_count"]
subset = subset.assign(**{name: subset[name].mean() for name in mean_cols})

# Grid of share values over which one honesty component is varied.
# NOTE(review): np.arange with a float step can return one point more or
# less than expected; np.linspace is the usual alternative.
# NOTE(review): the grid starts at -0.01, i.e. below a share of zero -
# presumably padding for plots; confirm.
honesty_component_share = np.arange(-0.01, 1.01, 0.01)
N = len(honesty_component_share)

# Columns that are identical in all four design frames.
controls = {
    "followers_count": np.full(N, subset["followers_count"].mean()),
    "tweet_count": np.full(N, subset["tweet_count"].mean()),
}
truth_share_mean = subset["truth_share"].mean()
belief_share_mean = subset["belief_share"].mean()

# belief_share varies, truth_share fixed at its mean
belief_pred_df_dem = pd.DataFrame({
    "belief_share": honesty_component_share,
    "party": ["Democrat"] * N,
    **controls,
    "truth_share": np.full(N, truth_share_mean),
})
belief_pred_df_rep = pd.DataFrame({
    "belief_share": honesty_component_share,
    "party": ["Republican"] * N,
    **controls,
    "truth_share": np.full(N, truth_share_mean),
})
# truth_share varies, belief_share fixed at its mean
truth_pred_df_dem = pd.DataFrame({
    "truth_share": honesty_component_share,
    "party": ["Democrat"] * N,
    **controls,
    "belief_share": np.full(N, belief_share_mean),
})
truth_pred_df_rep = pd.DataFrame({
    "truth_share": honesty_component_share,
    "party": ["Republican"] * N,
    **controls,
    "belief_share": np.full(N, belief_share_mean),
})

In [15]:
# One (design frame, party label, honesty component label) triple per curve.
prediction_specs = [
    (belief_pred_df_dem, "Democrat", "belief"),
    (belief_pred_df_rep, "Republican", "belief"),
    (truth_pred_df_dem, "Democrat", "truth"),
    (truth_pred_df_rep, "Republican", "truth"),
]
# Predict with the current `res`, take the summary frame at alpha=0.05 and
# tag every row with the party and component it belongs to.
belief_pred_dem, belief_pred_rep, truth_pred_dem, truth_pred_rep = (
    res.get_prediction(design)
    .summary_frame(alpha=0.05)
    .assign(party=party, honesty_component=component)
    for design, party, component in prediction_specs
)

In [16]:
# Stack the four prediction frames with a fresh 0..n-1 index and write them
# to CSV without the index column.
dst = "../../data/users"
fname = "OLS_predictions_score.csv"
prediction_frames = [belief_pred_dem, belief_pred_rep, truth_pred_dem, truth_pred_rep]
predictions = pd.concat(prediction_frames, ignore_index=True)
predictions.to_csv(join(dst, fname), index=False)

## OLS regression cutoff 60

In [20]:
# OLS of NG_unreliable_share on log follower/tweet counts, the belief and
# truth shares, party, and the party x share interactions (Democrats and
# Republicans only).
cols = [
    "NG_unreliable_share", "followers_count", "tweet_count",
    "belief_share", "truth_share", "party",
]
is_major_party = users["party"].isin(["Democrat", "Republican"])
subset = users.loc[is_major_party, cols]
formula = (
    "NG_unreliable_share ~ np.log(followers_count) + np.log(tweet_count)"
    " + belief_share + truth_share + party"
    " + party * belief_share + party * truth_share"
)
mod = smf.ols(formula=formula, data=subset)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,NG_unreliable_share,R-squared:,0.282
Model:,OLS,Adj. R-squared:,0.277
Method:,Least Squares,F-statistic:,56.24
Date:,"Wed, 15 Jun 2022",Prob (F-statistic):,6.1e-68
Time:,12:33:26,Log-Likelihood:,1836.0
No. Observations:,1008,AIC:,-3656.0
Df Residuals:,1000,BIC:,-3617.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0024,0.011,0.220,0.826,-0.019,0.024
party[T.Republican],-0.0030,0.009,-0.335,0.738,-0.021,0.015
np.log(followers_count),0.0031,0.001,3.058,0.002,0.001,0.005
np.log(tweet_count),-0.0027,0.002,-1.705,0.089,-0.006,0.000
belief_share,0.0194,0.058,0.333,0.739,-0.095,0.134
party[T.Republican]:belief_share,0.8753,0.091,9.651,0.000,0.697,1.053
truth_share,-0.0614,0.048,-1.291,0.197,-0.155,0.032
party[T.Republican]:truth_share,-0.1584,0.064,-2.467,0.014,-0.284,-0.032

0,1,2,3
Omnibus:,756.858,Durbin-Watson:,1.896
Prob(Omnibus):,0.0,Jarque-Bera (JB):,17454.988
Skew:,3.218,Prob(JB):,0.0
Kurtosis:,22.343,Cond. No.,1140.0


## Variance inflation factor

In [21]:
# Variance inflation factor of every predictor column (plus the constant).
# NOTE(review): the VIFs use the raw follower/tweet counts and no interaction
# terms, whereas the regressions above use np.log(...) of the counts and
# party interactions - confirm this is the intended design matrix.
cols = ["followers_count", "tweet_count", "belief_share", "truth_share", "party"]
subset = users[users["party"].isin(["Democrat", "Republican"])][cols].dropna().copy()
# Encode party as a 0/1 dummy. `.map` returns an integer column directly;
# `.replace` with a str -> int mapping depends on the implicit object-dtype
# downcast that pandas has deprecated.
subset["party"] = subset["party"].map({"Republican": 1, "Democrat": 0})
# add a constant which is needed according to this post: https://github.com/statsmodels/statsmodels/issues/2376
X = add_constant(subset)
pd.Series(
    [variance_inflation_factor(X.values, i) for i in range(X.shape[1])],
    index=X.columns,
)

const              13.241610
followers_count     1.085761
tweet_count         1.146761
belief_share        1.139245
truth_share         1.160840
party               1.033540
dtype: float64

## Prediction

In [22]:
# Democrats and Republicans only; follower and tweet counts are overwritten
# with their column mean so they act as constants.
is_major_party = users["party"].isin(["Democrat", "Republican"])
subset = users[is_major_party].copy()
mean_cols = ["followers_count", "tweet_count"]
subset = subset.assign(**{name: subset[name].mean() for name in mean_cols})

# Grid of share values over which one honesty component is varied.
# NOTE(review): np.arange with a float step can return one point more or
# less than expected; np.linspace is the usual alternative.
honesty_component_share = np.arange(0, 1.01, 0.01)
N = len(honesty_component_share)

# Columns that are identical in all four design frames.
controls = {
    "followers_count": np.full(N, subset["followers_count"].mean()),
    "tweet_count": np.full(N, subset["tweet_count"].mean()),
}
truth_share_mean = subset["truth_share"].mean()
belief_share_mean = subset["belief_share"].mean()

# belief_share varies, truth_share fixed at its mean
belief_pred_df_dem = pd.DataFrame({
    "belief_share": honesty_component_share,
    "party": ["Democrat"] * N,
    **controls,
    "truth_share": np.full(N, truth_share_mean),
})
belief_pred_df_rep = pd.DataFrame({
    "belief_share": honesty_component_share,
    "party": ["Republican"] * N,
    **controls,
    "truth_share": np.full(N, truth_share_mean),
})
# truth_share varies, belief_share fixed at its mean
truth_pred_df_dem = pd.DataFrame({
    "truth_share": honesty_component_share,
    "party": ["Democrat"] * N,
    **controls,
    "belief_share": np.full(N, belief_share_mean),
})
truth_pred_df_rep = pd.DataFrame({
    "truth_share": honesty_component_share,
    "party": ["Republican"] * N,
    **controls,
    "belief_share": np.full(N, belief_share_mean),
})

In [23]:
# One (design frame, party label, honesty component label) triple per curve.
prediction_specs = [
    (belief_pred_df_dem, "Democrat", "belief"),
    (belief_pred_df_rep, "Republican", "belief"),
    (truth_pred_df_dem, "Democrat", "truth"),
    (truth_pred_df_rep, "Republican", "truth"),
]
# Predict with the current `res`, take the summary frame at alpha=0.05 and
# tag every row with the party and component it belongs to.
belief_pred_dem, belief_pred_rep, truth_pred_dem, truth_pred_rep = (
    res.get_prediction(design)
    .summary_frame(alpha=0.05)
    .assign(party=party, honesty_component=component)
    for design, party, component in prediction_specs
)

In [24]:
# Stack the four prediction frames with a fresh 0..n-1 index and write them
# to CSV without the index column.
dst = "../../data/users"
fname = "OLS_predictions_unreliable_60.csv"
prediction_frames = [belief_pred_dem, belief_pred_rep, truth_pred_dem, truth_pred_rep]
predictions = pd.concat(prediction_frames, ignore_index=True)
predictions.to_csv(join(dst, fname), index=False)

# Independent list

## OLS regression share

In [25]:
# OLS of independent_unreliable_share on log follower/tweet counts, the
# belief and truth shares, party, and the party x share interactions
# (Democrats and Republicans only).
cols = [
    "independent_unreliable_share", "followers_count", "tweet_count",
    "belief_share", "truth_share", "party",
]
is_major_party = users["party"].isin(["Democrat", "Republican"])
subset = users.loc[is_major_party, cols]
formula = (
    "independent_unreliable_share ~ np.log(followers_count) + np.log(tweet_count)"
    " + belief_share + truth_share + party"
    " + party * belief_share + party * truth_share"
)
mod = smf.ols(formula=formula, data=subset)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,independent_unreliable_share,R-squared:,0.374
Model:,OLS,Adj. R-squared:,0.369
Method:,Least Squares,F-statistic:,85.25
Date:,"Wed, 15 Jun 2022",Prob (F-statistic):,3.5300000000000003e-97
Time:,12:34:11,Log-Likelihood:,1295.8
No. Observations:,1008,AIC:,-2576.0
Df Residuals:,1000,BIC:,-2536.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0160,0.018,0.873,0.383,-0.020,0.052
party[T.Republican],0.0370,0.015,2.400,0.017,0.007,0.067
np.log(followers_count),0.0051,0.002,2.913,0.004,0.002,0.009
np.log(tweet_count),-0.0066,0.003,-2.437,0.015,-0.012,-0.001
belief_share,0.0084,0.099,0.085,0.932,-0.186,0.203
party[T.Republican]:belief_share,1.2415,0.155,8.002,0.000,0.937,1.546
truth_share,-0.0673,0.081,-0.836,0.403,-0.225,0.091
party[T.Republican]:truth_share,-0.2306,0.110,-2.105,0.036,-0.445,-0.016

0,1,2,3
Omnibus:,666.029,Durbin-Watson:,1.954
Prob(Omnibus):,0.0,Jarque-Bera (JB):,13111.587
Skew:,2.715,Prob(JB):,0.0
Kurtosis:,19.813,Cond. No.,1140.0


## OLS regression accuracy score

In [26]:
# OLS of accuracy_mean on log follower/tweet counts, the belief and truth
# shares, party, and the party x share interactions (Democrats and
# Republicans only).
cols = [
    "accuracy_mean", "followers_count", "tweet_count",
    "belief_share", "truth_share", "party",
]
is_major_party = users["party"].isin(["Democrat", "Republican"])
subset = users.loc[is_major_party, cols]
formula = (
    "accuracy_mean ~ np.log(followers_count) + np.log(tweet_count)"
    " + belief_share + truth_share + party"
    " + party * belief_share + party * truth_share"
)
mod = smf.ols(formula=formula, data=subset)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,accuracy_mean,R-squared:,0.317
Model:,OLS,Adj. R-squared:,0.312
Method:,Least Squares,F-statistic:,66.24
Date:,"Wed, 15 Jun 2022",Prob (F-statistic):,1.82e-78
Time:,12:34:16,Log-Likelihood:,-264.93
No. Observations:,1008,AIC:,545.9
Df Residuals:,1000,BIC:,585.2
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,4.2549,0.086,49.308,0.000,4.086,4.424
party[T.Republican],-0.1281,0.072,-1.769,0.077,-0.270,0.014
np.log(followers_count),-0.0230,0.008,-2.791,0.005,-0.039,-0.007
np.log(tweet_count),0.0153,0.013,1.204,0.229,-0.010,0.040
belief_share,-1.3142,0.467,-2.813,0.005,-2.231,-0.398
party[T.Republican]:belief_share,-4.5957,0.730,-6.297,0.000,-6.028,-3.164
truth_share,0.2550,0.379,0.673,0.501,-0.489,0.999
party[T.Republican]:truth_share,0.6028,0.515,1.170,0.242,-0.408,1.614

0,1,2,3
Omnibus:,105.321,Durbin-Watson:,1.958
Prob(Omnibus):,0.0,Jarque-Bera (JB):,427.92
Skew:,-0.415,Prob(JB):,1.2e-93
Kurtosis:,6.082,Cond. No.,1140.0


In [27]:
# Render the current model (`res`) as LaTeX and store it in the tables folder.
dst = "../../tables"
fname = "OLS_table_accuracy.txt"
tab = make_regression_table(res, subset, "accuracy_mean", name_map)
with open(join(dst, fname), "w") as table_file:
    table_file.write(tab)

## OLS regression transparency score

In [28]:
# OLS of transparency_mean on log follower/tweet counts, the belief and
# truth shares, party, and the party x share interactions (Democrats and
# Republicans only).
cols = [
    "transparency_mean", "followers_count", "tweet_count",
    "belief_share", "truth_share", "party",
]
is_major_party = users["party"].isin(["Democrat", "Republican"])
subset = users.loc[is_major_party, cols]
formula = (
    "transparency_mean ~ np.log(followers_count) + np.log(tweet_count)"
    " + belief_share + truth_share + party"
    " + party * belief_share + party * truth_share"
)
mod = smf.ols(formula=formula, data=subset)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,transparency_mean,R-squared:,0.446
Model:,OLS,Adj. R-squared:,0.443
Method:,Least Squares,F-statistic:,115.2
Date:,"Wed, 15 Jun 2022",Prob (F-statistic):,8.42e-124
Time:,12:34:17,Log-Likelihood:,328.47
No. Observations:,1008,AIC:,-640.9
Df Residuals:,1000,BIC:,-601.6
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,2.8243,0.048,58.967,0.000,2.730,2.918
party[T.Republican],-0.1151,0.040,-2.863,0.004,-0.194,-0.036
np.log(followers_count),-0.0124,0.005,-2.715,0.007,-0.021,-0.003
np.log(tweet_count),0.0211,0.007,2.991,0.003,0.007,0.035
belief_share,-0.5367,0.259,-2.070,0.039,-1.045,-0.028
party[T.Republican]:belief_share,-2.8790,0.405,-7.107,0.000,-3.674,-2.084
truth_share,0.3238,0.210,1.539,0.124,-0.089,0.737
party[T.Republican]:truth_share,0.2436,0.286,0.852,0.395,-0.318,0.805

0,1,2,3
Omnibus:,218.815,Durbin-Watson:,1.959
Prob(Omnibus):,0.0,Jarque-Bera (JB):,881.209
Skew:,-0.975,Prob(JB):,4.440000000000001e-192
Kurtosis:,7.145,Cond. No.,1140.0


In [29]:
# Render the current model (`res`) as LaTeX and store it in the tables folder.
dst = "../../tables"
fname = "OLS_table_transparency.txt"
tab = make_regression_table(res, subset, "transparency_mean", name_map)
with open(join(dst, fname), "w") as table_file:
    table_file.write(tab)

# Article texts

## OLS regression Score

In [30]:
# Read only the listed columns from the gzip-compressed article table.
src = "../../data/articles"
fname = "full_links_with_text_and_honesty.csv.gzip"
cols = [
    "belief",
    "truth",
    "other_words",
    "wc",
    "belief_prop",
    "truth_prop",
    "NG_score",
    "url",
    "party",
]
articles_path = join(src, fname)
texts = pd.read_csv(articles_path, usecols=cols, compression="gzip")

In [34]:
# Write the four columns used by the article-level regression to a
# gzip-compressed CSV in the same folder the articles were read from.
fname = "article_scores_with_parties.csv.gzip"
cols = ["NG_score", "belief_prop", "truth_prop", "party"]
article_scores = texts[cols]
article_scores.to_csv(join(src, fname), compression="gzip", index=False)

In [35]:
# Article-level OLS of NG_score on belief_prop, truth_prop, party and the
# party x proportion interactions.
formula = (
    "NG_score ~ belief_prop + truth_prop + party"
    " + party * belief_prop + party * truth_prop"
)
mod = smf.ols(formula=formula, data=texts)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,NG_score,R-squared:,0.131
Model:,OLS,Adj. R-squared:,0.131
Method:,Least Squares,F-statistic:,4847.0
Date:,"Wed, 15 Jun 2022",Prob (F-statistic):,0.0
Time:,12:35:37,Log-Likelihood:,-627960.0
No. Observations:,160750,AIC:,1256000.0
Df Residuals:,160744,BIC:,1256000.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,94.7729,0.068,1396.422,0.000,94.640,94.906
party[T.Republican],-9.1938,0.108,-84.746,0.000,-9.406,-8.981
belief_prop,-42.5632,11.542,-3.688,0.000,-65.186,-19.940
party[T.Republican]:belief_prop,-19.1074,17.684,-1.080,0.280,-53.768,15.553
truth_prop,19.3650,6.108,3.170,0.002,7.393,31.337
party[T.Republican]:truth_prop,-46.6972,9.855,-4.739,0.000,-66.012,-27.383

0,1,2,3
Omnibus:,64018.333,Durbin-Watson:,1.162
Prob(Omnibus):,0.0,Jarque-Bera (JB):,370739.046
Skew:,-1.831,Prob(JB):,0.0
Kurtosis:,9.476,Cond. No.,710.0


In [36]:
# Row labels for the terms of the article-level model (replaces the
# account-level `name_map` defined earlier in the notebook).
name_map = {
    "Intercept": "Intercept",
    "party[T.Republican]": "Republican",
    "belief_prop": "belief-speaking",
    "party[T.Republican]:belief_prop": r"Rep. $\times$ belief-speaking",
    "truth_prop": "truth-seeking",
    "party[T.Republican]:truth_prop": r"Rep. $\times$ truth-seeking",
}

# Render the current model (`res`) as LaTeX and store it in the tables folder.
dst = "../../tables"
fname = "OLS_table_article_NG_score.txt"
tab = make_regression_table(res, texts, "NG_score", name_map)
with open(join(dst, fname), "w") as table_file:
    table_file.write(tab)

## Prediction

In [37]:
# Grid of proportion values over which one honesty component is varied.
# NOTE(review): np.arange with a float step can return one point more or
# less than expected; np.linspace is the usual alternative.
# NOTE(review): the grid starts at -0.01, i.e. below a proportion of zero -
# presumably padding for plots; confirm.
honesty_component_share = np.arange(-0.01, 0.21, 0.001)
N = len(honesty_component_share)

# The component that is not varied is fixed at its mean over all articles.
truth_prop_mean = texts["truth_prop"].mean()
belief_prop_mean = texts["belief_prop"].mean()

# belief_prop varies, truth_prop fixed
belief_pred_df_dem = pd.DataFrame({
    "belief_prop": honesty_component_share,
    "party": ["Democrat"] * N,
    "truth_prop": np.full(N, truth_prop_mean),
})
belief_pred_df_rep = pd.DataFrame({
    "belief_prop": honesty_component_share,
    "party": ["Republican"] * N,
    "truth_prop": np.full(N, truth_prop_mean),
})
# truth_prop varies, belief_prop fixed
truth_pred_df_dem = pd.DataFrame({
    "truth_prop": honesty_component_share,
    "party": ["Democrat"] * N,
    "belief_prop": np.full(N, belief_prop_mean),
})
truth_pred_df_rep = pd.DataFrame({
    "truth_prop": honesty_component_share,
    "party": ["Republican"] * N,
    "belief_prop": np.full(N, belief_prop_mean),
})

In [38]:
# One (design frame, party label, honesty component label) triple per curve.
prediction_specs = [
    (belief_pred_df_dem, "Democrat", "belief"),
    (belief_pred_df_rep, "Republican", "belief"),
    (truth_pred_df_dem, "Democrat", "truth"),
    (truth_pred_df_rep, "Republican", "truth"),
]
# Predict with the current `res`, take the summary frame at alpha=0.05 and
# tag every row with the party and component it belongs to.
belief_pred_dem, belief_pred_rep, truth_pred_dem, truth_pred_rep = (
    res.get_prediction(design)
    .summary_frame(alpha=0.05)
    .assign(party=party, honesty_component=component)
    for design, party, component in prediction_specs
)

In [39]:
# Stack the four prediction frames with a fresh 0..n-1 index and write them
# to CSV without the index column.
dst = "../../data/articles"
fname = "OLS_predictions_articles.csv"
prediction_frames = [belief_pred_dem, belief_pred_rep, truth_pred_dem, truth_pred_rep]
predictions = pd.concat(prediction_frames, ignore_index=True)
predictions.to_csv(join(dst, fname), index=False)