In [None]:
# author: Jana Lasser & Almog Simchon

In [2]:
import pandas as pd
import numpy as np
from os.path import join
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.stattools import durbin_watson
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
import matplotlib.pyplot as plt

In [3]:
# colors: hex codes for the party palette (names suggest Democrat / Republican / independent)
demblue, repred, indgrey = "#0015BC", "#FF0000", "#696969"

In [4]:
# Read the politician account table; all later user-level models start from `users`.
src = "../../data/users"
fname = "US_politician_accounts_2010-11-06_to_2022-03-16.csv"
users_path = join(src, fname)
users = pd.read_csv(users_path)

# NewsGuard

## OLS regression Score

In [4]:
# Keep Democrat and Republican accounts only, with the three NewsGuard outcome
# columns and the covariates. The next cell redefines `cols` and `subset`.
cols = [
    "NG_score_mean",
    "accuracy_mean",
    "transparency_mean",
    "party",
    "followers_count",
    "tweet_count",
    "belief_share",
    "truth_share",
]
major_party_mask = users["party"].isin(["Democrat", "Republican"])
subset = users.loc[major_party_mask, cols]

In [5]:
# OLS of the mean NewsGuard score on log follower / tweet counts, the two
# honesty-component shares, party, and the party x share interactions.
cols = [
    "NG_score_mean",
    "followers_count",
    "tweet_count",
    "belief_share",
    "truth_share",
    "party",
]
major_party_mask = users["party"].isin(["Democrat", "Republican"])
subset = users.loc[major_party_mask, cols]
mod = smf.ols(
    formula=(
        'NG_score_mean ~ np.log(followers_count) + np.log(tweet_count)'
        ' + belief_share + truth_share + party'
        ' + party * belief_share + party * truth_share'
    ),
    data=subset,
)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,NG_score_mean,R-squared:,0.464
Model:,OLS,Adj. R-squared:,0.46
Method:,Least Squares,F-statistic:,123.4
Date:,"Fri, 07 Oct 2022",Prob (F-statistic):,1.47e-130
Time:,16:07:45,Log-Likelihood:,-2817.5
No. Observations:,1008,AIC:,5651.0
Df Residuals:,1000,BIC:,5690.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,90.8089,1.061,85.627,0.000,88.728,92.890
party[T.Republican],-2.8889,0.726,-3.981,0.000,-4.313,-1.465
np.log(followers_count),-0.1345,0.106,-1.275,0.203,-0.342,0.073
np.log(tweet_count),0.5137,0.162,3.172,0.002,0.196,0.832
belief_share,4.0868,8.658,0.472,0.637,-12.902,21.076
party[T.Republican]:belief_share,-43.0604,12.501,-3.444,0.001,-67.593,-18.528
truth_share,8.8806,8.558,1.038,0.300,-7.912,25.673
party[T.Republican]:truth_share,-30.0959,11.311,-2.661,0.008,-52.292,-7.900

0,1,2,3
Omnibus:,221.329,Durbin-Watson:,1.949
Prob(Omnibus):,0.0,Jarque-Bera (JB):,859.008
Skew:,-0.999,Prob(JB):,2.94e-187
Kurtosis:,7.057,Cond. No.,1730.0


In [15]:
# LaTeX scaffolding wrapped around every regression table. Every backslash is
# doubled so that none of them is interpreted as a Python escape sequence.
table_header = '''
\\begin{table}[]
    \\footnotesize
    \\centering
    \\begin{tabular}{l|c|c|c|c|c|c}
            & coef. & std. err. & $t$ & $P>\\vert t \\vert$ & [0.025 & 0.975] \\\\
            \\toprule
'''
table_footer = '''
            \\bottomrule
    \\end{tabular}
    \\caption{TODO}
    \\label{tab:TODO}
\\end{table}
'''
# Maps the term names of the account-level models to LaTeX row labels.
# NOTE: "$\\times$" must keep the doubled backslash; a single "\t" is a TAB
# character and would turn the label into "$<TAB>imes$".
name_map = {
    "Intercept":"Intercept",
    "party[T.Republican]":"Republican",
    "np.log(followers_count)":"log(followers)",
    "np.log(tweet_count)":"log(\\# tweets)",
    "belief_share":"belief-speaking",
    "party[T.Republican]:belief_share":"Rep. $\\times$ belief-speaking",
    "truth_share":"truth-seeking",
    "party[T.Republican]:truth_share":"Rep. $\\times$ truth-seeking"
}
def make_regression_table(res, data, dep_col, name_map, decimals=3):
    """Render a fitted regression result as a LaTeX table string.

    One row is written per model term (coefficient, standard error, t value,
    p value and the two bounds returned by ``res.conf_int()``), followed by six
    rows of fit statistics. The rows are wrapped in the module-level
    ``table_header`` and ``table_footer`` strings.

    Parameters
    ----------
    res : fitted regression results object
        Must expose ``conf_int()``, ``params``, ``bse``, ``tvalues``,
        ``pvalues``, ``resid``, ``rsquared``, ``rsquared_adj``, ``mse_model``,
        ``ssr``, ``llf``, ``aic``, ``bic``, ``fvalue`` and ``f_pvalue``.
    data : pandas.DataFrame
        Only ``data[dep_col]`` is read, for the mean and standard deviation of
        the dependent variable.
    dep_col : str
        Name of the dependent-variable column in ``data``.
    name_map : dict
        Maps every term name in ``res`` to its LaTeX row label; a term missing
        from the map raises ``KeyError``.
    decimals : int, default 3
        Number of digits after the decimal point for every number in the table.

    Returns
    -------
    str
        The complete LaTeX table.
    """
    spec = f"1.{decimals}f"

    def fmt(value):
        # Fixed-point rendering shared by every number in the table. Values
        # smaller than the displayed precision (e.g. tiny p values) print as 0.
        return format(value, spec)

    def stat_row(left_label, left_value, right_label, right_value):
        # The numbers are wrapped in braces: without them \multicolumn takes
        # only the first character of the number as its cell content.
        return (
            f"\t\t\\multicolumn{{2}}{{l}}{{{left_label}}} & "
            f"\\multicolumn{{1}}{{r}}{{{fmt(left_value)}}} & "
            f"\\multicolumn{{2}}{{l}}{{{right_label}}} & "
            f"\\multicolumn{{2}}{{r}}{{{fmt(right_value)}}} \\\\ \n"
        )

    tab = table_header

    # Coefficient block: one row per term, in the order of the fitted model.
    conf_int = res.conf_int()
    for var in conf_int.index:
        ci_low, ci_hi = conf_int.loc[var]
        cells = [
            res.params[var],
            res.bse[var],
            res.tvalues[var],
            res.pvalues[var],
            ci_low,
            ci_hi,
        ]
        tab += f"\t\t{name_map[var]} & " + " & ".join(fmt(v) for v in cells) + " \\\\ \n"

    tab += "\t\t\\bottomrule \n"

    # Fit-statistics block: two label/value pairs per row.
    dep_var = data[dep_col]
    tab += stat_row("R-squared", res.rsquared, "Mean dependent var", dep_var.mean())
    tab += stat_row("Adjusted R-squared", res.rsquared_adj, "S.D. dependent var", dep_var.std())
    tab += stat_row("Model MSE", res.mse_model, "AIC", res.aic)
    tab += stat_row("Sum squared resid", res.ssr, "BIC", res.bic)
    tab += stat_row("Log-likelihood", res.llf, "F-statistic", res.fvalue)
    tab += stat_row("Durbin-Watson stat", durbin_watson(res.resid), "Prob(F-statistic)", res.f_pvalue)

    tab += table_footer

    return tab

In [7]:
# The formula interface drops rows with missing values before fitting, so the
# dependent-variable mean / S.D. reported in the table must be computed on the
# same rows. `subset` holds exactly the model columns at this point.
est_sample = subset.dropna()
assert len(est_sample) == int(res.nobs), "estimation sample does not match the fitted model"
tab = make_regression_table(res, est_sample, "NG_score_mean", name_map)
dst = "../../tables"
fname = "OLS_table_NG_score.txt"
with open(join(dst, fname), "w") as f:
    f.write(tab)

## Prediction

In [8]:
# Prediction grids: one honesty component sweeps over `honesty_component_share`
# while the counts and the other component are held at their sample means.
subset = users[users["party"].isin(["Democrat", "Republican"])].copy()
mean_cols = ["followers_count", "tweet_count"]
subset = subset.assign(**{name: subset[name].mean() for name in mean_cols})

honesty_component_share = np.arange(-0.01, 1.01, 0.01)
N = len(honesty_component_share)

followers_mean = subset["followers_count"].mean()
tweets_mean = subset["tweet_count"].mean()
belief_mean = subset["belief_share"].mean()
truth_mean = subset["truth_share"].mean()

# Belief share varies, truth share fixed; one frame per party.
belief_pred_df_dem, belief_pred_df_rep = (
    pd.DataFrame({
        "belief_share": honesty_component_share,
        "party": [party_label] * N,
        "followers_count": [followers_mean] * N,
        "tweet_count": [tweets_mean] * N,
        "truth_share": [truth_mean] * N,
    })
    for party_label in ("Democrat", "Republican")
)
# Truth share varies, belief share fixed; one frame per party.
truth_pred_df_dem, truth_pred_df_rep = (
    pd.DataFrame({
        "truth_share": honesty_component_share,
        "party": [party_label] * N,
        "followers_count": [followers_mean] * N,
        "tweet_count": [tweets_mean] * N,
        "belief_share": [belief_mean] * N,
    })
    for party_label in ("Democrat", "Republican")
)

In [9]:
# Evaluate the fitted model on each grid, take the summary frame at alpha=0.05
# and tag it with the party and the honesty component that was varied.
prediction_specs = [
    (belief_pred_df_dem, "Democrat", "belief"),
    (belief_pred_df_rep, "Republican", "belief"),
    (truth_pred_df_dem, "Democrat", "truth"),
    (truth_pred_df_rep, "Republican", "truth"),
]
belief_pred_dem, belief_pred_rep, truth_pred_dem, truth_pred_rep = (
    res.get_prediction(grid)
    .summary_frame(alpha=0.05)
    .assign(party=party_label, honesty_component=component)
    for grid, party_label, component in prediction_specs
)

In [10]:
# Stack the four tagged prediction frames and store them next to the user data.
dst = "../../data/users"
fname = "OLS_predictions_score.csv"
prediction_frames = [belief_pred_dem, belief_pred_rep, truth_pred_dem, truth_pred_rep]
predictions = pd.concat(prediction_frames, ignore_index=True)
predictions.to_csv(join(dst, fname), index=False)

## OLS regression cutoff 60

In [11]:
# Same specification as the score model, with NG_unreliable_share as outcome.
cols = [
    "NG_unreliable_share",
    "followers_count",
    "tweet_count",
    "belief_share",
    "truth_share",
    "party",
]
major_party_mask = users["party"].isin(["Democrat", "Republican"])
subset = users.loc[major_party_mask, cols]
mod = smf.ols(
    formula=(
        'NG_unreliable_share ~ np.log(followers_count) + np.log(tweet_count)'
        ' + belief_share + truth_share + party'
        ' + party * belief_share + party * truth_share'
    ),
    data=subset,
)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,NG_unreliable_share,R-squared:,0.216
Model:,OLS,Adj. R-squared:,0.21
Method:,Least Squares,F-statistic:,39.32
Date:,"Fri, 07 Oct 2022",Prob (F-statistic):,5.96e-49
Time:,16:08:10,Log-Likelihood:,1791.2
No. Observations:,1008,AIC:,-3566.0
Df Residuals:,1000,BIC:,-3527.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0114,0.011,1.040,0.298,-0.010,0.033
party[T.Republican],-0.0046,0.008,-0.609,0.543,-0.019,0.010
np.log(followers_count),0.0024,0.001,2.221,0.027,0.000,0.005
np.log(tweet_count),-0.0033,0.002,-1.956,0.051,-0.007,1.01e-05
belief_share,0.0238,0.089,0.266,0.790,-0.152,0.199
party[T.Republican]:belief_share,0.6735,0.129,5.213,0.000,0.420,0.927
truth_share,-0.0940,0.088,-1.062,0.288,-0.268,0.080
party[T.Republican]:truth_share,0.0363,0.117,0.311,0.756,-0.193,0.266

0,1,2,3
Omnibus:,797.254,Durbin-Watson:,1.908
Prob(Omnibus):,0.0,Jarque-Bera (JB):,18828.754
Skew:,3.477,Prob(JB):,0.0
Kurtosis:,22.999,Cond. No.,1730.0


## Variance inflation factor

In [12]:
cols = ["followers_count", "tweet_count", "belief_share", "truth_share", "party"]
subset = users[users["party"].isin(["Democrat", "Republican"])][cols].dropna().copy()
# Encode party as a 0/1 dummy. `map` + `astype(int)` gives a numeric column on
# every pandas version; `replace` on an object column relies on implicit
# downcasting, which pandas has deprecated.
subset["party"] = subset["party"].map({"Republican":1, "Democrat":0}).astype(int)
# NOTE(review): the regressions above use np.log of the two count columns and
# party interactions, whereas the VIFs here are computed on the raw columns --
# confirm this is intended.
# add a constant which is needed according to this post: https://github.com/statsmodels/statsmodels/issues/2376
X = add_constant(subset)
pd.Series(
    [variance_inflation_factor(X.values, i) for i in range(X.shape[1])],
    index=X.columns,
)

const              9.267962
followers_count    1.089397
tweet_count        1.198772
belief_share       1.290596
truth_share        1.346980
party              1.073131
dtype: float64

## Prediction

In [13]:
# Prediction grids for the unreliable-share model: one honesty component sweeps
# over `honesty_component_share`, everything else is held at its sample mean.
subset = users[users["party"].isin(["Democrat", "Republican"])].copy()
mean_cols = ["followers_count", "tweet_count"]
subset = subset.assign(**{name: subset[name].mean() for name in mean_cols})

honesty_component_share = np.arange(0, 1.01, 0.01)
N = len(honesty_component_share)

followers_mean = subset["followers_count"].mean()
tweets_mean = subset["tweet_count"].mean()
belief_mean = subset["belief_share"].mean()
truth_mean = subset["truth_share"].mean()

# Belief share varies, truth share fixed; one frame per party.
belief_pred_df_dem, belief_pred_df_rep = (
    pd.DataFrame({
        "belief_share": honesty_component_share,
        "party": [party_label] * N,
        "followers_count": [followers_mean] * N,
        "tweet_count": [tweets_mean] * N,
        "truth_share": [truth_mean] * N,
    })
    for party_label in ("Democrat", "Republican")
)
# Truth share varies, belief share fixed; one frame per party.
truth_pred_df_dem, truth_pred_df_rep = (
    pd.DataFrame({
        "truth_share": honesty_component_share,
        "party": [party_label] * N,
        "followers_count": [followers_mean] * N,
        "tweet_count": [tweets_mean] * N,
        "belief_share": [belief_mean] * N,
    })
    for party_label in ("Democrat", "Republican")
)

In [14]:
# Evaluate the fitted model on each grid, take the summary frame at alpha=0.05
# and tag it with the party and the honesty component that was varied.
prediction_specs = [
    (belief_pred_df_dem, "Democrat", "belief"),
    (belief_pred_df_rep, "Republican", "belief"),
    (truth_pred_df_dem, "Democrat", "truth"),
    (truth_pred_df_rep, "Republican", "truth"),
]
belief_pred_dem, belief_pred_rep, truth_pred_dem, truth_pred_rep = (
    res.get_prediction(grid)
    .summary_frame(alpha=0.05)
    .assign(party=party_label, honesty_component=component)
    for grid, party_label, component in prediction_specs
)

In [15]:
# Stack the four tagged prediction frames and store them next to the user data.
dst = "../../data/users"
fname = "OLS_predictions_unreliable_60.csv"
prediction_frames = [belief_pred_dem, belief_pred_rep, truth_pred_dem, truth_pred_rep]
predictions = pd.concat(prediction_frames, ignore_index=True)
predictions.to_csv(join(dst, fname), index=False)

# Independent list

## OLS regression share

In [16]:
# Same specification as the NewsGuard models, with
# independent_unreliable_share as outcome.
cols = [
    "independent_unreliable_share",
    "followers_count",
    "tweet_count",
    "belief_share",
    "truth_share",
    "party",
]
major_party_mask = users["party"].isin(["Democrat", "Republican"])
subset = users.loc[major_party_mask, cols]
mod = smf.ols(
    formula=(
        'independent_unreliable_share ~ np.log(followers_count) + np.log(tweet_count)'
        ' + belief_share + truth_share + party'
        ' + party * belief_share + party * truth_share'
    ),
    data=subset,
)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,independent_unreliable_share,R-squared:,0.332
Model:,OLS,Adj. R-squared:,0.327
Method:,Least Squares,F-statistic:,70.85
Date:,"Fri, 07 Oct 2022",Prob (F-statistic):,3.78e-83
Time:,16:08:17,Log-Likelihood:,1262.9
No. Observations:,1008,AIC:,-2510.0
Df Residuals:,1000,BIC:,-2471.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0332,0.018,1.810,0.071,-0.003,0.069
party[T.Republican],0.0380,0.013,2.992,0.003,0.013,0.063
np.log(followers_count),0.0037,0.002,2.026,0.043,0.000,0.007
np.log(tweet_count),-0.0079,0.003,-2.804,0.005,-0.013,-0.002
belief_share,0.0238,0.151,0.158,0.875,-0.273,0.320
party[T.Republican]:belief_share,0.8099,0.218,3.710,0.000,0.381,1.238
truth_share,-0.0451,0.149,-0.302,0.762,-0.338,0.247
party[T.Republican]:truth_share,0.1335,0.198,0.674,0.500,-0.255,0.522

0,1,2,3
Omnibus:,678.296,Durbin-Watson:,1.948
Prob(Omnibus):,0.0,Jarque-Bera (JB):,12107.138
Skew:,2.829,Prob(JB):,0.0
Kurtosis:,19.008,Cond. No.,1730.0


## OLS regression accuracy score

In [17]:
# Same specification as the score model, with accuracy_mean as outcome.
cols = [
    "accuracy_mean",
    "followers_count",
    "tweet_count",
    "belief_share",
    "truth_share",
    "party",
]
major_party_mask = users["party"].isin(["Democrat", "Republican"])
subset = users.loc[major_party_mask, cols]
mod = smf.ols(
    formula=(
        'accuracy_mean ~ np.log(followers_count) + np.log(tweet_count)'
        ' + belief_share + truth_share + party'
        ' + party * belief_share + party * truth_share'
    ),
    data=subset,
)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,accuracy_mean,R-squared:,0.281
Model:,OLS,Adj. R-squared:,0.276
Method:,Least Squares,F-statistic:,55.76
Date:,"Fri, 07 Oct 2022",Prob (F-statistic):,1.98e-67
Time:,16:08:31,Log-Likelihood:,-290.85
No. Observations:,1008,AIC:,597.7
Df Residuals:,1000,BIC:,637.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,4.1542,0.086,48.412,0.000,3.986,4.323
party[T.Republican],-0.1328,0.059,-2.240,0.025,-0.249,-0.016
np.log(followers_count),-0.0160,0.009,-1.864,0.063,-0.033,0.001
np.log(tweet_count),0.0221,0.013,1.690,0.091,-0.004,0.048
belief_share,-1.5535,0.706,-2.201,0.028,-2.939,-0.168
party[T.Republican]:belief_share,-3.3048,1.020,-3.241,0.001,-5.306,-1.304
truth_share,-0.1466,0.696,-0.211,0.833,-1.513,1.219
party[T.Republican]:truth_share,-0.5527,0.925,-0.597,0.551,-2.369,1.263

0,1,2,3
Omnibus:,114.961,Durbin-Watson:,1.953
Prob(Omnibus):,0.0,Jarque-Bera (JB):,402.558
Skew:,-0.519,Prob(JB):,3.85e-88
Kurtosis:,5.917,Cond. No.,1730.0


In [18]:
# The formula interface drops rows with missing values before fitting, so the
# dependent-variable mean / S.D. reported in the table must be computed on the
# same rows. `subset` holds exactly the model columns at this point.
est_sample = subset.dropna()
assert len(est_sample) == int(res.nobs), "estimation sample does not match the fitted model"
tab = make_regression_table(res, est_sample, "accuracy_mean", name_map)
dst = "../../tables"
fname = "OLS_table_accuracy.txt"
with open(join(dst, fname), "w") as f:
    f.write(tab)

## OLS regression transparency score

In [19]:
# Same specification as the score model, with transparency_mean as outcome.
cols = [
    "transparency_mean",
    "followers_count",
    "tweet_count",
    "belief_share",
    "truth_share",
    "party",
]
major_party_mask = users["party"].isin(["Democrat", "Republican"])
subset = users.loc[major_party_mask, cols]
mod = smf.ols(
    formula=(
        'transparency_mean ~ np.log(followers_count) + np.log(tweet_count)'
        ' + belief_share + truth_share + party'
        ' + party * belief_share + party * truth_share'
    ),
    data=subset,
)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,transparency_mean,R-squared:,0.412
Model:,OLS,Adj. R-squared:,0.408
Method:,Least Squares,F-statistic:,100.1
Date:,"Fri, 07 Oct 2022",Prob (F-statistic):,8.18e-111
Time:,16:08:33,Log-Likelihood:,298.12
No. Observations:,1008,AIC:,-580.2
Df Residuals:,1000,BIC:,-540.9
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,2.7736,0.048,57.978,0.000,2.680,2.867
party[T.Republican],-0.1278,0.033,-3.867,0.000,-0.193,-0.063
np.log(followers_count),-0.0081,0.005,-1.688,0.092,-0.017,0.001
np.log(tweet_count),0.0254,0.007,3.478,0.001,0.011,0.040
belief_share,-0.5925,0.394,-1.506,0.133,-1.365,0.180
party[T.Republican]:belief_share,-1.9665,0.569,-3.459,0.001,-3.082,-0.851
truth_share,0.1393,0.388,0.359,0.720,-0.622,0.901
party[T.Republican]:truth_share,-0.6766,0.516,-1.311,0.190,-1.689,0.336

0,1,2,3
Omnibus:,223.341,Durbin-Watson:,1.957
Prob(Omnibus):,0.0,Jarque-Bera (JB):,786.399
Skew:,-1.04,Prob(JB):,1.72e-171
Kurtosis:,6.794,Cond. No.,1730.0


In [20]:
# The formula interface drops rows with missing values before fitting, so the
# dependent-variable mean / S.D. reported in the table must be computed on the
# same rows. `subset` holds exactly the model columns at this point.
est_sample = subset.dropna()
assert len(est_sample) == int(res.nobs), "estimation sample does not match the fitted model"
tab = make_regression_table(res, est_sample, "transparency_mean", name_map)
dst = "../../tables"
fname = "OLS_table_transparency.txt"
with open(join(dst, fname), "w") as f:
    f.write(tab)

# Article texts

## OLS regression Score

In [10]:
# Load the article table, rename the "Score" column to "NG_score" and
# re-export the result as a gzip-compressed CSV in the same directory.
src = "../../data/articles"
fname = "articles_text_honesty_13_10_22.csv"
texts = pd.read_csv(join(src, fname)).rename(columns={"Score":"NG_score"})
texts.to_csv(
    join(src, "full_links_with_text_and_honesty.csv.gzip"),
    index=False,
    compression="gzip",
)

In [11]:
# Reload the compressed article table, reading only the listed columns.
src = "../../data/articles"
fname = "full_links_with_text_and_honesty.csv.gzip"
cols = [
    "belief",
    "truth",
    "other_words",
    "wc",
    "belief_prop",
    "truth_prop",
    "NG_score",
    "url",
    "party",
]
texts = pd.read_csv(join(src, fname), usecols=cols, compression="gzip")

In [12]:
# Export the four columns used by the article-level regression.
fname = "article_scores_with_parties.csv.gzip"
cols = ["NG_score", "belief_prop", "truth_prop", "party"]
article_scores = texts[cols]
article_scores.to_csv(join(src, fname), index=False, compression="gzip")

In [13]:
# Article-level OLS: NewsGuard score on the two honesty-component proportions,
# party, and the party x proportion interactions.
mod = smf.ols(
    formula=(
        'NG_score ~ belief_prop + truth_prop + party'
        ' + party * belief_prop + party * truth_prop'
    ),
    data=texts,
)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,NG_score,R-squared:,0.132
Model:,OLS,Adj. R-squared:,0.132
Method:,Least Squares,F-statistic:,4899.0
Date:,"Thu, 13 Oct 2022",Prob (F-statistic):,0.0
Time:,11:32:39,Log-Likelihood:,-627850.0
No. Observations:,160750,AIC:,1256000.0
Df Residuals:,160744,BIC:,1256000.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,94.8067,0.063,1513.853,0.000,94.684,94.929
party[T.Republican],-8.6008,0.101,-85.552,0.000,-8.798,-8.404
belief_prop,-16.3885,9.618,-1.704,0.088,-35.239,2.462
party[T.Republican]:belief_prop,-136.2484,14.876,-9.159,0.000,-165.406,-107.091
truth_prop,10.4743,8.188,1.279,0.201,-5.573,26.522
party[T.Republican]:truth_prop,-109.2503,13.714,-7.967,0.000,-136.129,-82.372

0,1,2,3
Omnibus:,63607.092,Durbin-Watson:,1.165
Prob(Omnibus):,0.0,Jarque-Bera (JB):,364869.037
Skew:,-1.821,Prob(JB):,0.0
Kurtosis:,9.42,Cond. No.,599.0


In [16]:
# Maps the term names of the article-level model to LaTeX row labels.
name_map = {
    "Intercept":"Intercept",
    "party[T.Republican]":"Republican",
    "belief_prop":"belief-speaking",
    "party[T.Republican]:belief_prop":"Rep. $\\times$ belief-speaking",
    "truth_prop":"truth-seeking",
    "party[T.Republican]:truth_prop":"Rep. $\\times$ truth-seeking"
}

# The formula interface drops rows with a missing value in any model column
# before fitting. `texts` also carries columns that are not in the model, so
# restrict the NaN filter to the model columns to recover the estimation sample
# for the dependent-variable mean / S.D. reported in the table.
model_cols = ["NG_score", "belief_prop", "truth_prop", "party"]
est_sample = texts.dropna(subset=model_cols)
assert len(est_sample) == int(res.nobs), "estimation sample does not match the fitted model"
tab = make_regression_table(res, est_sample, "NG_score", name_map)
dst = "../../tables"
fname = "OLS_table_article_NG_score.txt"
with open(join(dst, fname), "w") as f:
    f.write(tab)

## Prediction

In [17]:
# Article-level prediction grids: one proportion sweeps over
# `honesty_component_share`, the other is held at its mean over `texts`.
honesty_component_share = np.arange(-0.01, 0.21, 0.001)
N = len(honesty_component_share)

truth_prop_mean = texts["truth_prop"].mean()
belief_prop_mean = texts["belief_prop"].mean()

# Belief proportion varies, truth proportion fixed; one frame per party.
belief_pred_df_dem, belief_pred_df_rep = (
    pd.DataFrame({
        "belief_prop": honesty_component_share,
        "party": [party_label] * N,
        "truth_prop": [truth_prop_mean] * N,
    })
    for party_label in ("Democrat", "Republican")
)
# Truth proportion varies, belief proportion fixed; one frame per party.
truth_pred_df_dem, truth_pred_df_rep = (
    pd.DataFrame({
        "truth_prop": honesty_component_share,
        "party": [party_label] * N,
        "belief_prop": [belief_prop_mean] * N,
    })
    for party_label in ("Democrat", "Republican")
)

In [18]:
# Evaluate the fitted model on each grid, take the summary frame at alpha=0.05
# and tag it with the party and the honesty component that was varied.
prediction_specs = [
    (belief_pred_df_dem, "Democrat", "belief"),
    (belief_pred_df_rep, "Republican", "belief"),
    (truth_pred_df_dem, "Democrat", "truth"),
    (truth_pred_df_rep, "Republican", "truth"),
]
belief_pred_dem, belief_pred_rep, truth_pred_dem, truth_pred_rep = (
    res.get_prediction(grid)
    .summary_frame(alpha=0.05)
    .assign(party=party_label, honesty_component=component)
    for grid, party_label, component in prediction_specs
)

In [19]:
# Stack the four tagged prediction frames and store them next to the article data.
dst = "../../data/articles"
fname = "OLS_predictions_articles.csv"
prediction_frames = [belief_pred_dem, belief_pred_rep, truth_pred_dem, truth_pred_rep]
predictions = pd.concat(prediction_frames, ignore_index=True)
predictions.to_csv(join(dst, fname), index=False)