In [None]:
# author: Jana Lasser & Almog Simchon

In [1]:
import pandas as pd
import numpy as np
from os.path import join
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.stattools import durbin_watson
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
import matplotlib.pyplot as plt

# Tweets

## Data preparation for LME regression

In [5]:
# Location of the gzip-compressed tweet table.
src = "../../data/tweets"
fname = "US_politician_tweets_2010-11-06_to_2022-03-16.csv.gzip"

# Only the columns needed for the mixed-effects models are read.
cols = [
    "retweeted",         # flag used to drop retweets
    "author_id",         # grouping variable (random effect)
    "party",             # author characteristic (fixed effect)
    "avg_belief_score",  # predictor (fixed effect)
    "avg_truth_score",   # predictor (fixed effect)
    "NG_score",          # outcome
    "accuracy",          # outcome
    "transparency",      # outcome
]

# author_id is parsed as a string rather than as a number.
tweet_path = join(src, fname)
tweets = pd.read_csv(
    tweet_path,
    usecols=cols,
    compression="gzip",
    dtype={"author_id": str},
)

In [6]:
# Keep original (non-retweet) tweets; the flag column is not needed afterwards.
is_original = tweets["retweeted"] == False
tweets = tweets[is_original].drop(columns=["retweeted"])

# Restrict to the two major parties (drops independents).
major_parties = ["Democrat", "Republican"]
tweets = tweets[tweets["party"].isin(major_parties)]

# Drop every row that has a missing value in any remaining column.
tweets = tweets.dropna()
len(tweets)

208012

In [7]:
# Filter out authors with only a single tweet.
#
# `value_counts().reset_index()` names its columns differently before and after
# pandas 2.0 ("index"/"author_id" vs. "author_id"/"count"), so renaming them by
# hand produces duplicate "count" columns on newer versions. Naming the index
# and the value column explicitly gives the same table on every version.
tweet_counts = (
    tweets["author_id"]
    .value_counts()
    .rename_axis("author_id")
    .reset_index(name="count")
)

authors_with_multiple_tweets = tweet_counts.loc[tweet_counts["count"] > 1, "author_id"]
tweets = tweets[tweets["author_id"].isin(authors_with_multiple_tweets)]
len(tweets)

208010

In [8]:
# Derive the model variables in one step:
#   * belief / truth: similarity scores centred on their sample mean
#   * NG, accuracy, transparency: divided by 100, 5 and 3 respectively
#     (presumably the maximum of each scale -- TODO confirm)
# NOTE: accuracy and transparency are overwritten, so re-running this cell
# without re-running the cells above rescales them a second time.
tweets = tweets.assign(
    belief=tweets["avg_belief_score"] - tweets["avg_belief_score"].mean(),
    truth=tweets["avg_truth_score"] - tweets["avg_truth_score"].mean(),
    NG=tweets["NG_score"] / 100,
    accuracy=tweets["accuracy"] / 5,
    transparency=tweets["transparency"] / 3,
)

In [9]:
# Save the prepared tweet table as CSV (without the DataFrame index) so the
# same data can be modelled outside this notebook.
tweets.to_csv("../../data/tweets_for_lme_modelling.csv", index=False)

## LME models (consistency check with R)

In [13]:
# Linear mixed-effects model of the rescaled NewsGuard score: fixed effects for
# belief, truth, party and their interactions; tweets are grouped by author and
# the random-effects structure follows "~belief * truth".
md = smf.mixedlm(
    formula="NG ~ 1 + belief * truth + belief * truth * party",
    data=tweets,
    re_formula="~belief * truth",
    groups=tweets["author_id"],
)
# L-BFGS optimiser with a generous iteration budget.
mdf_NG = md.fit(maxiter=30000, method=["lbfgs"])
print(mdf_NG.summary())



                   Mixed Linear Model Regression Results
Model:                   MixedLM       Dependent Variable:       NG         
No. Observations:        208010        Method:                   REML       
No. Groups:              1012          Scale:                    0.0128     
Min. group size:         2             Log-Likelihood:           156342.2907
Max. group size:         1909          Converged:                Yes        
Mean group size:         205.5                                              
----------------------------------------------------------------------------
                                 Coef.  Std.Err.    z    P>|z| [0.025 0.975]
----------------------------------------------------------------------------
Intercept                         0.949    0.002 522.700 0.000  0.946  0.953
party[T.Republican]              -0.081    0.003 -30.618 0.000 -0.086 -0.076
belief                           -0.006    0.007  -0.908 0.364 -0.020  0.007
belief:party[T.Repu

In [23]:
# Same mixed-effects specification as for NG, with the rescaled accuracy
# score as the dependent variable.
md = smf.mixedlm(
    formula="accuracy ~ 1 + belief * truth + belief * truth * party",
    data=tweets,
    re_formula="~belief * truth",
    groups=tweets["author_id"],
)
mdf_acc = md.fit(maxiter=30000, method=["lbfgs"])
print(mdf_acc.summary())



                   Mixed Linear Model Regression Results
Model:                   MixedLM       Dependent Variable:       accuracy   
No. Observations:        208010        Method:                   REML       
No. Groups:              1012          Scale:                    0.0219     
Min. group size:         2             Log-Likelihood:           100085.0345
Max. group size:         1909          Converged:                Yes        
Mean group size:         205.5                                              
----------------------------------------------------------------------------
                                 Coef.  Std.Err.    z    P>|z| [0.025 0.975]
----------------------------------------------------------------------------
Intercept                         0.815    0.003 317.532 0.000  0.810  0.820
party[T.Republican]              -0.064    0.004 -17.147 0.000 -0.071 -0.057
belief                           -0.116    0.009 -12.312 0.000 -0.134 -0.097
belief:party[T.Repu

In [24]:
# Same mixed-effects specification as for NG, with the rescaled transparency
# score as the dependent variable.
md = smf.mixedlm(
    formula="transparency ~ 1 + belief * truth + belief * truth * party",
    data=tweets,
    re_formula="~belief * truth",
    groups=tweets["author_id"],
)
mdf_trans = md.fit(maxiter=30000, method=["lbfgs"])
print(mdf_trans.summary())



                   Mixed Linear Model Regression Results
Model:                  MixedLM       Dependent Variable:       transparency
No. Observations:       208010        Method:                   REML        
No. Groups:             1012          Scale:                    0.0220      
Min. group size:        2             Log-Likelihood:           99565.2910  
Max. group size:        1909          Converged:                Yes         
Mean group size:        205.5                                               
----------------------------------------------------------------------------
                                 Coef.  Std.Err.    z    P>|z| [0.025 0.975]
----------------------------------------------------------------------------
Intercept                         0.960    0.002 396.748 0.000  0.955  0.965
party[T.Republican]              -0.089    0.004 -25.216 0.000 -0.096 -0.082
belief                           -0.058    0.009  -6.332 0.000 -0.076 -0.040
belief:party[T.Repu

# Articles

## Regression table functionality

In [25]:
# LaTeX scaffolding shared by every regression table. Raw strings keep the
# backslashes literal without relying on invalid escape sequences such as "\e".
table_header = r'''
\begin{table}[]
    \footnotesize
    \centering
    \begin{tabular}{l|c|c|c|c|c|c}
            & coef. & std. err. & $t$ & $P>\vert t \vert$ & [0.025 & 0.975] \\
            \toprule
'''
table_footer = r'''
            \bottomrule
    \end{tabular}
    \caption{TODO}
    \label{tab:TODO}
\end{table}
'''


def make_regression_table(res, data, dep_col, name_map, decimals=3):
    """Render a fitted regression result as a LaTeX table.

    The table has one row per coefficient (estimate, standard error, t value,
    p value, confidence bounds) followed by a block of summary statistics.

    Parameters
    ----------
    res : fitted results object
        Must provide ``conf_int()``, ``params``, ``bse``, ``tvalues``,
        ``pvalues``, ``rsquared``, ``rsquared_adj``, ``mse_model``, ``aic``,
        ``bic``, ``ssr``, ``llf``, ``fvalue``, ``f_pvalue`` and ``resid``.
    data : pandas.DataFrame
        Data the model was fitted on; used for the mean and standard
        deviation of the dependent variable.
    dep_col : str
        Name of the dependent-variable column in ``data``.
    name_map : dict
        Maps each coefficient name in ``res`` to the label printed in the table.
    decimals : int, default 3
        Number of decimal places for every number in the table.

    Returns
    -------
    str
        The complete LaTeX ``table`` environment.

    Raises
    ------
    KeyError
        If a coefficient of ``res`` has no entry in ``name_map``.
    """
    fmt = f".{decimals}f"

    def _num(value):
        # Fixed-point formatting with the requested number of decimals.
        return format(value, fmt)

    def _stat_row(left_label, left_value, right_label, right_value):
        # Two label/value pairs spanning the seven table columns (2+1+2+2).
        # The values are wrapped in braces: without them \multicolumn would
        # only take the first character of the number as its argument.
        return (
            f"\t\t\\multicolumn{{2}}{{l}}{{{left_label}}} & "
            f"\\multicolumn{{1}}{{r}}{{{_num(left_value)}}} & "
            f"\\multicolumn{{2}}{{l}}{{{right_label}}} & "
            f"\\multicolumn{{2}}{{r}}{{{_num(right_value)}}} \\\\ \n"
        )

    tab = table_header
    conf_int = res.conf_int()
    for var in conf_int.index:
        label = name_map[var]
        cells = [
            res.params[var],
            res.bse[var],
            res.tvalues[var],
            res.pvalues[var],
            conf_int.loc[var, 0],  # lower confidence bound
            conf_int.loc[var, 1],  # upper confidence bound
        ]
        tab += f"\t\t{label} & " + " & ".join(_num(c) for c in cells) + " \\\\ \n"

    # Rule separating the coefficients from the summary statistics.
    tab += "\t\t\\bottomrule \n"

    mean_dep_var = data[dep_col].mean()
    std_dep_var = data[dep_col].std()
    tab += _stat_row("R-squared", res.rsquared, "Mean dependent var", mean_dep_var)
    tab += _stat_row("Adjusted R-squared", res.rsquared_adj, "S.D. dependent var", std_dep_var)
    tab += _stat_row("Model MSE", res.mse_model, "AIC", res.aic)
    tab += _stat_row("Sum squared resid", res.ssr, "BIC", res.bic)
    tab += _stat_row("Log-likelihood", res.llf, "F-statistic", res.fvalue)
    dw = durbin_watson(res.resid)
    tab += _stat_row("Durbin-Watson stat", dw, "Prob(F-statistic)", res.f_pvalue)

    tab += table_footer
    return tab

## OLS regression NewsGuard score on belief & truth similarity

In [26]:
# Article-level scores together with the party label.
src = "../../data/articles"
fname = "article_scores_with_parties.csv.gzip"
article_path = join(src, fname)
texts = pd.read_csv(article_path, compression="gzip")

In [27]:
# Divide the NewsGuard score by 100 and centre both similarity scores on their
# sample mean. NOTE: the columns are overwritten, so re-running this cell alone
# rescales NG_score a second time.
texts = texts.assign(
    NG_score=texts["NG_score"] / 100,
    avg_belief_score=texts["avg_belief_score"] - texts["avg_belief_score"].mean(),
    avg_truth_score=texts["avg_truth_score"] - texts["avg_truth_score"].mean(),
)

In [28]:
# OLS of the rescaled NewsGuard score on the two centred similarity scores,
# party, and the party interaction with each score.
mod = smf.ols(
    formula="NG_score ~ avg_belief_score + avg_truth_score + party + party * avg_belief_score + party * avg_truth_score",
    data=texts,
)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,NG_score,R-squared:,0.122
Model:,OLS,Adj. R-squared:,0.122
Method:,Least Squares,F-statistic:,3491.0
Date:,"Tue, 08 Nov 2022",Prob (F-statistic):,0.0
Time:,22:20:38,Log-Likelihood:,85056.0
No. Observations:,125595,AIC:,-170100.0
Df Residuals:,125589,BIC:,-170000.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.9438,0.000,2110.749,0.000,0.943,0.945
party[T.Republican],-0.0907,0.001,-127.537,0.000,-0.092,-0.089
avg_belief_score,-0.0081,0.008,-0.988,0.323,-0.024,0.008
party[T.Republican]:avg_belief_score,-0.3122,0.013,-24.323,0.000,-0.337,-0.287
avg_truth_score,0.0096,0.009,1.086,0.278,-0.008,0.027
party[T.Republican]:avg_truth_score,0.1587,0.014,11.453,0.000,0.132,0.186

0,1,2,3
Omnibus:,50139.899,Durbin-Watson:,1.366
Prob(Omnibus):,0.0,Jarque-Bera (JB):,286519.574
Skew:,-1.842,Prob(JB):,0.0
Kurtosis:,9.418,Cond. No.,60.9


In [29]:
# Printable LaTeX labels for each coefficient of the article-level model.
name_map = {
    "Intercept": "Intercept",
    "party[T.Republican]": "Republican",
    "avg_belief_score": "$D_b$",
    "party[T.Republican]:avg_belief_score": "Republican $\\times$ $D_b$",
    "avg_truth_score": "$D_t$",
    "party[T.Republican]:avg_truth_score": "Rep. $\\times$ $D_t$",
}

# Render the table and write it to the tables directory.
tab = make_regression_table(res, texts, "NG_score", name_map)
dst = "../../tables"
fname = "OLS_table_article_NG_score.txt"
table_path = join(dst, fname)
with open(table_path, "w") as f:
    f.write(tab)

## Prediction

In [30]:
# Grid of (centred) similarity scores at which the model is evaluated.
honesty_component_score = np.arange(-1.0, 0.4, 0.01)
N = len(honesty_component_score)

# Vary belief over the grid, hold truth at its sample mean -- one frame per party.
belief_pred_df_dem, belief_pred_df_rep = [
    pd.DataFrame({
        "avg_belief_score": honesty_component_score,
        "party": [party] * N,
        "avg_truth_score": [texts["avg_truth_score"].mean()] * N,
    })
    for party in ("Democrat", "Republican")
]

# Vary truth over the grid, hold belief at its sample mean -- one frame per party.
truth_pred_df_dem, truth_pred_df_rep = [
    pd.DataFrame({
        "avg_truth_score": honesty_component_score,
        "party": [party] * N,
        "avg_belief_score": [texts["avg_belief_score"].mean()] * N,
    })
    for party in ("Democrat", "Republican")
]

In [31]:
# Predict on each grid with the current `res`, summarise with 95% intervals
# (alpha=0.05) and tag every frame with its party and honesty component.
_prediction_specs = [
    (belief_pred_df_dem, "Democrat", "belief"),
    (belief_pred_df_rep, "Republican", "belief"),
    (truth_pred_df_dem, "Democrat", "truth"),
    (truth_pred_df_rep, "Republican", "truth"),
]
_summaries = []
for _grid, _party, _component in _prediction_specs:
    _frame = res.get_prediction(_grid).summary_frame(alpha=0.05)
    _frame["party"] = _party
    _frame["honesty_component"] = _component
    _summaries.append(_frame)

belief_pred_dem, belief_pred_rep, truth_pred_dem, truth_pred_rep = _summaries

In [32]:
# Attach the grid values to the component that was varied (NaN for the other
# one) and rename the summary columns to the names used in the exported file.
# The frames are modified in place.
_column_names = {"mean":"NG", "mean_ci_lower":"ymin", "mean_ci_upper":"ymax"}
_frames_by_component = (
    ("belief", [belief_pred_dem, belief_pred_rep]),
    ("truth", [truth_pred_dem, truth_pred_rep]),
)
for _component, _frames in _frames_by_component:
    for df in _frames:
        df["belief"] = honesty_component_score if _component == "belief" else np.nan
        df["truth"] = honesty_component_score if _component == "truth" else np.nan
        df["honesty_component"] = _component
        df.rename(columns=_column_names, inplace=True)

In [33]:
# Stack the four prediction frames (selected columns only) and export them.
dst = "../../data/articles"
fname = "OLS_predictions_articles_honesty.csv"
cols = ["NG", "belief", "truth", "ymin", "ymax", "party", "honesty_component"]
_prediction_frames = (belief_pred_dem, belief_pred_rep, truth_pred_dem, truth_pred_rep)
predictions = pd.concat(
    [frame[cols] for frame in _prediction_frames],
    ignore_index=True,
)
predictions.to_csv(join(dst, fname), index=False)

# Code graveyard

## OLS regression NewsGuard score on belief & truth score + emotions

In [77]:
# User-level columns for the emotion comparison, major parties only.
# NOTE(review): `users` is loaded in a later cell of this notebook.
cols = [
    "NG_score_mean", "accuracy_mean", "transparency_mean",
    "party", "followers_count", "tweet_count",
    "avg_belief_score", "avg_truth_score",
    "LIWC_emo_pos_mean", "LIWC_emo_neg_mean",
]
is_major_party = users["party"].isin(["Democrat", "Republican"])
subset = users.loc[is_major_party, cols]

In [80]:
# Correlation between belief similarity and negative emotion.
subset.loc[:, ["avg_belief_score", "LIWC_emo_neg_mean"]].corr()

Unnamed: 0,avg_belief_score,LIWC_emo_neg_mean
avg_belief_score,1.0,0.476722
LIWC_emo_neg_mean,0.476722,1.0


In [81]:
# Correlation between belief similarity and positive emotion.
subset.loc[:, ["avg_belief_score", "LIWC_emo_pos_mean"]].corr()

Unnamed: 0,avg_belief_score,LIWC_emo_pos_mean
avg_belief_score,1.0,-0.227538
LIWC_emo_pos_mean,-0.227538,1.0


In [82]:
# Correlation between truth similarity and negative emotion.
subset.loc[:, ["avg_truth_score", "LIWC_emo_neg_mean"]].corr()

Unnamed: 0,avg_truth_score,LIWC_emo_neg_mean
avg_truth_score,1.0,0.360038
LIWC_emo_neg_mean,0.360038,1.0


In [83]:
# Correlation between truth similarity and positive emotion.
subset.loc[:, ["avg_truth_score", "LIWC_emo_pos_mean"]].corr()

Unnamed: 0,avg_truth_score,LIWC_emo_pos_mean
avg_truth_score,1.0,-0.31507
LIWC_emo_pos_mean,-0.31507,1.0


In [84]:
# Three alternative model formulas; exactly one is active at a time and the
# other two are kept commented out for switching by hand.

# Variant 1 (active): belief-speaking and truth-seeking only
formula='NG_score_mean ~ np.log(followers_count) + np.log(tweet_count) + avg_belief_score + avg_truth_score + party + party * avg_belief_score + party * avg_truth_score'

# Variant 2: negative and positive emotions only
#formula='NG_score_mean ~ np.log(followers_count) + np.log(tweet_count) + LIWC_emo_neg_mean + LIWC_emo_pos_mean + party + party * LIWC_emo_neg_mean + party * LIWC_emo_pos_mean'

# Variant 3: belief-speaking & truth-seeking plus negative and positive emotions
#formula='NG_score_mean ~ np.log(followers_count) + np.log(tweet_count) + LIWC_emo_neg_mean + LIWC_emo_pos_mean + avg_belief_score + avg_truth_score + party + party * avg_belief_score + party * avg_truth_score + party * LIWC_emo_neg_mean + party * LIWC_emo_pos_mean'

# Fit the selected formula on the user-level subset and display the summary.
mod = smf.ols(formula, data=subset)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,NG_score_mean,R-squared:,0.529
Model:,OLS,Adj. R-squared:,0.526
Method:,Least Squares,F-statistic:,160.4
Date:,"Thu, 20 Oct 2022",Prob (F-statistic):,1.2299999999999999e-158
Time:,14:16:02,Log-Likelihood:,-2752.0
No. Observations:,1008,AIC:,5520.0
Df Residuals:,1000,BIC:,5559.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,89.1562,1.635,54.515,0.000,85.947,92.365
party[T.Republican],8.7359,2.205,3.962,0.000,4.409,13.062
np.log(followers_count),-0.3007,0.097,-3.102,0.002,-0.491,-0.110
np.log(tweet_count),0.1672,0.155,1.078,0.281,-0.137,0.472
avg_belief_score,9.0436,6.625,1.365,0.173,-3.957,22.045
party[T.Republican]:avg_belief_score,-107.2434,9.731,-11.021,0.000,-126.338,-88.148
avg_truth_score,3.5243,6.562,0.537,0.591,-9.353,16.401
party[T.Republican]:avg_truth_score,87.3222,9.598,9.098,0.000,68.487,106.158

0,1,2,3
Omnibus:,216.063,Durbin-Watson:,1.983
Prob(Omnibus):,0.0,Jarque-Bera (JB):,925.973
Skew:,-0.943,Prob(JB):,8.46e-202
Kurtosis:,7.3,Cond. No.,1700.0


## OLS regression NewsGuard score on belief & truth share

In [27]:
# User-level columns for the share-based models, major parties only.
cols = [
    "NG_score_mean", "accuracy_mean", "transparency_mean",
    "party", "followers_count", "tweet_count",
    "belief_share", "truth_share",
]
is_major_party = users["party"].isin(["Democrat", "Republican"])
subset = users.loc[is_major_party, cols]

In [28]:
# OLS of the mean NewsGuard score on belief/truth shares with party
# interactions, controlling for log followers and log tweet count.
cols = ["NG_score_mean", "followers_count", "tweet_count", "belief_share", "truth_share", "party"]
is_major_party = users["party"].isin(["Democrat", "Republican"])
subset = users.loc[is_major_party, cols]
mod = smf.ols(
    formula="NG_score_mean ~ np.log(followers_count) + np.log(tweet_count) + belief_share + truth_share + party + party * belief_share + party * truth_share",
    data=subset,
)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,NG_score_mean,R-squared:,0.464
Model:,OLS,Adj. R-squared:,0.46
Method:,Least Squares,F-statistic:,123.4
Date:,"Mon, 17 Oct 2022",Prob (F-statistic):,1.47e-130
Time:,10:38:25,Log-Likelihood:,-2817.5
No. Observations:,1008,AIC:,5651.0
Df Residuals:,1000,BIC:,5690.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,90.8089,1.061,85.627,0.000,88.728,92.890
party[T.Republican],-2.8889,0.726,-3.981,0.000,-4.313,-1.465
np.log(followers_count),-0.1345,0.106,-1.275,0.203,-0.342,0.073
np.log(tweet_count),0.5137,0.162,3.172,0.002,0.196,0.832
belief_share,4.0868,8.658,0.472,0.637,-12.902,21.076
party[T.Republican]:belief_share,-43.0604,12.501,-3.444,0.001,-67.593,-18.528
truth_share,8.8806,8.558,1.038,0.300,-7.912,25.673
party[T.Republican]:truth_share,-30.0959,11.311,-2.661,0.008,-52.292,-7.900

0,1,2,3
Omnibus:,221.329,Durbin-Watson:,1.949
Prob(Omnibus):,0.0,Jarque-Bera (JB):,859.008
Skew:,-0.999,Prob(JB):,2.94e-187
Kurtosis:,7.057,Cond. No.,1730.0


## Prediction belief & truth share

In [8]:
# Major-party users with follower and tweet counts replaced by their mean.
subset = users[users["party"].isin(["Democrat", "Republican"])].copy()
mean_cols = ["followers_count", "tweet_count"]
for col in mean_cols:
    subset[col] = subset[col].mean()

# Grid of share values at which the model is evaluated.
honesty_component_share = np.arange(-0.01, 1.01, 0.01)
N = len(honesty_component_share)

# Vary belief_share; hold the controls and truth_share at their means.
belief_pred_df_dem, belief_pred_df_rep = [
    pd.DataFrame({
        "belief_share": honesty_component_share,
        "party": [party] * N,
        "followers_count": [subset["followers_count"].mean()] * N,
        "tweet_count": [subset["tweet_count"].mean()] * N,
        "truth_share": [subset["truth_share"].mean()] * N,
    })
    for party in ("Democrat", "Republican")
]

# Vary truth_share; hold the controls and belief_share at their means.
truth_pred_df_dem, truth_pred_df_rep = [
    pd.DataFrame({
        "truth_share": honesty_component_share,
        "party": [party] * N,
        "followers_count": [subset["followers_count"].mean()] * N,
        "tweet_count": [subset["tweet_count"].mean()] * N,
        "belief_share": [subset["belief_share"].mean()] * N,
    })
    for party in ("Democrat", "Republican")
]

In [9]:
# Predict on each grid with the current `res` (NOTE(review): whichever model
# was fitted last -- confirm it is the share model), summarise with 95%
# intervals and tag every frame with its party and honesty component.
_prediction_specs = [
    (belief_pred_df_dem, "Democrat", "belief"),
    (belief_pred_df_rep, "Republican", "belief"),
    (truth_pred_df_dem, "Democrat", "truth"),
    (truth_pred_df_rep, "Republican", "truth"),
]
_summaries = []
for _grid, _party, _component in _prediction_specs:
    _frame = res.get_prediction(_grid).summary_frame(alpha=0.05)
    _frame["party"] = _party
    _frame["honesty_component"] = _component
    _summaries.append(_frame)

belief_pred_dem, belief_pred_rep, truth_pred_dem, truth_pred_rep = _summaries

In [10]:
# Stack the four prediction frames and export them.
dst = "../../data/users"
fname = "OLS_predictions_score.csv"
predictions = pd.concat(
    [belief_pred_dem, belief_pred_rep, truth_pred_dem, truth_pred_rep],
    ignore_index=True,
)
predictions.to_csv(join(dst, fname), index=False)

## OLS regression on NG_unreliable_share (cutoff 60)

In [11]:
# OLS of NG_unreliable_share on belief/truth shares with party interactions,
# controlling for log followers and log tweet count.
cols = ["NG_unreliable_share", "followers_count", "tweet_count", "belief_share", "truth_share", "party"]
is_major_party = users["party"].isin(["Democrat", "Republican"])
subset = users.loc[is_major_party, cols]
mod = smf.ols(
    formula="NG_unreliable_share ~ np.log(followers_count) + np.log(tweet_count) + belief_share + truth_share + party + party * belief_share + party * truth_share",
    data=subset,
)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,NG_unreliable_share,R-squared:,0.216
Model:,OLS,Adj. R-squared:,0.21
Method:,Least Squares,F-statistic:,39.32
Date:,"Fri, 07 Oct 2022",Prob (F-statistic):,5.96e-49
Time:,16:08:10,Log-Likelihood:,1791.2
No. Observations:,1008,AIC:,-3566.0
Df Residuals:,1000,BIC:,-3527.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0114,0.011,1.040,0.298,-0.010,0.033
party[T.Republican],-0.0046,0.008,-0.609,0.543,-0.019,0.010
np.log(followers_count),0.0024,0.001,2.221,0.027,0.000,0.005
np.log(tweet_count),-0.0033,0.002,-1.956,0.051,-0.007,1.01e-05
belief_share,0.0238,0.089,0.266,0.790,-0.152,0.199
party[T.Republican]:belief_share,0.6735,0.129,5.213,0.000,0.420,0.927
truth_share,-0.0940,0.088,-1.062,0.288,-0.268,0.080
party[T.Republican]:truth_share,0.0363,0.117,0.311,0.756,-0.193,0.266

0,1,2,3
Omnibus:,797.254,Durbin-Watson:,1.908
Prob(Omnibus):,0.0,Jarque-Bera (JB):,18828.754
Skew:,3.477,Prob(JB):,0.0
Kurtosis:,22.999,Cond. No.,1730.0


## Variance inflation factor

In [12]:
# Variance inflation factors for the predictors of the share-based models.
cols = ["followers_count", "tweet_count", "belief_share", "truth_share", "party"]
subset = users[users["party"].isin(["Democrat", "Republican"])][cols].dropna().copy()

# Encode party as a 0/1 indicator. `map` returns an integer column directly;
# `replace` with a str -> int mapping relies on silent downcasting, which is
# deprecated in recent pandas and would leave an object column behind.
subset["party"] = subset["party"].map({"Republican": 1, "Democrat": 0})

# add a constant which is needed according to this post: https://github.com/statsmodels/statsmodels/issues/2376
X = add_constant(subset)
pd.Series(
    [variance_inflation_factor(X.values, i) for i in range(X.shape[1])],
    index=X.columns,
)

const              9.267962
followers_count    1.089397
tweet_count        1.198772
belief_share       1.290596
truth_share        1.346980
party              1.073131
dtype: float64

## Prediction

In [13]:
# Major-party users with follower and tweet counts replaced by their mean.
subset = users[users["party"].isin(["Democrat", "Republican"])].copy()
mean_cols = ["followers_count", "tweet_count"]
for col in mean_cols:
    subset[col] = subset[col].mean()

# Grid of share values at which the model is evaluated.
honesty_component_share = np.arange(0, 1.01, 0.01)
N = len(honesty_component_share)

# Vary belief_share; hold the controls and truth_share at their means.
belief_pred_df_dem, belief_pred_df_rep = [
    pd.DataFrame({
        "belief_share": honesty_component_share,
        "party": [party] * N,
        "followers_count": [subset["followers_count"].mean()] * N,
        "tweet_count": [subset["tweet_count"].mean()] * N,
        "truth_share": [subset["truth_share"].mean()] * N,
    })
    for party in ("Democrat", "Republican")
]

# Vary truth_share; hold the controls and belief_share at their means.
truth_pred_df_dem, truth_pred_df_rep = [
    pd.DataFrame({
        "truth_share": honesty_component_share,
        "party": [party] * N,
        "followers_count": [subset["followers_count"].mean()] * N,
        "tweet_count": [subset["tweet_count"].mean()] * N,
        "belief_share": [subset["belief_share"].mean()] * N,
    })
    for party in ("Democrat", "Republican")
]

In [14]:
# Predict on each grid with the current `res`, summarise with 95% intervals
# and tag every frame with its party and honesty component.
_prediction_specs = [
    (belief_pred_df_dem, "Democrat", "belief"),
    (belief_pred_df_rep, "Republican", "belief"),
    (truth_pred_df_dem, "Democrat", "truth"),
    (truth_pred_df_rep, "Republican", "truth"),
]
_summaries = []
for _grid, _party, _component in _prediction_specs:
    _frame = res.get_prediction(_grid).summary_frame(alpha=0.05)
    _frame["party"] = _party
    _frame["honesty_component"] = _component
    _summaries.append(_frame)

belief_pred_dem, belief_pred_rep, truth_pred_dem, truth_pred_rep = _summaries

In [15]:
# Stack the four prediction frames and export them.
dst = "../../data/users"
fname = "OLS_predictions_unreliable_60.csv"
predictions = pd.concat(
    [belief_pred_dem, belief_pred_rep, truth_pred_dem, truth_pred_rep],
    ignore_index=True,
)
predictions.to_csv(join(dst, fname), index=False)

## OLS regression NewsGuard score on belief & truth proportion

In [10]:
# Read the raw article table, rename "Score" to "NG_score" and store the result
# gzip-compressed next to the original.
src = "../../data/articles"
fname = "articles_text_honesty_13_10_22.csv"
texts = pd.read_csv(join(src, fname)).rename(columns={"Score": "NG_score"})
texts.to_csv(
    join(src, "full_links_with_text_and_honesty.csv.gzip"),
    compression="gzip",
    index=False,
)

In [24]:
# Reload only the columns needed for the proportion-based article models.
src = "../../data/articles"
fname = "full_links_with_text_and_honesty.csv.gzip"
cols = [
    "belief", "truth", "other_words", "wc",
    "belief_prop", "truth_prop",
    "NG_score", "url", "party",
]
texts = pd.read_csv(join(src, fname), usecols=cols, compression="gzip")

In [12]:
# Export the score and party columns only (uses `src` from the cell above).
fname = "article_scores_with_parties.csv.gzip"
cols = ["NG_score", "belief_prop", "truth_prop", "party"]
texts.loc[:, cols].to_csv(join(src, fname), compression="gzip", index=False)

In [13]:
# OLS of the NewsGuard score on belief/truth word proportions with party
# interactions.
mod = smf.ols(
    formula="NG_score ~ belief_prop + truth_prop + party + party * belief_prop + party * truth_prop",
    data=texts,
)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,NG_score,R-squared:,0.132
Model:,OLS,Adj. R-squared:,0.132
Method:,Least Squares,F-statistic:,4899.0
Date:,"Thu, 13 Oct 2022",Prob (F-statistic):,0.0
Time:,11:32:39,Log-Likelihood:,-627850.0
No. Observations:,160750,AIC:,1256000.0
Df Residuals:,160744,BIC:,1256000.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,94.8067,0.063,1513.853,0.000,94.684,94.929
party[T.Republican],-8.6008,0.101,-85.552,0.000,-8.798,-8.404
belief_prop,-16.3885,9.618,-1.704,0.088,-35.239,2.462
party[T.Republican]:belief_prop,-136.2484,14.876,-9.159,0.000,-165.406,-107.091
truth_prop,10.4743,8.188,1.279,0.201,-5.573,26.522
party[T.Republican]:truth_prop,-109.2503,13.714,-7.967,0.000,-136.129,-82.372

0,1,2,3
Omnibus:,63607.092,Durbin-Watson:,1.165
Prob(Omnibus):,0.0,Jarque-Bera (JB):,364869.037
Skew:,-1.821,Prob(JB):,0.0
Kurtosis:,9.42,Cond. No.,599.0


In [16]:
# Printable LaTeX labels for each coefficient of the proportion-based model.
name_map = {
    "Intercept": "Intercept",
    "party[T.Republican]": "Republican",
    "belief_prop": "belief-speaking",
    "party[T.Republican]:belief_prop": "Rep. $\\times$ belief-speaking",
    "truth_prop": "truth-seeking",
    "party[T.Republican]:truth_prop": "Rep. $\\times$ truth-seeking",
}

# NOTE(review): this writes to the same file name as the similarity-based
# article table earlier in the notebook and would overwrite it -- confirm.
tab = make_regression_table(res, texts, "NG_score", name_map)
dst = "../../tables"
fname = "OLS_table_article_NG_score.txt"
table_path = join(dst, fname)
with open(table_path, "w") as f:
    f.write(tab)

## Prediction belief & truth proportion

In [17]:
# Grid of proportion values at which the model is evaluated.
honesty_component_share = np.arange(-0.01, 0.21, 0.001)
N = len(honesty_component_share)

# Vary belief_prop, hold truth_prop at its sample mean -- one frame per party.
belief_pred_df_dem, belief_pred_df_rep = [
    pd.DataFrame({
        "belief_prop": honesty_component_share,
        "party": [party] * N,
        "truth_prop": [texts["truth_prop"].mean()] * N,
    })
    for party in ("Democrat", "Republican")
]

# Vary truth_prop, hold belief_prop at its sample mean -- one frame per party.
truth_pred_df_dem, truth_pred_df_rep = [
    pd.DataFrame({
        "truth_prop": honesty_component_share,
        "party": [party] * N,
        "belief_prop": [texts["belief_prop"].mean()] * N,
    })
    for party in ("Democrat", "Republican")
]

In [18]:
# Predict on each grid with the current `res`, summarise with 95% intervals
# and tag every frame with its party and honesty component.
_prediction_specs = [
    (belief_pred_df_dem, "Democrat", "belief"),
    (belief_pred_df_rep, "Republican", "belief"),
    (truth_pred_df_dem, "Democrat", "truth"),
    (truth_pred_df_rep, "Republican", "truth"),
]
_summaries = []
for _grid, _party, _component in _prediction_specs:
    _frame = res.get_prediction(_grid).summary_frame(alpha=0.05)
    _frame["party"] = _party
    _frame["honesty_component"] = _component
    _summaries.append(_frame)

belief_pred_dem, belief_pred_rep, truth_pred_dem, truth_pred_rep = _summaries

In [19]:
# Stack the four prediction frames and export them.
dst = "../../data/articles"
fname = "OLS_predictions_articles.csv"
predictions = pd.concat(
    [belief_pred_dem, belief_pred_rep, truth_pred_dem, truth_pred_rep],
    ignore_index=True,
)
predictions.to_csv(join(dst, fname), index=False)

# Code graveyard

## OLS regression NewsGuard score on belief & truth similarity

In [52]:
# User-level (account) table; author_id is parsed as a string.
src = "../../data/users"
fname = "US_politician_accounts_2010-11-06_to_2022-03-16.csv"
users_path = join(src, fname)
users = pd.read_csv(users_path, dtype={"author_id": str})

In [48]:
# OLS of the mean NewsGuard score on belief similarity only (with party
# interaction), controlling for log followers and log tweet count.
cols = ["NG_score_mean", "followers_count", "tweet_count", "avg_belief_score", "party"]
is_major_party = users["party"].isin(["Democrat", "Republican"])
subset = users.loc[is_major_party, cols]
mod = smf.ols(
    formula="NG_score_mean ~ np.log(followers_count) + np.log(tweet_count) + avg_belief_score + party + party * avg_belief_score",
    data=subset,
)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,NG_score_mean,R-squared:,0.456
Model:,OLS,Adj. R-squared:,0.453
Method:,Least Squares,F-statistic:,167.7
Date:,"Mon, 31 Oct 2022",Prob (F-statistic):,1.31e-129
Time:,15:25:41,Log-Likelihood:,-2824.9
No. Observations:,1008,AIC:,5662.0
Df Residuals:,1002,BIC:,5691.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,87.6555,1.746,50.204,0.000,84.229,91.082
party[T.Republican],7.0807,2.363,2.996,0.003,2.443,11.719
np.log(followers_count),-0.2590,0.104,-2.500,0.013,-0.462,-0.056
np.log(tweet_count),0.5010,0.161,3.112,0.002,0.185,0.817
avg_belief_score,9.2735,3.096,2.995,0.003,3.198,15.349
party[T.Republican]:avg_belief_score,-25.4656,4.217,-6.039,0.000,-33.741,-17.190

0,1,2,3
Omnibus:,252.204,Durbin-Watson:,1.973
Prob(Omnibus):,0.0,Jarque-Bera (JB):,995.895
Skew:,-1.138,Prob(JB):,5.55e-217
Kurtosis:,7.304,Cond. No.,580.0


In [49]:
# OLS of the mean NewsGuard score on truth similarity only (with party
# interaction), controlling for log followers and log tweet count.
cols = ["NG_score_mean", "followers_count", "tweet_count", "avg_truth_score", "party"]
is_major_party = users["party"].isin(["Democrat", "Republican"])
subset = users.loc[is_major_party, cols]
mod = smf.ols(
    formula="NG_score_mean ~ np.log(followers_count) + np.log(tweet_count) + avg_truth_score + party + party * avg_truth_score",
    data=subset,
)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,NG_score_mean,R-squared:,0.44
Model:,OLS,Adj. R-squared:,0.438
Method:,Least Squares,F-statistic:,157.7
Date:,"Mon, 31 Oct 2022",Prob (F-statistic):,1.25e-123
Time:,15:26:14,Log-Likelihood:,-2838.8
No. Observations:,1008,AIC:,5690.0
Df Residuals:,1002,BIC:,5719.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,89.0843,1.611,55.309,0.000,85.924,92.245
party[T.Republican],-2.0764,2.150,-0.966,0.335,-6.296,2.144
np.log(followers_count),-0.3275,0.104,-3.157,0.002,-0.531,-0.124
np.log(tweet_count),0.4378,0.166,2.643,0.008,0.113,0.763
avg_truth_score,9.7876,3.116,3.141,0.002,3.672,15.903
party[T.Republican]:avg_truth_score,-9.5301,4.219,-2.259,0.024,-17.808,-1.252

0,1,2,3
Omnibus:,245.483,Durbin-Watson:,1.954
Prob(Omnibus):,0.0,Jarque-Bera (JB):,887.842
Skew:,-1.136,Prob(JB):,1.61e-193
Kurtosis:,6.997,Cond. No.,559.0


In [44]:
# OLS of the mean NewsGuard score on both similarity scores (each with a
# party interaction), controlling for log followers and log tweet count.
cols = [
    "NG_score_mean", "followers_count", "tweet_count",
    "avg_belief_score", "avg_truth_score", "party",
]
is_major_party = users["party"].isin(["Democrat", "Republican"])
subset = users.loc[is_major_party, cols]
mod = smf.ols(
    formula="NG_score_mean ~ np.log(followers_count) + np.log(tweet_count) + avg_belief_score + avg_truth_score + party + party * avg_belief_score + party * avg_truth_score",
    data=subset,
)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,NG_score_mean,R-squared:,0.529
Model:,OLS,Adj. R-squared:,0.526
Method:,Least Squares,F-statistic:,160.4
Date:,"Thu, 27 Oct 2022",Prob (F-statistic):,1.2299999999999999e-158
Time:,19:27:02,Log-Likelihood:,-2752.0
No. Observations:,1008,AIC:,5520.0
Df Residuals:,1000,BIC:,5559.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,89.1562,1.635,54.515,0.000,85.947,92.365
party[T.Republican],8.7359,2.205,3.962,0.000,4.409,13.062
np.log(followers_count),-0.3007,0.097,-3.102,0.002,-0.491,-0.110
np.log(tweet_count),0.1672,0.155,1.078,0.281,-0.137,0.472
avg_belief_score,9.0436,6.625,1.365,0.173,-3.957,22.045
party[T.Republican]:avg_belief_score,-107.2434,9.731,-11.021,0.000,-126.338,-88.148
avg_truth_score,3.5243,6.562,0.537,0.591,-9.353,16.401
party[T.Republican]:avg_truth_score,87.3222,9.598,9.098,0.000,68.487,106.158

0,1,2,3
Omnibus:,216.063,Durbin-Watson:,1.983
Prob(Omnibus):,0.0,Jarque-Bera (JB):,925.973
Skew:,-0.943,Prob(JB):,8.46e-202
Kurtosis:,7.3,Cond. No.,1700.0


## Variance inflation factor

In [39]:
# Variance inflation factors for the predictors of the similarity-based models.
cols = ["followers_count", "tweet_count", "avg_belief_score", "avg_truth_score", "party"]
subset = users[users["party"].isin(["Democrat", "Republican"])][cols].dropna().copy()

# Encode party as a 0/1 indicator. `map` returns an integer column directly;
# `replace` with a str -> int mapping relies on silent downcasting, which is
# deprecated in recent pandas and would leave an object column behind.
subset["party"] = subset["party"].map({"Republican": 1, "Democrat": 0})

# add a constant which is needed according to this post: https://github.com/statsmodels/statsmodels/issues/2376
X = add_constant(subset)
pd.Series(
    [variance_inflation_factor(X.values, i) for i in range(X.shape[1])],
    index=X.columns,
)

const               93.329365
followers_count      1.095096
tweet_count          1.154547
avg_belief_score     6.644917
avg_truth_score      6.811031
party                1.094327
dtype: float64

## Prediction

In [45]:
# Build the covariate grids that are later fed to `res.get_prediction`: one
# honesty component is swept over a grid while the remaining numeric
# covariates are held at their mean over the Democrat/Republican users.
subset = users[users["party"].isin(["Democrat", "Republican"])].copy()

# Take the means as scalars instead of overwriting the columns of `subset`
# with them: the resulting values are the same (up to floating-point
# rounding), and `subset` keeps the real per-user data for any later cell
# that reads it.
mean_cols = ["followers_count", "tweet_count"]
covariate_means = subset[mean_cols + ["avg_belief_score", "avg_truth_score"]].mean()

honesty_component_score = np.arange(-0.01, 1.01, 0.01)
N = len(honesty_component_score)


def _prediction_grid(swept_col, fixed_col, party):
    """Return the prediction grid for one party and one swept component.

    Parameters
    ----------
    swept_col : str
        Column that takes the values of `honesty_component_score`.
    fixed_col : str
        The other honesty component, held at its mean.
    party : str
        Party label repeated on every row.

    Returns
    -------
    pandas.DataFrame
        `N` rows with the columns `swept_col`, "party", "followers_count",
        "tweet_count" and `fixed_col` (in that order).
    """
    return pd.DataFrame({
        swept_col: honesty_component_score,
        "party": [party] * N,
        "followers_count": [covariate_means["followers_count"]] * N,
        "tweet_count": [covariate_means["tweet_count"]] * N,
        fixed_col: [covariate_means[fixed_col]] * N,
    })


belief_pred_df_dem = _prediction_grid("avg_belief_score", "avg_truth_score", "Democrat")
belief_pred_df_rep = _prediction_grid("avg_belief_score", "avg_truth_score", "Republican")
truth_pred_df_dem = _prediction_grid("avg_truth_score", "avg_belief_score", "Democrat")
truth_pred_df_rep = _prediction_grid("avg_truth_score", "avg_belief_score", "Republican")

In [46]:
def _labelled_prediction(grid, party, honesty_component):
    """Predict from the fitted model `res` on `grid` and label the result.

    Returns the prediction summary frame at alpha=0.05 with two extra
    columns, "party" and "honesty_component", set to the given labels.
    """
    frame = res.get_prediction(grid).summary_frame(alpha=0.05)
    frame["party"] = party
    frame["honesty_component"] = honesty_component
    return frame


# One summary frame per (honesty component, party) combination.
belief_pred_dem = _labelled_prediction(belief_pred_df_dem, "Democrat", "belief")
belief_pred_rep = _labelled_prediction(belief_pred_df_rep, "Republican", "belief")
truth_pred_dem = _labelled_prediction(truth_pred_df_dem, "Democrat", "truth")
truth_pred_rep = _labelled_prediction(truth_pred_df_rep, "Republican", "truth")

In [47]:
# Stack the four labelled prediction frames into a single table with a fresh
# 0..n-1 index and write it to CSV without the index column.
dst = "../../data/users"
fname = "OLS_predictions_score_honesty.csv"
prediction_frames = [
    belief_pred_dem,
    belief_pred_rep,
    truth_pred_dem,
    truth_pred_rep,
]
predictions = pd.concat(prediction_frames, ignore_index=True)
predictions.to_csv(join(dst, fname), index=False)

In [33]:
# Maps the term names of the fitted OLS models to the LaTeX labels passed to
# `make_regression_table`. Raw strings are used for every LaTeX label so that
# backslashes are taken literally: the previous mix of "\l" (an invalid
# escape sequence, which newer Python versions warn about) and "\\r"/"\\t"
# produced the same text but was fragile.
name_map = {
    "Intercept": "Intercept",
    "party[T.Republican]": "Republican",
    "np.log(followers_count)": r"$\log(N_f)$",
    "np.log(tweet_count)": r"$\log(N_t)$",
    "avg_belief_score": r"$\left<D_b\right>$",
    "party[T.Republican]:avg_belief_score": r"Republican $\times$ $\left<D_b\right>$",
    "avg_truth_score": r"$\left<D_t\right>$",
    "party[T.Republican]:avg_truth_score": r"Republican $\times$ $\left<D_t\right>$",
}

In [49]:
# Render the regression table for `NG_score_mean` and save it as a text file.
# NOTE(review): `subset` is whichever frame was last bound to that name; in
# file order that is the frame created in the Prediction section, not the one
# the NG-score model was fitted on. Confirm what `make_regression_table`
# reads from it.
dst = "../../tables"
fname = "OLS_table_NG_score.txt"
tab = make_regression_table(res, subset, "NG_score_mean", name_map)
out_path = join(dst, fname)
with open(out_path, "w") as table_file:
    table_file.write(tab)

## OLS regression accuracy score

In [35]:
# Fit an OLS model of `accuracy_mean` on the Democrat/Republican rows of `users`.
cols = [
    "accuracy_mean",
    "followers_count",
    "tweet_count",
    "avg_belief_score",
    "avg_truth_score",
    "party",
]
two_party_mask = users["party"].isin(["Democrat", "Republican"])
subset = users.loc[two_party_mask, cols]

# Same right-hand side as the other OLS models in this section: log counts,
# both honesty components, party, and the party-by-component interactions.
mod = smf.ols(
    formula='accuracy_mean ~ np.log(followers_count) + np.log(tweet_count) + avg_belief_score + avg_truth_score + party + party * avg_belief_score + party * avg_truth_score',
    data=subset,
)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,accuracy_mean,R-squared:,0.384
Model:,OLS,Adj. R-squared:,0.38
Method:,Least Squares,F-statistic:,89.0
Date:,"Fri, 28 Oct 2022",Prob (F-statistic):,1.0900000000000001e-100
Time:,14:56:22,Log-Likelihood:,-212.87
No. Observations:,1008,AIC:,441.7
Df Residuals:,1000,BIC:,481.1
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,4.7227,0.131,36.158,0.000,4.466,4.979
party[T.Republican],0.0061,0.177,0.034,0.973,-0.342,0.354
np.log(followers_count),-0.0233,0.008,-2.998,0.003,-0.039,-0.008
np.log(tweet_count),-0.0114,0.012,-0.922,0.357,-0.036,0.013
avg_belief_score,-2.8390,0.532,-5.340,0.000,-3.882,-1.796
party[T.Republican]:avg_belief_score,-5.8167,0.784,-7.420,0.000,-7.355,-4.278
avg_truth_score,2.5290,0.527,4.795,0.000,1.494,3.564
party[T.Republican]:avg_truth_score,5.6927,0.775,7.349,0.000,4.173,7.213

0,1,2,3
Omnibus:,219.166,Durbin-Watson:,1.998
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1549.123
Skew:,-0.801,Prob(JB):,0.0
Kurtosis:,8.858,Cond. No.,1700.0


In [36]:
# Render the regression table for `accuracy_mean` and save it as a text file.
dst = "../../tables"
fname = "OLS_table_accuracy.txt"
tab = make_regression_table(res, subset, "accuracy_mean", name_map)
out_path = join(dst, fname)
with open(out_path, "w") as table_file:
    table_file.write(tab)

## OLS regression transparency score

In [37]:
# Fit an OLS model of `transparency_mean` on the Democrat/Republican rows of `users`.
cols = [
    "transparency_mean",
    "followers_count",
    "tweet_count",
    "avg_belief_score",
    "avg_truth_score",
    "party",
]
two_party_mask = users["party"].isin(["Democrat", "Republican"])
subset = users.loc[two_party_mask, cols]

# Same right-hand side as the other OLS models in this section: log counts,
# both honesty components, party, and the party-by-component interactions.
mod = smf.ols(
    formula='transparency_mean ~ np.log(followers_count) + np.log(tweet_count) + avg_belief_score + avg_truth_score + party + party * avg_belief_score + party * avg_truth_score',
    data=subset,
)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,transparency_mean,R-squared:,0.491
Model:,OLS,Adj. R-squared:,0.487
Method:,Least Squares,F-statistic:,137.8
Date:,"Fri, 28 Oct 2022",Prob (F-statistic):,6.51e-142
Time:,14:56:23,Log-Likelihood:,370.74
No. Observations:,1008,AIC:,-725.5
Df Residuals:,1000,BIC:,-686.2
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,2.9567,0.073,40.389,0.000,2.813,3.100
party[T.Republican],0.0879,0.099,0.884,0.377,-0.107,0.283
np.log(followers_count),-0.0140,0.004,-3.201,0.001,-0.023,-0.005
np.log(tweet_count),0.0070,0.007,1.013,0.311,-0.007,0.021
avg_belief_score,-0.9482,0.298,-3.182,0.002,-1.533,-0.363
party[T.Republican]:avg_belief_score,-3.9030,0.439,-8.884,0.000,-4.765,-3.041
avg_truth_score,1.0542,0.296,3.566,0.000,0.474,1.634
party[T.Republican]:avg_truth_score,3.5803,0.434,8.246,0.000,2.728,4.432

0,1,2,3
Omnibus:,337.869,Durbin-Watson:,2.014
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2449.635
Skew:,-1.341,Prob(JB):,0.0
Kurtosis:,10.15,Cond. No.,1700.0


In [38]:
# Render the regression table for `transparency_mean` and save it as a text file.
dst = "../../tables"
fname = "OLS_table_transparency.txt"
tab = make_regression_table(res, subset, "transparency_mean", name_map)
out_path = join(dst, fname)
with open(out_path, "w") as table_file:
    table_file.write(tab)