In [3]:
# Necessary data cleaning, do not touch.
# Notebook setup: display behaviour, library imports, and plot defaults.
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every expression statement in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

# Import modules and functions

import numpy as np
import pandas as pd
import seaborn as sns
# Default figure size (width, height) in inches for plots drawn after this point.
sns.set(rc={'figure.figsize':(15, 8)})

# Load the raw CSV and clean it in one non-mutating pipeline, so re-running
# this cell always rebuilds `tweets_data` from the file.
tweets_data = (
    pd.read_csv('tweetment_effect.csv')
    # Give the treatment column a readable name before it is used below.
    .rename(columns={'treat.f': 'treatment_arm'})
    # Drop the columns this notebook does not keep: 'X.1' and 'X', the
    # 'In_group' and 'high_followers' columns, and the 1-week / 2-week
    # post-treatment racism scores.
    .drop(columns=[
        'X.1',
        'X',
        'In_group',
        'high_followers',
        'racism.scores.post.1wk',
        'racism.scores.post.2wk',
    ])
    .assign(
        # 1 when treatment_arm > 0, otherwise 0. This is evaluated before the
        # cast below because `>` is not defined for an unordered categorical.
        any_treatment=lambda frame: (frame['treatment_arm'] > 0).astype(int),
        # Store the treatment arm as a categorical (factor) column.
        treatment_arm=lambda frame: frame['treatment_arm'].astype('category'),
    )
    # Replace spaces, dots and hyphens in column names with underscores so the
    # names can be used directly inside model formulas.
    .rename(columns=lambda label: label.translate(str.maketrans(' .-', '___')))
)

In [35]:
import statsmodels.api as sm
from stargazer.stargazer import Stargazer

# Binary anonymity indicator: 1 where `anonymity` is non-zero, 0 where it is zero.
tweets_data['anonymity_binary'] = tweets_data['anonymity'].ne(0).astype(int)

# Model (1): 2-month post-treatment racism score regressed on the
# any-treatment indicator, with HC1 heteroskedasticity-robust standard errors.
reg_robust = sm.OLS.from_formula(
    'racism_scores_post_2mon ~ any_treatment',
    data=tweets_data,
).fit(cov_type='HC1')

# Model (2): same outcome; `a*b` in the formula expands to both main effects
# plus the any_treatment:anonymity_binary interaction.
reg_robust_hetero = sm.OLS.from_formula(
    'racism_scores_post_2mon ~ any_treatment*anonymity_binary',
    data=tweets_data,
).fit(cov_type='HC1')

# Put both models side by side in one regression table and display it.
result = Stargazer([reg_robust, reg_robust_hetero])
result

0,1,2
,,
,Dependent variable: racism_scores_post_2mon,Dependent variable: racism_scores_post_2mon
,,
,(1),(2)
,,
Intercept,0.252***,0.104***
,(0.063),(0.040)
anonymity_binary,,0.172**
,,(0.083)
any_treatment,-0.083,-0.006


In [36]:
# Polish the regression table built in the previous cell.

# Covariates: show the treatment terms first and the constant last, with
# readable labels in place of the raw formula term names.
result.covariate_order([
    'any_treatment',
    'anonymity_binary',
    'any_treatment:anonymity_binary',
    'Intercept',
])
result.rename_covariates({
    'Intercept': 'Constant',
    'any_treatment': 'Treatment',
    'anonymity_binary': 'Anonymity',
    'any_treatment:anonymity_binary': 'Treatment*Anonymity',
})

# Header rows: outcome name and the column label.
result.dependent_variable_name('Post-treatment Racism (2 months)')
result.custom_columns('OLS w/o controls')

# Footer: hide the adjusted R2, F statistic, residual std. error and the
# degrees of freedom. Confidence intervals stay disabled
# (result.show_confidence_intervals(True) would enable them).
result.show_adj_r2 = False
result.show_f_statistic = False
result.show_residual_std_err = False
result.show_degrees_of_freedom(False)

result

0,1,2
,,
,Dependent variable: Post-treatment Racism (2 months),Dependent variable: Post-treatment Racism (2 months)
,OLS w/o controls,OLS w/o controls
,(1),(2)
,,
Treatment,-0.083,-0.006
,(0.069),(0.052)
Anonymity,,0.172**
,,(0.083)
Treatment*Anonymity,,-0.092


In [45]:
# Heterogeneous treatment effect by prior racism: the 2-month post-treatment
# racism score is regressed on the any-treatment indicator, the 2-month
# pre-treatment racism score, and their interaction (`a*b` expands to both
# main effects plus a:b). Standard errors are HC1 heteroskedasticity-robust.
# Only this model is fit here because it is the only one shown in the table.
reg_robust_hetero_preracism = sm.OLS.from_formula(
    'racism_scores_post_2mon ~ any_treatment*racism_scores_pre_2mon',
    data=tweets_data,
).fit(cov_type='HC1')

# Single-column regression table for the interaction model.
result = Stargazer([reg_robust_hetero_preracism])

# Header rows: outcome name and an empty column label.
result.dependent_variable_name('Racism (2 months after)')
result.custom_columns('')

# Covariates: readable labels, treatment terms first and the constant last.
result.rename_covariates({
    'Intercept': 'Constant',
    'any_treatment': 'Treatment',
    'racism_scores_pre_2mon': 'Prior Racism',
    'any_treatment:racism_scores_pre_2mon': 'Treatment*Prior Racism',
})
result.covariate_order([
    'any_treatment',
    'racism_scores_pre_2mon',
    'any_treatment:racism_scores_pre_2mon',
    'Intercept',
])

# Footer: hide degrees of freedom, adjusted R2, F statistic and residual
# std. error. Confidence intervals stay disabled
# (result.show_confidence_intervals(True) would enable them).
result.show_degrees_of_freedom(False)
result.show_adj_r2 = False
result.show_f_statistic = False
result.show_residual_std_err = False

result

0,1
,
,Dependent variable: Racism (2 months after)
,
,(1)
,
Treatment,-0.014
,(0.041)
Prior Racism,0.715***
,(0.135)
Treatment*Prior Racism,0.309
