In [22]:
import pandas as pd
import pyfixest as pf
from stargazer.stargazer import Stargazer
import numpy as np



# Analyze the experiment with an across subjects design:

The observations are unique at a headline and subject level, where the subject is denoted as 'responseid' and the headline is denoted 'question'. The 'treatment' variable denotes whether the headline was presented as being generated by an AI.

In [23]:

# Load the across-subjects responses from the working directory
# (point the path elsewhere if the CSV lives in another folder),
# then preview the first five rows.
data = pd.read_csv(filepath_or_buffer="lucid_across_byq.csv")
data.head(n=5)

Unnamed: 0,responseid,question,date_num,startdate,enddate,recordeddate,finished,severe,risk_1,risk_2,...,sum_response_ctr,mean_correct_q,sum_correct_ctr,sum_answers_ctr,share_correct_jack,mean_val_jack,concern_numeric,severe_numeric,value_alt,waves_bin
0,R_01Dp1WwOlFUK8FP,f1,6,5/2/20 11:47,5/2/20 11:59,5/2/20 11:59,True,5,50.0,50.0,...,1007,0.674481,1007,1493,0.674263,0.674263,5,5,0,6
1,R_01Dp1WwOlFUK8FP,f10,6,5/2/20 11:47,5/2/20 11:59,5/2/20 11:59,True,5,50.0,50.0,...,883,0.591427,883,1493,0.591823,0.589812,5,5,-2,6
2,R_01Dp1WwOlFUK8FP,f11,6,5/2/20 11:47,5/2/20 11:59,5/2/20 11:59,True,5,50.0,50.0,...,1008,0.675151,1008,1493,0.674933,0.674933,5,5,0,6
3,R_01Dp1WwOlFUK8FP,f12,6,5/2/20 11:47,5/2/20 11:59,5/2/20 11:59,True,5,50.0,50.0,...,694,0.464836,694,1493,0.465147,0.462466,5,5,-3,6
4,R_01Dp1WwOlFUK8FP,f13,6,5/2/20 11:47,5/2/20 11:59,5/2/20 11:59,True,5,50.0,50.0,...,750,0.502344,750,1493,0.502011,0.502011,5,5,0,6


In [28]:

# Across-subjects estimates of the effect of `treatment` on `value`.

# (1) No fixed effects, heteroskedasticity-robust standard errors.
reg_basic_no_cluster_se = pf.feols(
    fml="value ~ treatment",
    data=data,
    vcov="hetero",
)

# (2) Same specification, standard errors clustered by subject (responseid).
reg_basic = pf.feols(
    fml="value ~ treatment",
    data=data,
    vcov={"CRV1": "responseid"},
)

# (3) Adds question (headline) fixed effects, still clustered by subject.
reg_resp_qfe = pf.feols(
    fml="value ~ treatment | question",
    data=data,
    vcov={"CRV1": "responseid"},
)

# Show the three models side by side.
pf.etable([reg_basic_no_cluster_se, reg_basic, reg_resp_qfe])


                           est1               est2               est3
------------  -----------------  -----------------  -----------------
depvar                    value              value              value
---------------------------------------------------------------------
Intercept      2.601*** (0.004)   2.601*** (0.010)
treatment     -0.076*** (0.006)  -0.076*** (0.015)  -0.076*** (0.015)
---------------------------------------------------------------------
question                      -                  -                  x
---------------------------------------------------------------------
R2                        0.001              0.001              0.093
S.E. type                hetero     by: responseid     by: responseid
Observations              1E+05              1E+05              1E+05
---------------------------------------------------------------------
Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001
Format of coefficient cell:
Coefficient (Std. Error)

# Analyze the experiment with a within subjects design:

The observations are unique at the headline-by-subject level, where the subject is denoted 'responseid' and the headline is denoted 'question'. The 'treatment' variable denotes whether the headline was presented as being generated by an AI. Note that randomization is done at the question-by-subject level.

In [25]:
# Load the within-subjects responses and preview the first five rows.
data_within = pd.read_csv(filepath_or_buffer="lucid_within_byq.csv")
data_within.head(n=5)

Unnamed: 0,responseid,question_num,question,startdate,enddate,recordeddate,finished,severe,risk_1,risk_2,...,mean_response,sum_response_ctr,mean_correct_q,sum_correct_ctr,sum_answers_ctr,share_correct_jack,mean_val_jack,concern_numeric,severe_numeric,value_alt
0,R_00PMpA4QLBbQQ1P,1,t11,5/6/20 12:21,5/6/20 12:29,5/6/20 12:29,True,5,,49.0,...,2.847913,368,0.69697,368,528,0.69697,0.69697,5,5,3
1,R_00PMpA4QLBbQQ1P,10,t21,5/6/20 12:21,5/6/20 12:29,5/6/20 12:29,True,5,,49.0,...,2.569583,283,0.550584,283,514,0.551657,0.547758,5,5,2
2,R_00PMpA4QLBbQQ1P,11,f22,5/6/20 12:21,5/6/20 12:29,5/6/20 12:29,True,5,,49.0,...,2.213718,307,0.592664,307,518,0.591876,0.589942,5,5,-1
3,R_00PMpA4QLBbQQ1P,12,f23,5/6/20 12:21,5/6/20 12:29,5/6/20 12:29,True,5,,49.0,...,2.650099,176,0.356275,176,494,0.356998,0.348884,5,5,-3
4,R_00PMpA4QLBbQQ1P,13,f14,5/6/20 12:21,5/6/20 12:29,5/6/20 12:29,True,5,,49.0,...,2.595427,217,0.448347,217,484,0.447205,0.445135,5,5,-1


In [26]:
# Within-subjects estimates of the effect of `treatment` on `value`.
# Every model clusters standard errors by subject (responseid).

# (1) No fixed effects.
reg_basic_within = pf.feols(
    fml="value ~ treatment",
    data=data_within,
    vcov={"CRV1": "responseid"},
)

# (2) Question (headline) fixed effects.
reg_resp_qfe_within = pf.feols(
    fml="value ~ treatment | question",
    data=data_within,
    vcov={"CRV1": "responseid"},
)

# (3) Question and subject fixed effects.
reg_resp_qfe_subjecfe_within = pf.feols(
    fml="value ~ treatment | question + responseid",
    data=data_within,
    vcov={"CRV1": "responseid"},
)

# Show the three models side by side.
pf.etable([reg_basic_within, reg_resp_qfe_within, reg_resp_qfe_subjecfe_within])



                           est1               est2               est3
------------  -----------------  -----------------  -----------------
depvar                    value              value              value
---------------------------------------------------------------------
Intercept      2.690*** (0.014)
treatment     -0.145*** (0.015)  -0.142*** (0.015)  -0.142*** (0.015)
---------------------------------------------------------------------
question                      -                  x                  x
responseid                    -                  -                  x
---------------------------------------------------------------------
R2                        0.005              0.059              0.221
S.E. type        by: responseid     by: responseid     by: responseid
Observations              2E+04              2E+04              2E+04
---------------------------------------------------------------------
Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001

Clean up the table to look nicer:

In [27]:
# Build a presentation-ready version of the within-subjects regression table.
# In the etable output the rows appear in this order:
#   depvar, Intercept, treatment, question FE, responseid FE,
#   R2, S.E. type, Observations
reg_table = pf.etable([reg_basic_within, reg_resp_qfe_within, reg_resp_qfe_subjecfe_within], type = 'df')

# Relabel the dependent-variable row; its first cell is left blank.
reg_table.iloc[0, :] = ['', 'Accuracy', 'Accuracy', 'Accuracy']

# Change column names to be more informative:
reg_table.columns = ['', 'Basic', 'Question FE', 'Question + Subject FE']

# Remove the index (reassign instead of inplace so the cell is safe to re-run):
reg_table = reg_table.reset_index(drop = True)

# Change variable names. The labels must follow the row order listed above:
# the `question` fixed-effect row comes BEFORE the `responseid` one, so
# 'Question FE' precedes 'Response FE' (they were previously swapped, which
# mislabelled which fixed effects each model includes).
reg_table.iloc[:, 0] = ['', 'Constant', 'AI Reporter', 'Question FE', 'Response FE', 'R2', 'SE Type', 'N']
reg_table.style.hide(axis = 'index')


Unnamed: 0,Basic,Question FE,Question + Subject FE
,Accuracy,Accuracy,Accuracy
Constant,2.690*** (0.014),,
AI Reporter,-0.145*** (0.015),-0.142*** (0.015),-0.142*** (0.015)
Question FE,-,x,x
Response FE,-,-,x
R2,0.005,0.059,0.221
SE Type,by: responseid,by: responseid,by: responseid
N,2E+04,2E+04,2E+04
