# Estimate Continuous-time outcome measures
This notebook performs hazard rate regressions to estimate the predicted outcomes of the game that are measured continuously as users take actions.

This notebook uses R, with the `plyr` and `survival` packages.

In [1]:
library(plyr)
library(survival)

In [2]:
# Directory holding the pilot results, and the per-block hazard files found in it.
working_dir <- "../results-anonymized/pilot/"
filenames <- Sys.glob(paste0(working_dir, "block_*_hazards.csv"))
filenames

In [3]:
# Read every per-block hazard file and combine them into a single data frame.
# Stop early with a clear message when the glob matched nothing: otherwise the
# combined frame has no columns and the failure only shows up further down as
# an unrelated "object not found" error when the frame is subset.
if (length(filenames) == 0) {
  stop("No block_*_hazards.csv files found in ", working_dir, call. = FALSE)
}
df <- ldply(
  lapply(filenames, read.csv, header = TRUE, stringsAsFactors = TRUE),
  rbind
)
dim(df)

In [4]:
# List the columns available in the combined data frame.
colnames(df)

In [5]:
# Keep only the rows used by the adoption models:
#   * clues that are still candidates for adoption (not in leads, at least one exposure),
#   * spoke clues only, since those are the same across conditions,
#   * excluding clues the individual was exposed to at t0, because their
#     neighbors did not choose to adopt those.
# which() discards rows where the condition is NA, as subset() would.
adopt_ht <- df[
  which(with(
    df,
    is_in_leads == 0 & n_exposures > 0 & is_spoke == 1 & is_exposed_t0 == 0
  )),
  ,
  drop = FALSE
]
dim(adopt_ht)

In [6]:
# Treatment interactions: each covariate multiplied by the treatment indicator.
adopt_ht$t_n_exposures <- with(adopt_ht, n_exposures * is_treatment_condition)
adopt_ht$t_n_rim_connections <- with(adopt_ht, n_rim_connections * is_treatment_condition)
adopt_ht$t_is_in_deads <- with(adopt_ht, is_in_deads * is_treatment_condition)
# Numeric 0/1 flag: is there at least one triangle path?
adopt_ht$triangle_paths_exist <- as.numeric(adopt_ht$n_triangle_paths > 0)
adopt_ht$t_n_edges_shared_with_exposers <- with(
  adopt_ht,
  n_edges_shared_with_exposers * is_treatment_condition
)
head(adopt_ht)

Unnamed: 0,X,start,exposure_id,player_id,game_id,is_treatment_condition,is_spoke,is_link_or_spur,is_prompt,is_in_leads,...,is_held_t0,stop,adopt_event,forget_event,is_caveman_game,t_n_exposures,t_n_rim_connections,t_is_in_deads,triangle_paths_exist,t_n_edges_shared_with_exposers
200,643040,98.624,2TGzq8Txds8TAmHFE_tclue_1_10,2TGzq8Txds8TAmHFE,xXvsHQvoQC4LsoyCi,1,1,0,0,0,...,0,102.463,1,0,0,1,2,0,0,2
300,3435441,438.922,2TGzq8Txds8TAmHFE_tclue_1_11,2TGzq8Txds8TAmHFE,xXvsHQvoQC4LsoyCi,1,1,0,0,0,...,0,441.372,0,0,0,1,2,0,1,3
301,3457281,441.372,2TGzq8Txds8TAmHFE_tclue_1_11,2TGzq8Txds8TAmHFE,xXvsHQvoQC4LsoyCi,1,1,0,0,0,...,0,445.579,0,0,0,1,2,0,1,3
302,3482241,445.579,2TGzq8Txds8TAmHFE_tclue_1_11,2TGzq8Txds8TAmHFE,xXvsHQvoQC4LsoyCi,1,1,0,0,0,...,0,457.26,1,0,0,1,2,0,1,3
444,680474,102.418,2TGzq8Txds8TAmHFE_tclue_1_4,2TGzq8Txds8TAmHFE,xXvsHQvoQC4LsoyCi,1,1,0,0,0,...,0,102.463,0,0,0,1,0,0,0,2
445,683594,102.463,2TGzq8Txds8TAmHFE_tclue_1_4,2TGzq8Txds8TAmHFE,xXvsHQvoQC4LsoyCi,1,1,0,0,0,...,0,104.378,0,0,0,1,0,0,0,2


# Estimate effect of familiarity, supporting pathways on adoption

Use this regression in the manipulation check. If `n_triangle_paths` has no effect, then we expect the treatment and control conditions to be identical: there is no interdependence between clues other than through the familiarity mechanism, and so the spoke clues are independent of one another.

In [7]:
# Cox regression of clue adoption on timing, social reinforcement, logical
# support, familiarity, prior rejection and notebook state, with a Gaussian
# frailty term per player. Rows of adopt_ht are (start, stop] intervals, so the
# response uses the counting-process form of Surv().
#
# The call must not end with a trailing comma after `data`: that passes an
# empty argument through coxph's `...`, which recent versions of the survival
# package are expected to reject.
#
# NOTE(review): in the recorded output three of the four timing dummies have
# no coefficient and in_peak has a very large one with a huge standard error.
# This looks like the timing dummies are degenerate or collinear in this
# subset; confirm before interpreting them.
factors_fit <- coxph(
    Surv(time = start, time2 = stop, event = adopt_event, type = 'counting') ~
        in_startup + in_peak + in_close + in_tail + # timing dummies
        #is_link + is_spur + is_spoke + # is_prompt + # type of clue dummies
        n_exposures + t_n_exposures + # social reinforcement
        n_triangle_paths +  # "logical" support
        #triangle_paths_exist +
        n_rim_connections + t_n_rim_connections + # familiarity with concepts
        is_in_deads + t_is_in_deads + # prior rejection
        n_existing_leads +  # current state of notebook
        frailty.gaussian(player_id),  # random player effect (as players have different play speeds)
    data = adopt_ht
)

factors_fit

“Inner loop failed to coverge for iterations 2 4 5 6”

Call:
coxph(formula = Surv(time = start, time2 = stop, event = adopt_event, 
    type = "counting") ~ in_startup + in_peak + in_close + in_tail + 
    n_exposures + t_n_exposures + n_triangle_paths + n_rim_connections + 
    t_n_rim_connections + is_in_deads + t_is_in_deads + n_existing_leads + 
    frailty.gaussian(player_id), data = adopt_ht)

                               coef  se(coef)       se2     Chisq  DF       p
in_startup                           0.00e+00  0.00e+00             1        
in_peak                    2.58e+01  3.53e+01  3.53e+01  5.32e-01   1    0.47
in_close                             0.00e+00  0.00e+00             1        
in_tail                              0.00e+00  0.00e+00             1        
n_exposures                5.01e-01  6.75e-02  6.27e-02  5.50e+01   1 1.2e-13
t_n_exposures             -1.15e-01  8.15e-02  6.47e-02  2.01e+00   1    0.16
n_triangle_paths           5.79e-02  5.65e-02  5.48e-02  1.05e+00   1    0.31
n_rim_connections          4

In [8]:
# Join the confidence-interval table and the coefficient table of the fitted
# model on their row names (one row per model term), then save the result.
factors_summary <- summary(factors_fit)
factors_coefs <- merge(
  x = factors_summary$conf.int,
  y = factors_summary$coefficients,
  by = "row.names"
)

# merge() returns the join key as a "Row.names" column; move it back to the
# row names and drop the column.
rownames(factors_coefs) <- factors_coefs[["Row.names"]]
factors_coefs[["Row.names"]] <- NULL

write.csv(factors_coefs, file = paste0(working_dir, "factors_cox_results.csv"))
factors_coefs

Unnamed: 0,exp(coef),exp(-coef),lower .95,upper .95,coef,se(coef),se2,Chisq,DF,p
in_close,,,,,,0.0,0.0,,1,
in_peak,157043300000.0,6.36767e-12,1.3108909999999998e-19,1.881362e+41,25.77978753,35.336464165,35.33640104,0.5322461,1,0.4656641
in_startup,,,,,,0.0,0.0,,1,
in_tail,,,,,,0.0,0.0,,1,
is_in_deads,0.151018,6.621729,0.1093119,0.2086363,-1.89035648,0.16489781,0.1623295,131.4188441,1,2.0050619999999998e-30
n_existing_leads,1.072663,0.9322596,1.05416,1.09149,0.070144,0.008877428,0.00745667,62.4319207,1,2.758162e-15
n_exposures,1.649851,0.6061155,1.445281,1.883376,0.50068475,0.067542571,0.06268737,54.9507401,1,1.235888e-13
n_rim_connections,1.592242,0.6280453,1.499628,1.690575,0.46514305,0.030574996,0.02968823,231.4409809,1,2.891298e-52
n_triangle_paths,1.059642,0.9437146,0.9485427,1.183755,0.05793146,0.056511208,0.05483761,1.0508959,1,0.3053008
t_is_in_deads,0.6990039,1.430607,0.4227174,1.15587,-0.35809897,0.256613109,0.25276142,1.94737,1,0.1628705


# Estimate effect of similarity to exposers

The theoretical argument is that logic and familiarity increase polarization by making individuals imitate their more similar alters. So, we replace the logic and familiarity regressors with the similarity measure.

In [9]:
# Cox regression of clue adoption in which the logical-support and familiarity
# regressors are replaced by similarity to the exposers
# (n_edges_shared_with_exposers and its treatment interaction). It keeps the
# timing, social reinforcement, prior rejection and notebook-state terms and
# the Gaussian per-player frailty. Rows of adopt_ht are (start, stop]
# intervals, so the response uses the counting-process form of Surv().
#
# The call must not end with a trailing comma after `data`: that passes an
# empty argument through coxph's `...`, which recent versions of the survival
# package are expected to reject.
#
# NOTE(review): the recorded output shows a "NaNs produced" warning, an
# extremely large in_startup coefficient and no estimates for the other timing
# dummies. This looks like the timing dummies are degenerate or collinear in
# this subset; confirm before interpreting them.
similarity_fit <- coxph(
    Surv(time = start, time2 = stop, event = adopt_event, type = 'counting') ~
        in_startup + in_peak + in_close + in_tail +  # timing dummies
        #is_link + is_spur + is_spoke + # is_prompt +  # type of clue dummies
        n_exposures + t_n_exposures +  # social reinforcement
        #n_triangle_paths +  # "logical" support
        #n_rim_connections + t_n_rim_connections + # familiarity with concepts
        n_edges_shared_with_exposers + t_n_edges_shared_with_exposers +  # similarity to exposers
        is_in_deads + t_is_in_deads +  # prior rejection
        n_existing_leads +  # current state of notebook
        frailty.gaussian(player_id),  # random player effect (as players have different play speeds)
    data = adopt_ht
)


similarity_fit

“NaNs produced”

Call:
coxph(formula = Surv(time = start, time2 = stop, event = adopt_event, 
    type = "counting") ~ in_startup + in_peak + in_close + in_tail + 
    n_exposures + t_n_exposures + n_edges_shared_with_exposers + 
    t_n_edges_shared_with_exposers + is_in_deads + t_is_in_deads + 
    n_existing_leads + frailty.gaussian(player_id), data = adopt_ht)

                               coef  se(coef)       se2     Chisq  DF       p
in_startup                 226.7349   87.4886              6.7164   1 0.00955
in_peak                                0.0000    0.0000             1        
in_close                               0.0000    0.0000             1        
in_tail                                0.0000    0.0000             1        
n_exposures                  0.3634    0.1008    0.0557   13.0040   1 0.00031
t_n_exposures                0.2394    0.1182              4.1017   1 0.04284
n_edges_shared_with_expos   -0.0236    0.0261              0.8175   1 0.36592
t_n_edges_shared_with_exp

In [10]:
# Join the confidence-interval table and the coefficient table of the fitted
# model on their row names (one row per model term), then save the result.
similarity_summary <- summary(similarity_fit)
similarity_coefs <- merge(
  x = similarity_summary$conf.int,
  y = similarity_summary$coefficients,
  by = "row.names"
)

# merge() returns the join key as a "Row.names" column; move it back to the
# row names and drop the column.
rownames(similarity_coefs) <- similarity_coefs[["Row.names"]]
similarity_coefs[["Row.names"]] <- NULL

write.csv(similarity_coefs, file = paste0(working_dir, "similarity_cox_results.csv"))
similarity_coefs

“NaNs produced”

Unnamed: 0,exp(coef),exp(-coef),lower .95,upper .95,coef,se(coef),se2,Chisq,DF,p
in_close,,,,,,0.0,0.0,,1,
in_peak,,,,,,0.0,0.0,,1,
in_startup,2.94921e+98,3.3907390000000003e-99,9.984178e+23,8.711621e+172,226.73487633,87.48855536,,6.7163631,1,0.009553224
in_tail,,,,,,0.0,0.0,,1,
is_in_deads,0.4053821,2.466808,0.3073351,0.5347084,-0.90292519,0.14127376,0.09209751,40.8489157,1,1.644645e-10
n_edges_shared_with_expos,0.9766902,1.023866,0.9280093,1.027925,-0.02358575,0.02608611,,0.817487,1,0.3659159
n_existing_leads,1.095022,0.9132233,1.069111,1.121562,0.09077485,0.01221849,,55.1944793,1,1.091749e-13
n_exposures,1.438188,0.6953193,1.180432,1.752228,0.36338418,0.10076914,0.05569089,13.0040005,1,0.0003108262
t_is_in_deads,0.308033,3.246406,0.1927032,0.4923858,-1.17754839,0.23931851,0.19506646,24.2105665,1,8.635719e-07
t_n_edges_shared_with_exp,1.013233,0.9869395,0.9605847,1.068768,0.01314657,0.02722482,,0.2331819,1,0.6291745
