# Estimate Continuous-time outcome measures
This `R` notebook performs hazard rate regressions to estimate the predicted outcomes of the game that are measured continuously as users take actions.

In [2]:
#install.packages("plyr")
library(plyr)
library(survival)


The downloaded binary packages are in
	/var/folders/k4/90b0qh1946s8g2d438pzzdvr0000gq/T//Rtmp7wgzDD/downloaded_packages


In [3]:
working_dir = "../results-anonymized/pilot/"
filenames <- Sys.glob(paste(working_dir,"block_*_hazards.csv", sep=""))
filenames

In [44]:
# load the total dataset
df <- ldply(lapply(filenames, read.csv, header=TRUE, stringsAsFactors=TRUE), rbind)
names(df)

In [45]:
# create subset that only includes clues that are candidates for adoption
# also, only look at spoke clues which are the same across conditions
# and ignore the clues an individual is exposed to at t0, because their neighbors didn't choose to adopt them
adopt_ht = subset(df, is_in_leads == 0 & n_exposures > 0 & is_spoke == 1 & is_exposed_t0==0)
dim(adopt_ht)

In [46]:
# create interaction terms with treatment
adopt_ht$t_n_exposures = adopt_ht$n_exposures * adopt_ht$is_treatment_condition
adopt_ht$t_n_rim_connections = adopt_ht$n_rim_connections * adopt_ht$is_treatment_condition
adopt_ht$t_is_in_deads = adopt_ht$is_in_deads * adopt_ht$is_treatment_condition

adopt_ht$t_n_shared_edges <- adopt_ht$n_shared_edges * adopt_ht$is_treatment_condition
adopt_ht$X <- NULL
rownames(adopt_ht) <- NULL  # reindex
tail(adopt_ht)

Unnamed: 0_level_0,start,exposure_id,player_id,game_id,is_treatment_condition,is_spoke,is_link_or_spur,is_prompt,is_in_leads,is_in_deads,⋯,is_exposed_t0,is_held_t0,stop,adopt_event,forget_event,is_caveman_game,t_n_exposures,t_n_rim_connections,t_is_in_deads,t_n_shared_edges
Unnamed: 0_level_1,<dbl>,<fct>,<fct>,<fct>,<int>,<int>,<int>,<int>,<int>,<int>,⋯,<int>,<int>,<dbl>,<int>,<int>,<int>,<int>,<dbl>,<int>,<int>
24985,93.859,zkxoJdJcrFgimKzdW_tclue_2_8,zkxoJdJcrFgimKzdW,bALDaKPEGyutrm8bn,1,1,0,0,0,0,⋯,0,0,121.056,0,0,0,1,0,0,0
24986,121.056,zkxoJdJcrFgimKzdW_tclue_2_8,zkxoJdJcrFgimKzdW,bALDaKPEGyutrm8bn,1,1,0,0,0,0,⋯,0,0,139.615,0,0,0,1,0,0,1
24987,139.615,zkxoJdJcrFgimKzdW_tclue_2_8,zkxoJdJcrFgimKzdW,bALDaKPEGyutrm8bn,1,1,0,0,0,0,⋯,0,0,141.041,1,0,0,1,0,0,1
24988,270.249,zkxoJdJcrFgimKzdW_tclue_2_9,zkxoJdJcrFgimKzdW,bALDaKPEGyutrm8bn,1,1,0,0,0,0,⋯,0,0,326.087,0,0,0,1,0,0,2
24989,326.087,zkxoJdJcrFgimKzdW_tclue_2_9,zkxoJdJcrFgimKzdW,bALDaKPEGyutrm8bn,1,1,0,0,0,0,⋯,0,0,335.312,0,0,0,1,0,0,2
24990,335.312,zkxoJdJcrFgimKzdW_tclue_2_9,zkxoJdJcrFgimKzdW,bALDaKPEGyutrm8bn,1,1,0,0,0,0,⋯,0,0,336.155,1,0,0,1,1,0,3


# Estimate effect of familiarity, supporting pathways on adoption

Use this regression in the manipulation check - if there is no effect of n_triangle_paths, then we expect the treatment and control conditions to be identical, as there isn't interdependence between clues other than through the familiarity mechanism, and so the spoke clues are independent of one another.

In [7]:
factors_fit <- coxph(
    Surv(time=start, time2=stop, event=adopt_event, type='counting') ~
        in_startup + in_peak + in_close + in_tail + # timing dummies 
        n_exposures + t_n_exposures + # social reinforcement
        n_triangle_paths +  # "logical" support
        n_rim_connections + t_n_rim_connections + # familiarity with concepts
        is_in_deads + t_is_in_deads + # prior rejection
        n_existing_leads +  # current state of notebook
        frailty.gaussian(player_id),  # random player effect (as players have different play speeds)
    data=adopt_ht,
)

factors_fit

“Inner loop failed to coverge for iterations 2 4 5 6”

Call:
coxph(formula = Surv(time = start, time2 = stop, event = adopt_event, 
    type = "counting") ~ in_startup + in_peak + in_close + in_tail + 
    n_exposures + t_n_exposures + n_triangle_paths + n_rim_connections + 
    t_n_rim_connections + is_in_deads + t_is_in_deads + n_existing_leads + 
    frailty.gaussian(player_id), data = adopt_ht)

                               coef  se(coef)       se2     Chisq  DF       p
in_startup                           0.00e+00  0.00e+00             1        
in_peak                    2.58e+01  3.53e+01  3.53e+01  5.32e-01   1    0.47
in_close                             0.00e+00  0.00e+00             1        
in_tail                              0.00e+00  0.00e+00             1        
n_exposures                5.01e-01  6.75e-02  6.27e-02  5.50e+01   1 1.2e-13
t_n_exposures             -1.15e-01  8.15e-02  6.47e-02  2.01e+00   1    0.16
n_triangle_paths           5.79e-02  5.65e-02  5.48e-02  1.05e+00   1    0.31
n_rim_connections          4

In [78]:
factors_summary = summary(factors_fit)
factors_coefs = merge(factors_summary$conf.int, factors_summary$coefficients, by='row.names')
rownames(factors_coefs) <- factors_coefs$Row.names  # reset row names
factors_coefs$Row.names <- NULL

# add means
factors_coefs$means <- factors_fit$means[row.names(factors_coefs)]

# export data for plotting in python
write.csv(factors_coefs, paste(working_dir, "factors_cox_results.csv", sep=""))
factors_coefs

Unnamed: 0,exp(coef),exp(-coef),lower .95,upper .95,coef,se(coef),se2,Chisq,DF,p,means
in_close,,,,,,0.0,0.0,,1,,0.148979592
in_peak,157043300000.0,6.36767e-12,1.3108909999999998e-19,1.881362e+41,25.77978753,35.336464165,35.33640104,0.5322461,1,0.4656641,0.269147659
in_startup,,,,,,0.0,0.0,,1,,0.003321329
in_tail,,,,,,0.0,0.0,,1,,0.578551421
is_in_deads,0.151018,6.621729,0.1093119,0.2086363,-1.89035648,0.16489781,0.1623295,131.4188441,1,2.0050619999999998e-30,0.1777511
n_existing_leads,1.072663,0.9322596,1.05416,1.09149,0.070144,0.008877428,0.00745667,62.4319207,1,2.758162e-15,11.129891957
n_exposures,1.649851,0.6061155,1.445281,1.883376,0.50068475,0.067542571,0.06268737,54.9507401,1,1.235888e-13,1.237454982
n_rim_connections,1.592242,0.6280453,1.499628,1.690575,0.46514305,0.030574996,0.02968823,231.4409809,1,2.891298e-52,0.859743898
n_triangle_paths,1.059642,0.9437146,0.9485427,1.183755,0.05793146,0.056511208,0.05483761,1.0508959,1,0.3053008,0.265666267
t_is_in_deads,0.6990039,1.430607,0.4227174,1.15587,-0.35809897,0.256613109,0.25276142,1.94737,1,0.1628705,0.089555822


# Estimate effect of similarity to exposers

The theoretical argument is that logic and familiarity increase polarization by making individuals imitate their more similar alters. So, we swap out the logic and familiarity regressors with the similarity measure

In [71]:
similarity_fit <- coxph(
    Surv(time=start, time2=stop, event=adopt_event, type='counting') ~
        in_startup + in_peak + in_close + in_tail +  # timing dummies 
        n_exposures + t_n_exposures +  # social reinforcement
        n_shared_edges + t_n_shared_edges +  # similarity to exposers
        is_in_deads + t_is_in_deads +  # prior rejection
        n_existing_leads +  # current state of notebook
        frailty.gaussian(player_id),  # random player effect (as players have different play speeds)
    data=adopt_ht,
)


similarity_fit

“NaNs produced”

Call:
coxph(formula = Surv(time = start, time2 = stop, event = adopt_event, 
    type = "counting") ~ in_startup + in_peak + in_close + in_tail + 
    n_exposures + t_n_exposures + n_shared_edges + t_n_shared_edges + 
    is_in_deads + t_is_in_deads + n_existing_leads + frailty.gaussian(player_id), 
    data = adopt_ht)

                               coef  se(coef)       se2     Chisq  DF       p
in_startup                 226.7349   87.4886              6.7164   1 0.00955
in_peak                                0.0000    0.0000             1        
in_close                               0.0000    0.0000             1        
in_tail                                0.0000    0.0000             1        
n_exposures                  0.3634    0.1008    0.0557   13.0040   1 0.00031
t_n_exposures                0.2394    0.1182              4.1017   1 0.04284
n_shared_edges              -0.0236    0.0261              0.8175   1 0.36592
t_n_shared_edges             0.0131    0.0272        

In [77]:
similarity_summary = summary(similarity_fit)
similarity_coefs = merge(similarity_summary$conf.int, similarity_summary$coefficients, by='row.names')
rownames(similarity_coefs) <- similarity_coefs$Row.names  # reset row names
similarity_coefs$Row.names <- NULL

# add means
similarity_coefs$means <- similarity_fit$means[row.names(similarity_coefs)]

# export data for plotting in python
write.csv(similarity_coefs, paste(working_dir, "similarity_cox_results.csv", sep=""))
similarity_coefs

“NaNs produced”

Unnamed: 0,exp(coef),exp(-coef),lower .95,upper .95,coef,se(coef),se2,Chisq,DF,p,means
in_close,,,,,,0.0,0.0,,1,,0.148979592
in_peak,,,,,,0.0,0.0,,1,,0.269147659
in_startup,2.94921e+98,3.3907390000000003e-99,9.984178e+23,8.711621e+172,226.73487633,87.48855536,,6.7163631,1,0.009553224,0.003321329
in_tail,,,,,,0.0,0.0,,1,,0.578551421
is_in_deads,0.4053821,2.466808,0.3073351,0.5347084,-0.90292519,0.14127376,0.09209751,40.8489157,1,1.644645e-10,0.1777511
n_existing_leads,1.095022,0.9132233,1.069111,1.121562,0.09077485,0.01221849,,55.1944793,1,1.091749e-13,11.129891957
n_exposures,1.438188,0.6953193,1.180432,1.752228,0.36338418,0.10076914,0.05569089,13.0040005,1,0.0003108262,1.237454982
n_shared_edges,0.9766902,1.023866,0.9280093,1.027925,-0.02358575,0.02608611,,0.817487,1,0.3659159,4.643657463
t_is_in_deads,0.308033,3.246406,0.1927032,0.4923858,-1.17754839,0.23931851,0.19506646,24.2105665,1,8.635719e-07,0.089555822
t_n_exposures,1.270506,0.7870879,1.007749,1.601774,0.23941533,0.11821477,,4.1016674,1,0.04284094,0.591796719


# Estimate Game-level effects for mediation analysis

In [105]:
game_treat = factor(paste(adopt_ht$game_id, adopt_ht$is_treatment_condition))
dummies <- data.frame(model.matrix( ~ game_treat))
dummies$X.Intercept. <- NULL  # drop intercept column
names(dummies) <- gsub("\\.", "_", names(dummies))
names(dummies) <- gsub("game_treat", "GT_", names(dummies))


game_n_triangle = dummies*adopt_ht$n_triangle_paths
names(game_n_triangle) <- paste(names(game_n_triangle), "_nTriangles", sep="")
game_n_shared_edges = dummies*adopt_ht$n_shared_edges
names(game_n_shared_edges) <- paste(names(game_n_shared_edges), "_nShared", sep="")


adopt_ht_dummies <- cbind(adopt_ht, game_n_triangle, game_n_shared_edges)
adopt_ht_dummies

Unnamed: 0_level_0,start,exposure_id,player_id,game_id,is_treatment_condition,is_spoke,is_link_or_spur,is_prompt,is_in_leads,is_in_deads,⋯,GT_bALDaKPEGyutrm8bn_0_nShared,GT_bALDaKPEGyutrm8bn_1_nShared,GT_Bd3w7KBP8ZKKMBxhj_0_nShared,GT_Bd3w7KBP8ZKKMBxhj_1_nShared,GT_hRLty7e6fGMsjgzJH_0_nShared,GT_hRLty7e6fGMsjgzJH_1_nShared,GT_kEHF9QnbLb3YtYfbj_0_nShared,GT_kEHF9QnbLb3YtYfbj_1_nShared,GT_xXvsHQvoQC4LsoyCi_0_nShared,GT_xXvsHQvoQC4LsoyCi_1_nShared
Unnamed: 0_level_1,<dbl>,<fct>,<fct>,<fct>,<int>,<int>,<int>,<int>,<int>,<int>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,98.624,2TGzq8Txds8TAmHFE_tclue_1_10,2TGzq8Txds8TAmHFE,xXvsHQvoQC4LsoyCi,1,1,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,2
2,438.922,2TGzq8Txds8TAmHFE_tclue_1_11,2TGzq8Txds8TAmHFE,xXvsHQvoQC4LsoyCi,1,1,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,3
3,441.372,2TGzq8Txds8TAmHFE_tclue_1_11,2TGzq8Txds8TAmHFE,xXvsHQvoQC4LsoyCi,1,1,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,3
4,445.579,2TGzq8Txds8TAmHFE_tclue_1_11,2TGzq8Txds8TAmHFE,xXvsHQvoQC4LsoyCi,1,1,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,3
5,102.418,2TGzq8Txds8TAmHFE_tclue_1_4,2TGzq8Txds8TAmHFE,xXvsHQvoQC4LsoyCi,1,1,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,2
6,102.463,2TGzq8Txds8TAmHFE_tclue_1_4,2TGzq8Txds8TAmHFE,xXvsHQvoQC4LsoyCi,1,1,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,2
7,104.378,2TGzq8Txds8TAmHFE_tclue_1_4,2TGzq8Txds8TAmHFE,xXvsHQvoQC4LsoyCi,1,1,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,3
8,139.971,2TGzq8Txds8TAmHFE_tclue_1_5,2TGzq8Txds8TAmHFE,xXvsHQvoQC4LsoyCi,1,1,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,1
9,148.945,2TGzq8Txds8TAmHFE_tclue_1_5,2TGzq8Txds8TAmHFE,xXvsHQvoQC4LsoyCi,1,1,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,1
10,155.676,2TGzq8Txds8TAmHFE_tclue_1_5,2TGzq8Txds8TAmHFE,xXvsHQvoQC4LsoyCi,1,1,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,1


In [115]:
fmla <- as.formula(paste(
    "Surv(time=start, time2=stop, event=adopt_event, type='counting') ~ 
        in_startup + in_peak + in_close + in_tail +  
        n_exposures + t_n_exposures + ",
    paste(names(game_n_triangle), collapse= "+"),
    " + n_rim_connections + t_n_rim_connections + 
        is_in_deads + t_is_in_deads + 
        n_existing_leads +  
        frailty.gaussian(player_id)"
))  
print(fmla)
 
game_factors_fit <- coxph(fmla,
    data=adopt_ht_dummies,
)

game_factors_fit

Surv(time = start, time2 = stop, event = adopt_event, type = "counting") ~ 
    in_startup + in_peak + in_close + in_tail + n_exposures + 
        t_n_exposures + GT_5wifJzSKTJuDtQkzS_1_nTriangles + GT_acKghgvvb3kewvMd9_0_nTriangles + 
        GT_acKghgvvb3kewvMd9_1_nTriangles + GT_aufBQzkbCokRBX9js_0_nTriangles + 
        GT_aufBQzkbCokRBX9js_1_nTriangles + GT_bALDaKPEGyutrm8bn_0_nTriangles + 
        GT_bALDaKPEGyutrm8bn_1_nTriangles + GT_Bd3w7KBP8ZKKMBxhj_0_nTriangles + 
        GT_Bd3w7KBP8ZKKMBxhj_1_nTriangles + GT_hRLty7e6fGMsjgzJH_0_nTriangles + 
        GT_hRLty7e6fGMsjgzJH_1_nTriangles + GT_kEHF9QnbLb3YtYfbj_0_nTriangles + 
        GT_kEHF9QnbLb3YtYfbj_1_nTriangles + GT_xXvsHQvoQC4LsoyCi_0_nTriangles + 
        GT_xXvsHQvoQC4LsoyCi_1_nTriangles + n_rim_connections + 
        t_n_rim_connections + is_in_deads + t_is_in_deads + n_existing_leads + 
        frailty.gaussian(player_id)


“Inner loop failed to coverge for iterations 1 2 3 4 5 6 7 8 9 10”


Call:
coxph(formula = fmla, data = adopt_ht_dummies)

                               coef  se(coef)       se2     Chisq  DF       p
in_startup                            0.00000   0.00000             1        
in_peak                               0.00000   0.00000             1        
in_close                              0.00000   0.00000             1        
in_tail                               0.00000   0.00000             1        
n_exposures                 0.63143   0.06344   0.05921  99.06288   1 < 2e-16
t_n_exposures              -0.22050   0.07116   0.05879   9.60283   1 0.00194
GT_5wifJzSKTJuDtQkzS_1_nT   0.83727   0.18755   0.16498  19.92890   1 8.0e-06
GT_acKghgvvb3kewvMd9_0_nT   0.95507   0.32589   0.30751   8.58878   1 0.00338
GT_acKghgvvb3kewvMd9_1_nT  -0.21233   0.09210   0.08643   5.31459   1 0.02115
GT_aufBQzkbCokRBX9js_0_nT   0.05290   0.22053   0.20388   0.05754   1 0.81042
GT_aufBQzkbCokRBX9js_1_nT  -0.00733   0.08490   0.07913   0.00745   1 0.93120
GT_bALDaKP

In [156]:
fmla <- as.formula(paste(
    "Surv(time=start, time2=stop, event=adopt_event, type='counting') ~ 
        in_startup + in_peak + in_close + in_tail +  
        n_exposures + t_n_exposures + ",
    paste(head(names(game_n_shared_edges),9), collapse= "+"),  # missing data in pilot, remove 'head'
    " + n_rim_connections + t_n_rim_connections + 
        is_in_deads + t_is_in_deads + 
        n_existing_leads +  
        frailty.gaussian(player_id)"
))  
print(fmla)
 
game_similarity_fit <- coxph(fmla,
    data=adopt_ht_dummies,
)

game_similarity_fit

Surv(time = start, time2 = stop, event = adopt_event, type = "counting") ~ 
    in_startup + in_peak + in_close + in_tail + n_exposures + 
        t_n_exposures + GT_5wifJzSKTJuDtQkzS_1_nShared + GT_acKghgvvb3kewvMd9_0_nShared + 
        GT_acKghgvvb3kewvMd9_1_nShared + GT_aufBQzkbCokRBX9js_0_nShared + 
        GT_aufBQzkbCokRBX9js_1_nShared + GT_bALDaKPEGyutrm8bn_0_nShared + 
        GT_bALDaKPEGyutrm8bn_1_nShared + GT_Bd3w7KBP8ZKKMBxhj_0_nShared + 
        GT_Bd3w7KBP8ZKKMBxhj_1_nShared + n_rim_connections + 
        t_n_rim_connections + is_in_deads + t_is_in_deads + n_existing_leads + 
        frailty.gaussian(player_id)


“Inner loop failed to coverge for iterations 1 3 4 5 6 7 8 9 10”


Call:
coxph(formula = fmla, data = adopt_ht_dummies)

                               coef  se(coef)       se2     Chisq  DF       p
in_startup                            0.00000   0.00000             1        
in_peak                               0.00000   0.00000             1        
in_close                   23.54562   5.63457   5.63438  17.46218   1 2.9e-05
in_tail                               0.00000   0.00000             1        
n_exposures                 0.50782   0.06807   0.06315  55.65594   1 8.6e-14
t_n_exposures              -0.11171   0.08412   0.06868   1.76347   1 0.18419
GT_5wifJzSKTJuDtQkzS_1_nS   0.10860   0.04083   0.03385   7.07639   1 0.00781
GT_acKghgvvb3kewvMd9_0_nS  -0.02043   0.02554   0.02098   0.63972   1 0.42381
GT_acKghgvvb3kewvMd9_1_nS  -0.04804   0.02950   0.02633   2.65237   1 0.10340
GT_aufBQzkbCokRBX9js_0_nS  -0.00896   0.01654   0.01423   0.29358   1 0.58794
GT_aufBQzkbCokRBX9js_1_nS  -0.08023   0.02353   0.02127  11.62978   1 0.00065
GT_bALDaKP

In [157]:
game_df_tri = data.frame(game_factors_fit$coefficients[names(game_n_triangle)])
names(game_df_tri) <- "n_triangle"
rownames(game_df_tri) <- gsub("_nTriangles", "", rownames(game_df_tri))
game_df_tri

Unnamed: 0_level_0,n_triangle
Unnamed: 0_level_1,<dbl>
GT_5wifJzSKTJuDtQkzS_1,0.837267267
GT_acKghgvvb3kewvMd9_0,0.955068306
GT_acKghgvvb3kewvMd9_1,-0.212331368
GT_aufBQzkbCokRBX9js_0,0.052902245
GT_aufBQzkbCokRBX9js_1,-0.007329374
GT_bALDaKPEGyutrm8bn_0,0.972015023
GT_bALDaKPEGyutrm8bn_1,0.277238818
GT_Bd3w7KBP8ZKKMBxhj_0,0.16521667
GT_Bd3w7KBP8ZKKMBxhj_1,0.5793827
GT_hRLty7e6fGMsjgzJH_0,0.011777185


In [158]:
game_df_sim = data.frame(game_similarity_fit$coefficients[head(names(game_n_shared_edges),9)])  # missing data in pilot, remove 'head'
names(game_df_sim) <- "n_shared_edges"
rownames(game_df_sim) <- gsub("_nShared", "", rownames(game_df_sim))


In [159]:
game_df_sim$n_triangle <- game_df_tri[rownames(game_df_sim),]
game_df_sim

Unnamed: 0_level_0,n_shared_edges,n_triangle
Unnamed: 0_level_1,<dbl>,<dbl>
GT_5wifJzSKTJuDtQkzS_1,0.108601099,0.837267267
GT_acKghgvvb3kewvMd9_0,-0.020428987,0.955068306
GT_acKghgvvb3kewvMd9_1,-0.048037423,-0.212331368
GT_aufBQzkbCokRBX9js_0,-0.008959724,0.052902245
GT_aufBQzkbCokRBX9js_1,-0.08022921,-0.007329374
GT_bALDaKPEGyutrm8bn_0,0.02323379,0.972015023
GT_bALDaKPEGyutrm8bn_1,-0.092498556,0.277238818
GT_Bd3w7KBP8ZKKMBxhj_0,0.032200046,0.16521667
GT_Bd3w7KBP8ZKKMBxhj_1,0.068241028,0.5793827


In [160]:
write.csv(game_df_sim, paste(working_dir, "game_level_factors.csv", sep=""))