# End of game measures
This `Python` notebook takes the anonymized data and computes population-level measures for each game.

In [1]:
%pylab inline
import json
import numpy as np
import pandas as pd
import glob
import itertools
from sklearn.decomposition import PCA
from scipy import stats

from helpers import shuffle

Populating the interactive namespace from numpy and matplotlib


In [2]:
output_dir = "../results-anonymized/experiment/"
# glob returns paths in arbitrary, platform-dependent order. Store the sorted list
# (rather than only displaying it) so that the order of `blocks`, and therefore the
# column order of every table computed from them, is the same on every run.
files = sorted(glob.glob(output_dir+'block_*.json'))
files

['../results-anonymized/experiment/block_0.json',
 '../results-anonymized/experiment/block_1.json',
 '../results-anonymized/experiment/block_10.json',
 '../results-anonymized/experiment/block_11.json',
 '../results-anonymized/experiment/block_12.json',
 '../results-anonymized/experiment/block_13.json',
 '../results-anonymized/experiment/block_14.json',
 '../results-anonymized/experiment/block_15.json',
 '../results-anonymized/experiment/block_16.json',
 '../results-anonymized/experiment/block_17.json',
 '../results-anonymized/experiment/block_18.json',
 '../results-anonymized/experiment/block_19.json',
 '../results-anonymized/experiment/block_2.json',
 '../results-anonymized/experiment/block_20.json',
 '../results-anonymized/experiment/block_21.json',
 '../results-anonymized/experiment/block_22.json',
 '../results-anonymized/experiment/block_23.json',
 '../results-anonymized/experiment/block_24.json',
 '../results-anonymized/experiment/block_25.json',
 '../results-anonymized/experiment

In [3]:
# Sanity check: number of block files matched by the glob pattern
len(files)

30

In [4]:
# Parse every block file as JSON; `blocks` keeps the same order as `files`
blocks = []
for file in files:
    with open(file) as f:
        blocks.append(json.load(f))

In [10]:
# Clues used in the polarization analysis: every connection between a hub node (1, 2)
# and a rim node (3-13). Treatment clue ids start with 'tclue', control ids with 'cclue';
# within each list the hub-1 spokes come first, then the hub-2 spokes.
t_spokes = ['tclue_%i_%i' % (hub, rim) for hub in (1, 2) for rim in range(3, 14)]

c_spokes = ['cclue_%i_%i' % (hub, rim) for hub in (1, 2) for rim in range(3, 14)]

# End-of-game survey questions used in the polarization analysis:
# '<topic>_<item>' for each topic and its number of items
assessments = ['%s_%i' % (topic, item)
               for topic, n_items in [('appearance', 2), ('clothing', 2), ('suspect', 3),
                                      ('tool', 2), ('vehicle', 2)]
               for item in range(1, n_items + 1)]
    
def compute_single_point_measures(game, n_used):
    """
    Compute the game-level measures for one game.

    "Games" in this experiment contain both a treatment and control condition
    and these must be properly separated from one another. Positions starting
    with 't' are analysed with the treatment spokes, positions starting with 'c'
    with the control spokes.

    Parameters
    ----------
    game : dict
        One game record. Reads game['_id'] and game['players'], whose values
        carry 'data.position', 'data.notebooks' and (for players who completed
        the post-game survey) 'data.caseMade'.
    n_used : int
        Number of respondents per condition in each analysed subset. When a
        condition has more respondents than this, every subset of that size is
        analysed and the results are averaged, so that conditions with
        missing responses are compared on equal sized datasets.

    Returns
    -------
    pandas.Series
        The treatment and control measures concatenated under the outer keys
        'inter' and 'indep'.
    """
    # Form end-of-game survey responses into a dataframe (one row per position)
    collector = {}
    for player in game['players'].values():
        position = player['data.position']
        if 'data.caseMade' in player:
            collector[position] = player['data.caseMade']
        else:
            print('%s did not complete the post-game survey' % position)
    responses = pd.DataFrame(collector).T.sort_index()

    # Form final notebook states into a dataframe: 1 where a respondent holds a spoke clue
    final_adoptions = pd.DataFrame(data=0, index=responses.index, columns=t_spokes+c_spokes)
    for player in game['players'].values():
        position = player['data.position']
        if position not in final_adoptions.index:
            # No survey row for this player. Such rows are never selected below,
            # so skip them instead of silently enlarging the frame with NaN rows.
            continue
        for clue_id in player['data.notebooks']['promising_leads']['clueIDs']:
            # Only the hub-rim spoke clues are analysed; other clue ids are ignored
            if clue_id in final_adoptions.columns:
                final_adoptions.loc[position, clue_id] = 1

    # Respondents in each condition. The sample size `n_used` is supplied by the
    # caller so that every game in a block uses the same number of respondents.
    t_responses = [pos for pos in responses.index if pos.startswith('t')]
    c_responses = [pos for pos in responses.index if pos.startswith('c')]

    def first_component_share(frame):
        """ fraction of the variance explained by the first principal component """
        pca = PCA(n_components=1)
        pca.fit(frame)
        # explained_variance_ratio_ is a length-1 array here; store the scalar
        return pca.explained_variance_ratio_[0]

    def pairwise_similarities(frame):
        """ correlations between every pair of rows, each pair counted once """
        n_agents = len(frame.index)
        # mask the lower triangle and the diagonal; stack() then drops the masked cells
        return frame.astype(float).T.corr().mask(
            np.tri(n_agents, n_agents, 0, dtype='bool')).stack()

    def process_subset(subset, spokes):
        """ compute a result on the selected subset of the data """
        members = list(subset)  # .loc needs a list of labels, not the combinations tuple
        sub_res = {}

        # select the subset of the survey responses that will be used in the subset analysis
        sub_survey = responses.loc[members, assessments]

        # survey PC1
        sub_res['survey PC1'] = first_component_share(sub_survey)

        # survey similarity percentiles
        survey_corrs = pairwise_similarities(sub_survey)
        sub_res['survey 5% similarity'], sub_res['survey 95% similarity'] = np.percentile(
            survey_corrs, [5, 95])

        # select the subset of the behavioral responses that will be used in the subset analysis
        sub_adopt = final_adoptions.loc[members, spokes]

        # final-state PC1
        sub_res['spoke PC1'] = first_component_share(sub_adopt)

        # final state similarity percentiles
        spoke_corrs = pairwise_similarities(sub_adopt)
        sub_res['spoke 5% similarity'], sub_res['spoke 95% similarity'] = np.percentile(
            spoke_corrs, [5, 95])

        # compute the expected values for the given level of adoption
        # by shuffling the clues between individuals
        # (preserving the number of clues each individual holds,
        # and the number of individuals holding each clue)
        # do this a number of times and average the result
        # NOTE(review): `shuffle` comes from the project's helpers module; the meaning
        # of n=500 is not visible in this notebook -- confirm against helpers.shuffle
        e95 = []
        e5 = []
        ePC1 = []
        for _ in range(100):
            shuffle_adopt = pd.DataFrame(index=sub_adopt.index,
                                         columns=sub_adopt.columns,
                                         data=shuffle(sub_adopt.values, n=500))

            corrs = pairwise_similarities(shuffle_adopt)
            e95.append(np.percentile(corrs, 95))
            e5.append(np.percentile(corrs, 5))
            ePC1.append(first_component_share(shuffle_adopt))

        # compute the net effect of (interdependent or independent) diffusion
        # over chance distribution of the same clues
        sub_res['net spoke PC1'] = sub_res['spoke PC1'] - np.mean(ePC1)
        sub_res['net spoke 95% similarity'] = sub_res['spoke 95% similarity'] - np.mean(e95)
        sub_res['net spoke 5% similarity'] = sub_res['spoke 5% similarity'] - np.mean(e5)

        return sub_res

    def summarize_condition(positions, spokes, label):
        """ average the subset results for one condition and attach the survey means """
        # For each subset of size 'n_used', compute a result.
        # In most cases there are no missing responses, so just compute on the complete set
        subset_results = [process_subset(subset, spokes)
                          for subset in itertools.combinations(positions, r=n_used)]

        # The recorded result is the average over all subsets
        if len(subset_results) > 1:
            print('Averaging over %i combinations for %s case' % (len(subset_results), label))
        summary = pd.DataFrame(subset_results).mean()

        # Compute average for confidence and consensus measures on all submissions
        summary['confidence'] = responses.loc[positions, 'confidence'].mean()
        summary['consensus'] = responses.loc[positions, 'consensus'].mean()
        summary['id'] = game['_id']
        return summary

    # Treatment first, then control (same order in which the shuffles were drawn before)
    t_result = summarize_condition(t_responses, t_spokes, 'treatment')
    c_result = summarize_condition(c_responses, c_spokes, 'control')

    result = pd.concat([t_result, c_result], keys=['inter', 'indep'])
    return result

def compute_block(block):
    """
    Compute the end-of-game measures for every game in one block.

    `block` maps a game name to its game record. The result concatenates the
    per-game measures, keyed by network type: 'caveman' when the game name
    contains 'caveman', otherwise 'dodec'.
    """
    # Identify the number of players to use in averages to account for dropouts:
    # the smallest per-condition count of survey respondents in the block, at most 20.
    n_used = 20
    for game in block.values():
        # first letter of the position ('t' or 'c') of each player who has survey data
        respondent_conditions = [player["data.position"][0]
                                 for player in game['players'].values()
                                 if 'data.caseMade' in player]
        n_used = min(n_used,
                     respondent_conditions.count("t"),
                     respondent_conditions.count("c"))

    network_labels = []
    per_game_measures = []
    for name, game in block.items():
        network_labels.append('caveman' if 'caveman' in name else 'dodec')
        per_game_measures.append(compute_single_point_measures(game, n_used=n_used))

    return pd.concat(per_game_measures, keys=network_labels)

In [11]:
import multiprocessing

# Compute the measures for each block in a pool of 3 worker processes;
# p.map returns one result per block, in the same order as `blocks`
with multiprocessing.Pool(3) as p:
    measurements_list = p.map(compute_block, blocks)
# Place the per-block results side by side: one column per block
measurements = pd.concat(measurements_list, axis=1)
# The CSV export is done by a later cell, after flattening to one row per game and condition
measurements

c12 did not complete the post-game survey
c17 did not complete the post-game survey
c19 did not complete the post-game survey
Averaging over 20 combinations for treatment case
Averaging over 20 combinations for treatment case
c4 did not complete the post-game survey
c13 did not complete the post-game survey
t12 did not complete the post-game survey
Averaging over 20 combinations for treatment case
t10 did not complete the post-game survey
Averaging over 20 combinations for treatment case
t5 did not complete the post-game survey
c6 did not complete the post-game survey
c17 did not complete the post-game survey
Averaging over 20 combinations for control case
Averaging over 20 combinations for treatment case
t10 did not complete the post-game survey
t19 did not complete the post-game survey
Averaging over 190 combinations for treatment case
Averaging over 190 combinations for treatment case
Averaging over 190 combinations for control case
t5 did not complete the post-game survey
t16 did n

Unnamed: 0,Unnamed: 1,Unnamed: 2,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
caveman,indep,confidence,60.7368,58.9474,62.35,49.5789,52.25,57.5,55.5,56.3889,65.85,61.8421,...,48.75,42.75,50.25,48.8421,57.6842,54.7,50.4,52.75,64.3889,61.25
caveman,indep,consensus,58.2632,64.5789,59.9,55.2632,52.65,54.4,53.2,51.1667,69.9,61.8421,...,48.35,48.85,55.3,52.1053,57.8947,54.95,56.5,52.25,64.9444,63.6
caveman,indep,id,jcfFiGC22Api8cm7o,oWf8QrmFrj8ZiE6JK,mSJbtZFsaaWC3ANqy,b4CBPunda96KnrhFi,NkvLcmxvQY36f759u,qQyhQa275SKLzM3X9,9oijjTA5XEFzCtedK,2RErnuyZ5YmzTzpxh,RLNbvyv9ipkrkPJKc,55mC2NYGpS5sYb46x,...,szPuQL8WF5prJP9s7,HXeLe9DnXEMs6JFjf,ew8uue7goY6SzSnMn,To5RB7JuNk4h5FHaZ,W4EAr6TwRFApoCagP,zg5puDzKGCxk7wAj2,W7ZjgcCnPfhq8eYnp,PRBmQFw8WDcrNi2Qn,wx5E4WNYAzhggxxNt,R5JmsFyk7arzZrwiE
caveman,indep,net spoke 5% similarity,-0.109421,-0.0464853,-0.0893916,-0.0911016,-0.0851615,-0.0842986,-0.0397809,-0.152654,-0.123733,-0.118047,...,-0.10581,-0.114623,-0.14413,-0.159561,-0.21115,-0.236645,-0.100988,-0.107651,-0.114018,-0.104817
caveman,indep,net spoke 95% similarity,0.173093,0.399166,0.364419,0.207669,0.230616,0.211998,0.204921,0.341224,0.377617,0.243582,...,0.40453,0.189508,0.14788,0.333087,0.204087,0.243926,0.1642,0.105551,0.222545,0.183853
caveman,indep,net spoke PC1,0.167596,0.213104,0.18669,0.160516,0.151575,0.203246,0.139065,0.228139,0.231012,0.301739,...,0.249651,0.188658,0.17417,0.187016,0.260017,0.15892,0.088373,0.132292,0.249236,0.110823
caveman,indep,spoke 5% similarity,-0.267283,-0.271448,-0.291655,-0.110657,-0.321101,-0.253218,-0.105496,-0.37918,-0.368662,-0.196553,...,-0.369162,-0.32015,-0.185634,-0.39734,-0.332106,-0.354044,-0.114092,-0.0300367,-0.301073,-0.129046
caveman,indep,spoke 95% similarity,0.770833,0.906105,0.881366,0.845124,0.705407,0.780618,0.807523,0.815588,0.830682,0.83205,...,0.850287,0.698674,0.762401,0.793884,0.793884,0.801265,0.787037,0.823839,0.742,0.793361
caveman,indep,spoke PC1,0.356007,0.392184,0.375846,0.348715,0.345412,0.410325,0.32753,0.428915,0.422723,0.489493,...,0.429961,0.370795,0.389353,0.375183,0.453819,0.343516,0.282689,0.357237,0.454266,0.400555
caveman,indep,survey 5% similarity,-0.480858,-0.465191,-0.448069,-0.301127,-0.482257,-0.471916,-0.456227,-0.533481,-0.592599,-0.549006,...,-0.418934,-0.44806,-0.445848,-0.605129,-0.530969,-0.382967,-0.299744,-0.454362,-0.484487,-0.464423


In [12]:
# write a flat output file for the games, to be used in mediation analysis
measurements_flat = measurements.unstack(level=[0,1])
measurements_flat.loc['t'] = [1 if c[2]=="inter" else 0 for c in measurements_flat.columns]
measurements_flat.loc['network'] = [1 if c[1]=="caveman" else 0 for c in measurements_flat.columns]
measurements_flat.columns = ["GT_" + str(measurements_flat.loc['id'][c]) + ("_1" if c[2]=="inter" else "_0") for c in measurements_flat.columns]
measurements_flat.T.to_csv(output_dir+"end_of_game_measurements.csv")
measurements_flat.T

Unnamed: 0,confidence,consensus,id,net spoke 5% similarity,net spoke 95% similarity,net spoke PC1,spoke 5% similarity,spoke 95% similarity,spoke PC1,survey 5% similarity,survey 95% similarity,survey PC1,t,network
GT_jcfFiGC22Api8cm7o_0,60.7368,58.2632,jcfFiGC22Api8cm7o,-0.109421,0.173093,0.167596,-0.267283,0.770833,0.356007,-0.480858,0.577215,0.285638,0,1
GT_jcfFiGC22Api8cm7o_1,54.8,53.35,jcfFiGC22Api8cm7o,-0.051287,0.188156,0.147737,-0.243468,0.770392,0.338543,-0.307725,0.832922,0.330469,1,1
GT_Lj2up2ePZA6WtiK4q_0,53,48.7895,Lj2up2ePZA6WtiK4q,-0.135912,0.0961384,0.0444583,-0.11606,0.748331,0.276937,-0.415275,0.78428,0.280021,0,0
GT_Lj2up2ePZA6WtiK4q_1,55.1,49.95,Lj2up2ePZA6WtiK4q,-0.088976,0.153019,0.0876056,-0.26717,0.677797,0.286886,-0.487964,0.620877,0.328971,1,0
GT_oWf8QrmFrj8ZiE6JK_0,58.9474,64.5789,oWf8QrmFrj8ZiE6JK,-0.0464853,0.399166,0.213104,-0.271448,0.906105,0.392184,-0.465191,0.63842,0.435887,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GT_hjTbDvCzT2RW9pgm8_1,65.35,65.6,hjTbDvCzT2RW9pgm8,-0.00942991,0.130742,0.0384105,-0.25634,0.587616,0.221859,-0.46451,0.592063,0.340722,1,0
GT_R5JmsFyk7arzZrwiE_0,61.25,63.6,R5JmsFyk7arzZrwiE,-0.104817,0.183853,0.110823,-0.129046,0.793361,0.400555,-0.464423,0.578193,0.402418,0,1
GT_R5JmsFyk7arzZrwiE_1,60.4,58.45,R5JmsFyk7arzZrwiE,-0.183742,0.254854,0.169985,-0.355311,0.793884,0.348154,-0.477525,0.613346,0.287303,1,1
GT_d2N6gipGzAM2B3GPn_0,59.1,64.15,d2N6gipGzAM2B3GPn,-0.0506744,0.16707,0.0947598,-0.184017,0.690066,0.394813,-0.47493,0.638807,0.33135,0,0


In [13]:
# Drop the 'id' rows so that only the measures remain for the analysis below.
# NOTE(review): this mutates `measurements` in place, so re-running this cell without
# recomputing `measurements` is expected to fail once 'id' is gone -- confirm
measurements.drop('id', level=2, inplace=True)
# Also remove 'id' from the index levels; make_table iterates over index.levels[2]
measurements.index = measurements.index.remove_unused_levels()

In [14]:
def bootstrap_mean(l, q=(2.5, 97.5), n=1000):
    """
    Basic bootstrap confidence interval for the mean of `l`.

    Draws `n` resamples of `l` (with replacement, each the same length as `l`),
    takes the mean of every resample, and returns the percentiles `q` of those means.
    """
    sample_size = len(l)
    resampled_means = []
    for _ in range(n):
        resample = np.random.choice(l, size=sample_size)
        resampled_means.append(np.mean(resample))
    return np.percentile(resampled_means, q=q)


def mean_result(measure1, _):
    """Return the mean of `measure1`; the second (baseline) argument is ignored."""
    return measure1.mean()

def mean_lowerbound(measure1, _):
    """Return the lower bootstrap bound of the mean of `measure1`; the second argument is ignored."""
    # Each call draws its own resamples, independent of the ones used by mean_upperbound
    return bootstrap_mean(measure1)[0]

def mean_upperbound(measure1, _):
    """Return the upper bootstrap bound of the mean of `measure1`; the second argument is ignored."""
    return bootstrap_mean(measure1)[1]

def effect_size(measure1, measure2):
    """Return the mean of the element-wise difference `measure1 - measure2`."""
    return (measure1 - measure2).mean()

def effect_lowerbound(measure1, measure2):
    """Return the lower bootstrap bound of the mean difference `measure1 - measure2`."""
    return bootstrap_mean(measure1 - measure2)[0]

def effect_upperbound(measure1, measure2):
    """Return the upper bootstrap bound of the mean difference `measure1 - measure2`."""
    return bootstrap_mean(measure1 - measure2)[1]

def effect_p_val(measure1, measure2):
    """Return the p-value of a paired t-test (scipy.stats.ttest_rel) between the two measures."""
    return stats.ttest_rel(measure1, measure2)[1]

def effect_p_val_one_tail(measure1, measure2):
    """
    Return half of the paired t-test p-value, used as a one-tailed p-value.

    NOTE(review): halving is a valid one-tailed p-value only when the observed
    difference lies in the hypothesised direction; the sign of the difference is
    not checked here -- confirm the expected direction for each measure.
    """
    return stats.ttest_rel(measure1, measure2)[1]/2

def make_table(measurements, func, network='dodec', treatment='indep'):
    """
    Apply `func` to each measure in each condition, paired with a baseline condition.

    measurements: frame whose row index has three levels; the first two together label a
        condition and the third names the measure (in this notebook: network, treatment,
        measure, with one column per block).
    func: callable taking (comparison, reference), e.g. mean_result or effect_size.
    network, treatment: first two index labels of the baseline condition;
        defaults to ('dodec', 'indep').

    Returns a DataFrame with one row per measure and one column per condition.
    """
    rows = measurements.index.levels[2]
    cols = np.unique(measurements.index.droplevel(2))
    res = pd.DataFrame(index=rows, columns=cols)
    for row in rows:
        for col in cols:
            # values of this measure in this condition, without missing entries
            comparison = measurements.loc[col+tuple([row])].dropna()
            # baseline values of the same measure, restricted to the same columns
            # so that comparison and reference stay paired
            reference = measurements.loc[(network, treatment)+tuple([row])][comparison.index]
            res.at[row, col] = func(comparison, reference)
    return res

In [15]:
# Mean of each measure in each (network, treatment) condition
res = make_table(measurements, mean_result)
res

Unnamed: 0,"(caveman, indep)","(caveman, inter)","(dodec, indep)","(dodec, inter)"
confidence,56.0755,59.1306,55.3935,57.5618
consensus,57.6475,56.931,55.8923,55.2684
net spoke 5% similarity,-0.115098,-0.112007,-0.0827036,-0.0713472
net spoke 95% similarity,0.252084,0.255505,0.0959625,0.12206
net spoke PC1,0.18974,0.194293,0.0530893,0.0755307
spoke 5% similarity,-0.271291,-0.303028,-0.157664,-0.215429
spoke 95% similarity,0.803906,0.777318,0.68817,0.672197
spoke PC1,0.387681,0.391874,0.296064,0.292582
survey 5% similarity,-0.497605,-0.491553,-0.408854,-0.44298
survey 95% similarity,0.689841,0.704249,0.668364,0.664479


In [16]:
# Lower bootstrap bound (2.5th percentile by default) of each condition mean
res_lowerbound = make_table(measurements, mean_lowerbound)
res_lowerbound

Unnamed: 0,"(caveman, indep)","(caveman, inter)","(dodec, indep)","(dodec, inter)"
confidence,54.2185,57.244,53.0767,54.5643
consensus,55.7411,54.8468,53.8684,52.9639
net spoke 5% similarity,-0.134264,-0.12864,-0.0978846,-0.0927573
net spoke 95% similarity,0.224687,0.230246,0.0759711,0.100479
net spoke PC1,0.169609,0.175359,0.0404301,0.0596758
spoke 5% similarity,-0.309194,-0.335847,-0.193438,-0.24623
spoke 95% similarity,0.782607,0.757518,0.668767,0.644961
spoke PC1,0.36952,0.369743,0.278357,0.273112
survey 5% similarity,-0.529969,-0.527749,-0.438666,-0.467595
survey 95% similarity,0.66163,0.674998,0.642767,0.639452


In [17]:
# Upper bootstrap bound (97.5th percentile by default) of each condition mean
res_upperbound = make_table(measurements, mean_upperbound)
res_upperbound

Unnamed: 0,"(caveman, indep)","(caveman, inter)","(dodec, indep)","(dodec, inter)"
confidence,57.944,60.8472,57.904,60.3482
consensus,59.4605,58.9566,57.9096,57.6968
net spoke 5% similarity,-0.0975195,-0.0942266,-0.0682537,-0.0521282
net spoke 95% similarity,0.281542,0.280059,0.113777,0.145947
net spoke PC1,0.207929,0.215287,0.0670917,0.0940049
spoke 5% similarity,-0.234844,-0.263206,-0.122203,-0.184334
spoke 95% similarity,0.822883,0.797879,0.708554,0.699393
spoke PC1,0.406518,0.414314,0.31417,0.311288
survey 5% similarity,-0.466863,-0.450175,-0.376392,-0.416541
survey 95% similarity,0.712307,0.735543,0.694532,0.690183


In [18]:
# Mean paired difference between each condition and the default baseline (dodec, indep)
eff = make_table(measurements, effect_size)
eff

Unnamed: 0,"(caveman, indep)","(caveman, inter)","(dodec, indep)","(dodec, inter)"
confidence,0.681974,3.7371,0,2.16832
consensus,1.75515,1.0387,0,-0.623904
net spoke 5% similarity,-0.0323943,-0.0293038,0,0.0113564
net spoke 95% similarity,0.156121,0.159542,0,0.0260974
net spoke PC1,0.136651,0.141204,0,0.0224414
spoke 5% similarity,-0.113627,-0.145364,0,-0.057765
spoke 95% similarity,0.115736,0.0891476,0,-0.015973
spoke PC1,0.0916162,0.0958095,0,-0.00348228
survey 5% similarity,-0.0887504,-0.0826988,0,-0.0341256
survey 95% similarity,0.0214769,0.0358843,0,-0.00388585


In [19]:
# Halved paired t-test p-values against the (dodec, indep) baseline;
# columns containing any missing value are dropped before testing
eff_pval = make_table(measurements.dropna(axis=1), effect_p_val_one_tail)
eff_pval

Unnamed: 0,"(caveman, indep)","(caveman, inter)","(dodec, indep)","(dodec, inter)"
confidence,0.322673,0.0119632,,0.0451382
consensus,0.0868613,0.230841,,0.311086
net spoke 5% similarity,0.00480436,0.0115603,,0.195875
net spoke 95% similarity,1.06329e-09,1.04357e-11,,0.0142176
net spoke PC1,1.73092e-11,2.81217e-12,,0.0126675
spoke 5% similarity,6.62032e-05,8.12011e-08,,0.00459607
spoke 95% similarity,2.09443e-09,7.13514e-08,,0.167968
spoke PC1,1.86737e-08,1.83911e-07,,0.389573
survey 5% similarity,6.34841e-05,0.00147917,,0.0583438
survey 95% similarity,0.123006,0.00581763,,0.4137


In [20]:
# Lower bootstrap bound of the mean difference from the (dodec, indep) baseline
eff_lowerbound = make_table(measurements, effect_lowerbound)
eff_lowerbound

Unnamed: 0,"(caveman, indep)","(caveman, inter)","(dodec, indep)","(dodec, inter)"
confidence,-2.10375,0.644177,0,-0.121202
consensus,-0.689012,-1.38337,0,-3.10756
net spoke 5% similarity,-0.0571775,-0.0509624,0,-0.0134539
net spoke 95% similarity,0.11945,0.128631,0,0.00544055
net spoke PC1,0.110264,0.117815,0,0.00451623
spoke 5% similarity,-0.161027,-0.183544,0,-0.0961195
spoke 95% similarity,0.0891501,0.0639893,0,-0.046414
spoke PC1,0.067414,0.0673154,0,-0.0268315
survey 5% similarity,-0.125618,-0.131284,0,-0.0743633
survey 95% similarity,-0.0135706,0.0104655,0,-0.0400188


In [21]:
# Upper bootstrap bound of the mean difference from the (dodec, indep) baseline
eff_upperbound = make_table(measurements, effect_upperbound)
eff_upperbound

Unnamed: 0,"(caveman, indep)","(caveman, inter)","(dodec, indep)","(dodec, inter)"
confidence,3.27269,6.92109,0,4.59122
consensus,4.14702,3.84964,0,1.96467
net spoke 5% similarity,-0.01078,-0.00538281,0,0.0369309
net spoke 95% similarity,0.188767,0.191345,0,0.0477789
net spoke PC1,0.160674,0.165302,0,0.0415609
spoke 5% similarity,-0.0614042,-0.103722,0,-0.0186676
spoke 95% similarity,0.144219,0.113329,0,0.0150134
spoke PC1,0.115482,0.121464,0,0.0190875
survey 5% similarity,-0.0502835,-0.0360001,0,0.00367095
survey 95% similarity,0.0554148,0.0618794,0,0.0299033


In [22]:
# Collect the tables computed against the (dodec, indep) baseline into one long table;
# unstack() turns each table into a Series indexed by (condition, measure)
export_table = pd.DataFrame()
export_table["Result Mean"] = res.unstack()
export_table["Result Lower Bound"] = res_lowerbound.unstack()
export_table["Result Upper Bound"] = res_upperbound.unstack()
export_table["Effect Size"] = eff.unstack()
export_table["Effect Upper Bound"] = eff_upperbound.unstack()
export_table["Effect Lower Bound"] = eff_lowerbound.unstack()
# eff_pval here holds the halved (one-tailed) p-values
export_table["Effect P Value"] = eff_pval.unstack()


export_table.to_csv(output_dir+"end_of_game_effects_summary.csv")
export_table

Unnamed: 0,Unnamed: 1,Result Mean,Result Lower Bound,Result Upper Bound,Effect Size,Effect Upper Bound,Effect Lower Bound,Effect P Value
"(caveman, indep)",confidence,56.0755,54.2185,57.944,0.681974,3.27269,-2.10375,0.322673
"(caveman, indep)",consensus,57.6475,55.7411,59.4605,1.75515,4.14702,-0.689012,0.0868613
"(caveman, indep)",net spoke 5% similarity,-0.115098,-0.134264,-0.0975195,-0.0323943,-0.01078,-0.0571775,0.00480436
"(caveman, indep)",net spoke 95% similarity,0.252084,0.224687,0.281542,0.156121,0.188767,0.11945,1.06329e-09
"(caveman, indep)",net spoke PC1,0.18974,0.169609,0.207929,0.136651,0.160674,0.110264,1.73092e-11
"(caveman, indep)",spoke 5% similarity,-0.271291,-0.309194,-0.234844,-0.113627,-0.0614042,-0.161027,6.62032e-05
"(caveman, indep)",spoke 95% similarity,0.803906,0.782607,0.822883,0.115736,0.144219,0.0891501,2.09443e-09
"(caveman, indep)",spoke PC1,0.387681,0.36952,0.406518,0.0916162,0.115482,0.067414,1.86737e-08
"(caveman, indep)",survey 5% similarity,-0.497605,-0.529969,-0.466863,-0.0887504,-0.0502835,-0.125618,6.34841e-05
"(caveman, indep)",survey 95% similarity,0.689841,0.66163,0.712307,0.0214769,0.0554148,-0.0135706,0.123006


# compare to the caveman baseline

In [23]:
# Mean paired difference from the (caveman, indep) baseline.
# Rebinds `eff`, replacing the table computed against the dodec baseline.
eff = make_table(measurements, effect_size, network='caveman')
eff

Unnamed: 0,"(caveman, indep)","(caveman, inter)","(dodec, indep)","(dodec, inter)"
confidence,0,3.05513,-0.681974,1.48635
consensus,0,-0.716453,-1.75515,-2.37906
net spoke 5% similarity,0,0.00309047,0.0323943,0.0437507
net spoke 95% similarity,0,0.00342085,-0.156121,-0.130024
net spoke PC1,0,0.00455352,-0.136651,-0.114209
spoke 5% similarity,0,-0.0317362,0.113627,0.0558623
spoke 95% similarity,0,-0.0265887,-0.115736,-0.131709
spoke PC1,0,0.00419327,-0.0916162,-0.0950985
survey 5% similarity,0,0.00605165,0.0887504,0.0546248
survey 95% similarity,0,0.0144075,-0.0214769,-0.0253627


In [24]:
# Paired t-test p-values against the (caveman, indep) baseline.
# NOTE(review): this table uses effect_p_val (not halved) while the dodec-baseline table
# uses effect_p_val_one_tail; both are exported as "Effect P Value" -- confirm this is intended
eff_pval = make_table(measurements.dropna(axis=1), effect_p_val, network='caveman')
eff_pval

Unnamed: 0,"(caveman, indep)","(caveman, inter)","(dodec, indep)","(dodec, inter)"
confidence,,0.0163334,0.645346,0.298291
consensus,,0.500056,0.173723,0.0645604
net spoke 5% similarity,,0.807965,0.00960872,0.00218251
net spoke 95% similarity,,0.866084,2.12657e-09,2.49554e-06
net spoke PC1,,0.722699,3.46184e-11,6.57352e-08
spoke 5% similarity,,0.261186,0.000132406,0.0308809
spoke 95% similarity,,0.0288426,4.18887e-09,7.58097e-08
spoke PC1,,0.745033,3.73473e-08,1.21139e-06
survey 5% similarity,,0.807714,0.000126968,0.0147092
survey 95% similarity,,0.408355,0.246012,0.171747


In [25]:
# Lower bootstrap bound of the mean difference from the (caveman, indep) baseline
eff_lowerbound = make_table(measurements, effect_lowerbound, network='caveman')
eff_lowerbound

Unnamed: 0,"(caveman, indep)","(caveman, inter)","(dodec, indep)","(dodec, inter)"
confidence,0,0.624942,-3.61331,-1.30872
consensus,0,-2.77881,-4.17769,-4.94378
net spoke 5% similarity,0,-0.0204528,0.0115398,0.0162717
net spoke 95% similarity,0,-0.0315796,-0.190152,-0.171642
net spoke PC1,0,-0.0196225,-0.162785,-0.141936
spoke 5% similarity,0,-0.0832308,0.0638545,0.00852617
spoke 95% similarity,0,-0.0486917,-0.140274,-0.167637
spoke PC1,0,-0.0161127,-0.114674,-0.123097
survey 5% similarity,0,-0.0426461,0.052509,0.0141827
survey 95% similarity,0,-0.0160445,-0.0547109,-0.0608156


In [26]:
# Upper bootstrap bound of the mean difference from the (caveman, indep) baseline
eff_upperbound = make_table(measurements, effect_upperbound, network='caveman')
eff_upperbound

Unnamed: 0,"(caveman, indep)","(caveman, inter)","(dodec, indep)","(dodec, inter)"
confidence,0,5.52012,2.0907,4.19331
consensus,0,1.38052,0.612866,0.0048891
net spoke 5% similarity,0,0.0263142,0.0565033,0.0672817
net spoke 95% similarity,0,0.0403043,-0.120166,-0.0859765
net spoke PC1,0,0.0276954,-0.108491,-0.0814798
spoke 5% similarity,0,0.0281616,0.161661,0.0987032
spoke 95% similarity,0,-0.00550559,-0.0895696,-0.0926969
spoke PC1,0,0.0290817,-0.0659373,-0.0666239
survey 5% similarity,0,0.057544,0.126509,0.094245
survey 95% similarity,0,0.0464873,0.0123069,0.00806497


In [27]:
# Same layout as the dodec-baseline export. The "Result" columns reuse res, res_lowerbound
# and res_upperbound unchanged; the "Effect" columns use the tables recomputed against
# the (caveman, indep) baseline.
export_table = pd.DataFrame()
export_table["Result Mean"] = res.unstack()
export_table["Result Lower Bound"] = res_lowerbound.unstack()
export_table["Result Upper Bound"] = res_upperbound.unstack()
export_table["Effect Size"] = eff.unstack()
export_table["Effect Upper Bound"] = eff_upperbound.unstack()
export_table["Effect Lower Bound"] = eff_lowerbound.unstack()
export_table["Effect P Value"] = eff_pval.unstack()


export_table.to_csv(output_dir+"end_of_game_effects_summary_caveman_baseline.csv")
export_table

Unnamed: 0,Unnamed: 1,Result Mean,Result Lower Bound,Result Upper Bound,Effect Size,Effect Upper Bound,Effect Lower Bound,Effect P Value
"(caveman, indep)",confidence,56.0755,54.2185,57.944,0.0,0.0,0.0,
"(caveman, indep)",consensus,57.6475,55.7411,59.4605,0.0,0.0,0.0,
"(caveman, indep)",net spoke 5% similarity,-0.115098,-0.134264,-0.0975195,0.0,0.0,0.0,
"(caveman, indep)",net spoke 95% similarity,0.252084,0.224687,0.281542,0.0,0.0,0.0,
"(caveman, indep)",net spoke PC1,0.18974,0.169609,0.207929,0.0,0.0,0.0,
"(caveman, indep)",spoke 5% similarity,-0.271291,-0.309194,-0.234844,0.0,0.0,0.0,
"(caveman, indep)",spoke 95% similarity,0.803906,0.782607,0.822883,0.0,0.0,0.0,
"(caveman, indep)",spoke PC1,0.387681,0.36952,0.406518,0.0,0.0,0.0,
"(caveman, indep)",survey 5% similarity,-0.497605,-0.529969,-0.466863,0.0,0.0,0.0,
"(caveman, indep)",survey 95% similarity,0.689841,0.66163,0.712307,0.0,0.0,0.0,


# Interaction / Moderation

In [28]:
# Effect of the 'inter' condition relative to 'indep' within each network, per measure and column
cave_effect = (measurements.xs(("caveman", "inter"), level=(0,1)) - measurements.xs(("caveman", "indep"), level=(0,1)))
dodec_effect = (measurements.xs(("dodec", "inter"), level=(0,1)) - measurements.xs(("dodec", "indep"), level=(0,1)))
# Interaction: difference between the two effects, averaged across columns
df = pd.DataFrame((cave_effect - dodec_effect).mean(axis=1), columns=['Interaction'])
# Paired t-test of the caveman effect against the dodec effect, one test per measure
df['pval'] = stats.ttest_rel(cave_effect.T, dodec_effect.T)[1]
df

Unnamed: 0,Interaction,pval
confidence,0.886805,0.622754
consensus,-0.092549,0.953745
net spoke 5% similarity,-0.008266,0.631615
net spoke 95% similarity,-0.022677,0.267805
net spoke PC1,-0.017888,0.224151
spoke 5% similarity,0.026029,0.504185
spoke 95% similarity,-0.010616,0.614256
spoke PC1,0.007676,0.600682
survey 5% similarity,0.040177,0.22148
survey 95% similarity,0.018293,0.523234


# exploratory analysis
Total variance change - assuming the mean may also change.

In [26]:
# Swap the first and last index levels so that the measure name comes first.
# NOTE(review): `.loc['']` looks like an unfinished selection; the saved output shows the
# whole reordered table -- confirm the intended key before re-running
measurements.swaplevel(2,0).loc['']

Unnamed: 0,Unnamed: 1,Unnamed: 2,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
confidence,indep,caveman,60.7368,58.9474,62.35,49.5789,52.25,57.5,55.5,56.3889,65.85,61.8421,...,48.75,42.75,50.25,48.8421,57.6842,54.7,50.4,52.75,64.3889,61.25
consensus,indep,caveman,58.2632,64.5789,59.9,55.2632,52.65,54.4,53.2,51.1667,69.9,61.8421,...,48.35,48.85,55.3,52.1053,57.8947,54.95,56.5,52.25,64.9444,63.6
net spoke 5% similarity,indep,caveman,-0.111123,-0.0522341,-0.089682,-0.099385,-0.0875477,-0.0765501,-0.0392197,-0.149115,-0.125831,-0.128283,...,-0.105806,-0.114064,-0.144042,-0.15434,-0.20855,-0.237329,-0.100588,-0.0797204,-0.117188,-0.0899266
net spoke 95% similarity,indep,caveman,0.175356,0.401069,0.364013,0.21431,0.233472,0.217858,0.206372,0.341329,0.400767,0.24972,...,0.405398,0.193235,0.148096,0.32721,0.196599,0.242625,0.16412,0.0989161,0.226052,0.182973
net spoke PC1,indep,caveman,0.166436,0.215398,0.18643,0.15949,0.156671,0.216975,0.138372,0.226464,0.234374,0.30378,...,0.249723,0.189937,0.176859,0.185284,0.257358,0.164435,0.0882648,0.125604,0.252929,0.110283
spoke 5% similarity,indep,caveman,-0.267283,-0.271448,-0.291655,-0.110657,-0.322031,-0.259759,-0.105496,-0.37918,-0.370479,-0.196553,...,-0.369162,-0.322031,-0.187415,-0.39248,-0.332106,-0.354044,-0.114092,-0.00928344,-0.301073,-0.129046
spoke 95% similarity,indep,caveman,0.770833,0.906105,0.881366,0.845124,0.707087,0.793884,0.807523,0.815588,0.85549,0.83205,...,0.850287,0.700826,0.765868,0.793884,0.793884,0.801265,0.787037,0.823625,0.742,0.793361
spoke PC1,indep,caveman,0.356007,0.392184,0.375846,0.348715,0.338786,0.402638,0.32753,0.428915,0.421086,0.489493,...,0.429961,0.368541,0.38431,0.370714,0.453819,0.343516,0.282689,0.354856,0.454266,0.400555
survey 5% similarity,indep,caveman,-0.480858,-0.465191,-0.448069,-0.301127,-0.491501,-0.481266,-0.456227,-0.533481,-0.594349,-0.549006,...,-0.418934,-0.444454,-0.442717,-0.611374,-0.530969,-0.382967,-0.299744,-0.457911,-0.484487,-0.464423
survey 95% similarity,indep,caveman,0.577215,0.63842,0.738817,0.718672,0.679652,0.656122,0.694059,0.708197,0.570669,0.806997,...,0.712926,0.67434,0.710155,0.637065,0.764925,0.69992,0.783079,0.688364,0.682026,0.578193


In [54]:
# Inspect the column labels of the transposed measurements table
measurements.T.columns

FrozenList([['caveman', 'dodec'], ['indep', 'inter'], ['confidence', 'consensus', 'net spoke 5% similarity', 'net spoke 95% similarity', 'net spoke PC1', 'spoke 5% similarity', 'spoke 95% similarity', 'spoke PC1', 'survey 5% similarity', 'survey 95% similarity', 'survey PC1']])

In [34]:
# Spread between the 95% and 5% similarity values, for the net spoke and the survey measures.
# These rows are added after the flat CSV was written, so they are not part of that file.
measurements_flat.loc['net spoke delta'] = measurements_flat.loc['net spoke 95% similarity'] - measurements_flat.loc['net spoke 5% similarity']
measurements_flat.loc['survey delta'] = measurements_flat.loc['survey 95% similarity'] - measurements_flat.loc['survey 5% similarity']
# Group the games (rows after transposing) by condition indicator and network indicator
groups = measurements_flat.T.groupby(['t', 'network'])

In [37]:
# Show which games fall into each (t, network) group.
# The cell previously read `groups[.groups`, which is a syntax error; the saved output
# is the mapping of group key to row labels, i.e. the `groups` attribute of the groupby.
groups.groups

{(0,
  0): Index(['GT_Lj2up2ePZA6WtiK4q_0', 'GT_MfjKnfErfu8AMLKps_0',
        'GT_rtzgQPuxzH7dGbna4_0', 'GT_EFdcytJKJJAgFebvR_0',
        'GT_yAmdFLN5XZJ6SxeKL_0', 'GT_c2BKscFKSBaujzQHw_0',
        'GT_SamzHPveTXj44MS6F_0', 'GT_7kcXpJaQqJmx68nYz_0',
        'GT_mYHZ32AyKCmNrfSY5_0', 'GT_KtFjB6pDKRispFwCs_0',
        'GT_W3Pr3tyHAoPsyoZxh_0', 'GT_BhccgcKQe7bpH7pWi_0',
        'GT_nyj44mrAdH9BGGSnK_0', 'GT_TCrgRvccdKGHeDvdW_0',
        'GT_hE8iDmuY7wvbgTTRB_0', 'GT_Zxhtd5NFdLyNExLtN_0',
        'GT_BfWPcLgXY7dKcAsdX_0', 'GT_yBcW7BjFe59HDfshF_0',
        'GT_ETYJ4Ru9uE3Fszkkr_0', 'GT_e6Kq4iEPeKDdJ3wyk_0',
        'GT_dC7qKt8rhj8W3FFTC_0', 'GT_nkPob7gypQi6DtwqS_0',
        'GT_F6g9NDczqP966pA5m_0', 'GT_7c9biMCakuPCxMEXN_0',
        'GT_T9KCzCFiibi6W5mnL_0', 'GT_aS5rH5N5tnRsr3CQQ_0',
        'GT_RugWRCuQ9MounHF8x_0', 'GT_ig8z4GCN3uW4nFT54_0',
        'GT_hjTbDvCzT2RW9pgm8_0', 'GT_d2N6gipGzAM2B3GPn_0'],
       dtype='object'),
 (0,
  1): Index(['GT_jcfFiGC22Api8cm7o_0', 'GT_oWf8QrmFrj8ZiE6JK

In [None]:
for block in blocks:
    