# Postprocess and anonymize empirica export

This `Python` notebook:
- combines the various pieces of information that Empirica exports into a usable structure
- removes Mechanical Turk IDs and URL parameters to anonymize the data
- removes the survey responses about fair pay (`fair`), enough time (`time`), and general feedback (`feedback`), as well as the `strength` response
- groups games into blocks that are analyzed together (you have to specify which games go in which blocks)
- exports each block to a file in the `results-anonymized` folder

In [1]:
import json
from os import path

## Load Data

In [38]:
# Directory holding the raw (sensitive) export; the loading cell reads its
# *.jsonl files from here. Paths are built by plain string concatenation, so
# the trailing slash is required.
# Alternative source (pilot data):
#source_dir = "../results-sensitive/pilot/20200626/jsonl/"
source_dir = "../results-sensitive/experiment/data/ServerA/"
# Directory that receives one anonymized <block_name>.json file per block
# (trailing slash required here as well).
output_dir = "../results-anonymized/experiment/"


In [39]:
# Load the player records (one JSON document per line), stripping identifying
# and sensitive fields from each record before it is kept.
players = []
with open(source_dir + 'players.jsonl', 'r') as f:
    for line in f:
        player_data = json.loads(line)

        # Remove identifying information. pop() is given a default so that a
        # record lacking one of these fields (i.e. nothing to remove) does not
        # abort the whole export with a KeyError.
        for key in ('id', 'urlParams'):
            player_data.pop(key, None)

        # Remove the survey answers that are not to be exported. A survey
        # that lacks one of these answers is tolerated the same way.
        if 'data.survey' in player_data:
            survey = player_data['data.survey']
            for key in ('strength', 'fair', 'feedback', 'time'):
                survey.pop(key, None)

        # Keep only players whose exit status is present and is not
        # 'gameFull'; records without an exit status are dropped as well.
        if 'exitStatus' in player_data and player_data['exitStatus'] != 'gameFull':
            players.append(player_data)

# Load the game records: one JSON document per line, kept in file order.
games = []
with open(source_dir + 'games.jsonl', 'r') as games_file:
    games.extend(json.loads(record) for record in games_file)
        
# Load the treatment records: one JSON document per line, kept in file order.
treatments = []
with open(source_dir + 'treatments.jsonl', 'r') as treatments_file:
    for raw_line in treatments_file:
        treatment_record = json.loads(raw_line)
        treatments.append(treatment_record)

# Load the player log records: one JSON document per line, kept in file order.
logs = []
with open(source_dir + 'player-logs.jsonl', 'r') as logs_file:
    for raw_line in logs_file:
        log_entry = json.loads(raw_line)
        # The payload is stored as a JSON string inside the record; decode it
        # into 'data' while leaving the original 'jsonData' string in place.
        payload = json.loads(log_entry['jsonData'])
        log_entry['data'] = payload
        logs.append(log_entry)
        
# Load the stage records: one JSON document per line, kept in file order.
stages = []
with open(source_dir + 'stages.jsonl', 'r') as stages_file:
    for raw_line in stages_file:
        stage_record = json.loads(raw_line)
        stages.append(stage_record)
        
# match games, players, treatments, and log info
#
# Logs and stages are grouped by game id in one pass each, instead of
# re-scanning the complete lists once per game. Within each group the entries
# keep the order they have in the source lists.
logs_by_game = {}
for log_entry in logs:
    logs_by_game.setdefault(log_entry['gameId'], []).append(log_entry)

stages_by_game = {}
for stage in stages:
    stages_by_game.setdefault(stage['gameId'], []).append(stage)

# Index treatments by id; if an id is ever repeated the first record wins.
treatments_by_id = {}
for treatment in treatments:
    treatments_by_id.setdefault(treatment['_id'], treatment)

loaded_games = []
for game in games:
    # Only players that survived the filtering in the loading step can be
    # attached, so a game may end up with fewer players than 'playerIds'.
    game['players'] = {pl['_id']: pl for pl in players if pl['_id'] in game['playerIds']}

    # Fail with a message that names the offending game instead of a bare
    # "list index out of range" when the treatment record is missing.
    if game['treatmentId'] not in treatments_by_id:
        raise IndexError(
            "No treatment with _id " + repr(game['treatmentId'])
            + " found for game " + repr(game['_id'])
        )
    game['gameSetupId'] = treatments_by_id[game['treatmentId']]['name']

    # Games without any log or stage entries get an empty list.
    game['log'] = logs_by_game.get(game['_id'], [])
    game['stages'] = stages_by_game.get(game['_id'], [])

    loaded_games.append(game)

# Print the position of every game; these positions are what the block
# definitions refer to.
for i, game in enumerate(loaded_games):
    print(i, game['createdAt'], game['gameSetupId'])

0 2020-07-08T23:23:47.490Z playWithBots
1 2020-07-09T12:06:10.618Z playWithBots
2 2020-07-09T15:40:58.667Z playWithBots
3 2020-07-09T16:29:18.486Z block_3_caveman_prereg_20200702_170602
4 2020-07-09T16:29:18.739Z block_3_dodec_prereg_20200702_170602
5 2020-07-09T18:28:12.492Z block_5_dodec_prereg_20200702_170602
6 2020-07-09T18:28:30.595Z block_5_caveman_prereg_20200702_170602
7 2020-07-09T20:32:04.739Z block_8_dodec_prereg_20200702_170602
8 2020-07-09T20:32:27.358Z block_8_caveman_prereg_20200702_170602
9 2020-07-10T16:33:12.038Z block_12_dodec_prereg_20200702_170602
10 2020-07-10T16:33:18.935Z block_12_caveman_prereg_20200702_170602
11 2020-07-10T18:34:18.377Z block_15_dodec_prereg_20200702_170602
12 2020-07-10T18:34:52.110Z block_15_caveman_prereg_20200702_170602
13 2020-07-10T20:32:41.903Z block_17_dodec_prereg_20200702_170602
14 2020-07-10T20:33:44.500Z block_17_caveman_prereg_20200702_170602
15 2020-07-11T16:39:40.473Z block_19_caveman_prereg_20200702_170602
16 2020-07-11T16:40:2

## Construct Blocks
Each block contains a set of games that should be processed together. Games are referenced by the index printed in the listing above.

In [40]:
# adjust these to process new data
# Maps each block name (which becomes the output file name) to the positions
# of its games in `loaded_games`, i.e. the numbers printed in the game
# listing of the loading cell.
# Server A
blocks = {
    "block_3": [3,4],
    "block_5": [5,6],
    "block_8": [7,8],
    "block_12": [9,10],
    "block_15": [11,12],
    "block_17": [13,14],
    "block_19": [15,16],
    "block_20": [17,18],
    "block_21": [19,20],
    "block_22": [21,22],
    "block_23": [23,24],
    "block_24": [25,26],
    "block_25": [27,28],
    "block_27": [29,30],
    "block_28": [31,32],
    "block_29": [33,34],
}

# server B
# blocks = {
#     "block_4": [0,1],
#     "block_6": [2,3],
#     "block_9": [4,5],
#     "block_13": [6,7],
#     "block_16": [8,9],
#     "block_18": [10,11],
#     "block_26": [12,13]
# }

# server C
# blocks = {
#     "block_7": [0,1],
#     "block_10": [2,3],
#     "block_14": [4,5],
# }

# server D
# blocks = {
#     "block_11": [0,1],
# }

# server T
# blocks = {
#     "block_0": [1,2],
#     "block_1": [3,4],
#     "block_2": [5,6],
# }

# blocks = {
#     "block_20200624_pilot": [1],
#     "block_20200626_pilot": [2],
# }

# Pre-flight check: report every block file that already exists in the output
# directory. Nothing is written or removed here; this only prints a warning.
# Only the block names are needed, so iterate over the keys directly.
for block_name in blocks:
    outfile_name = output_dir + block_name + '.json'
    if path.exists(outfile_name):
        print(outfile_name + " already exists. Be careful if you don't want to overwrite")

../results-anonymized/experiment/block_3.json already exists. Be careful if you don't want to overwrite
../results-anonymized/experiment/block_5.json already exists. Be careful if you don't want to overwrite
../results-anonymized/experiment/block_8.json already exists. Be careful if you don't want to overwrite
../results-anonymized/experiment/block_12.json already exists. Be careful if you don't want to overwrite
../results-anonymized/experiment/block_15.json already exists. Be careful if you don't want to overwrite
../results-anonymized/experiment/block_17.json already exists. Be careful if you don't want to overwrite
../results-anonymized/experiment/block_19.json already exists. Be careful if you don't want to overwrite
../results-anonymized/experiment/block_20.json already exists. Be careful if you don't want to overwrite
../results-anonymized/experiment/block_21.json already exists. Be careful if you don't want to overwrite
../results-anonymized/experiment/block_22.json already exi

In [41]:
# Export every block as one JSON file mapping gameSetupId -> game record.
for block_name, games_list in blocks.items():
    obj = {}
    for game_index in games_list:
        block_game = loaded_games[game_index]
        setup_id = block_game['gameSetupId']
        # Two entries with the same setup id would silently overwrite each
        # other in the exported mapping, so refuse to write a lossy file.
        # The mapping is built before the output file is opened, so an error
        # here leaves any existing file for this block untouched.
        if setup_id in obj:
            raise ValueError(
                "More than one game in " + block_name + " has gameSetupId "
                + repr(setup_id) + "; one would overwrite the other in the export"
            )
        obj[setup_id] = block_game

    outfile_name = output_dir + block_name + '.json'
    with open(outfile_name, 'w') as f:
        print("Writing " + str(games_list) + " to " + outfile_name)
        json.dump(obj, f)

Writing [3, 4] to ../results-anonymized/experiment/block_3.json
Writing [5, 6] to ../results-anonymized/experiment/block_5.json
Writing [7, 8] to ../results-anonymized/experiment/block_8.json
Writing [9, 10] to ../results-anonymized/experiment/block_12.json
Writing [11, 12] to ../results-anonymized/experiment/block_15.json
Writing [13, 14] to ../results-anonymized/experiment/block_17.json
Writing [15, 16] to ../results-anonymized/experiment/block_19.json
Writing [17, 18] to ../results-anonymized/experiment/block_20.json
Writing [19, 20] to ../results-anonymized/experiment/block_21.json
Writing [21, 22] to ../results-anonymized/experiment/block_22.json
Writing [23, 24] to ../results-anonymized/experiment/block_23.json
Writing [25, 26] to ../results-anonymized/experiment/block_24.json
Writing [27, 28] to ../results-anonymized/experiment/block_25.json
Writing [29, 30] to ../results-anonymized/experiment/block_27.json
Writing [31, 32] to ../results-anonymized/experiment/block_28.json
Writi