# Recovery Notebook

This notebook is for scraping the data from the input json files of the users to determine what remaining errors they left in each scenario

## Imports

In [2]:
import json
import os
import numpy as np
import pandas as pd

## Import Data

### Ground Truth Values

In [51]:
Q = [[5000, 1500, 2500], [0, 0, 1], [0, 1, 1], [1, 1, 0]]
Q = np.asarray(Q)

Y = [[4200, 1200, 500, 195, 140, 2000, 0], [0, 0, 0, 1, 1, 0, 0], [0, 1, 1, 0, 0, 1, 1], [1, 0, 0, 0, 0, 0, 0]]
Y = np.asarray(Y)

speed = [0.4, 0.7, 0.8]
speed = np.asarray(speed)

print(Q)
print(Y)
print(speed)

[[5000 1500 2500]
 [   0    0    1]
 [   0    1    1]
 [   1    1    0]]
[[4200 1200  500  195  140 2000    0]
 [   0    0    0    1    1    0    0]
 [   0    1    1    0    0    1    1]
 [   1    0    0    0    0    0    0]]
[0.4 0.7 0.8]


### Initialize DataFrame

In [43]:
data = pd.DataFrame(columns=['id', 'study_condition','scenario', 'Q_errors', 'Y_errors', 'speed_errors'])

### Get User Ids

In [44]:
directory = os.fsencode('user_data/user_json/')
ids = [id.decode() for id in os.listdir(directory)]

print(ids)

['10552', '17510', '20038', '21356', '22388', '25858', '31493', '48765', '61352', '63191', '75187', '80131', '86884', '89175', '89194', '99983']


In [67]:
index = 0

scenarios = ['low_2', 'low_4', 'low_5', 'low_6']

# Add loop for each id
files = os.listdir(directory.decode()+ids[0])

for id in ids:
    print(id)
    # Go through every file for a participant
    for file in files:
        # Skip No-Error Scenarios
        if not any(map(file.__contains__, scenarios)):
            continue
        
        # Save the scenario name
        for scenario in scenarios:
            if scenario in file:
                scenario_name = scenario
        
        with open('user_data/' + id + '.json') as f:
            d = json.load(f)
            study_condition = d[id]['warmup_1']['study_condition']
        
        # Import data from json file
        with open(directory.decode()+ids[0]+'/'+file) as f:
            d = json.load(f)
        
        Q_errors = 0
        Y_errors = 0
        speed_errors = 0
        
        for i, specie in enumerate(d['species']):
            # Check Species Traits
            if np.count_nonzero(specie['traits'] - Q[:, i]):
                Q_errors = Q_errors + 1
                
            # Check Species Speed    
            if np.count_nonzero(specie['speed'] - speed[i]):
                speed_errors = speed_errors + 1
            
        for i, task in enumerate(d['tasks']):
            # Check Task Requirements  
            if np.count_nonzero(task['desired_traits'] - Y[:, i]):
                Y_errors = Y_errors + 1
        
        data.loc[index] = [id, study_condition, scenario_name, Q_errors, Y_errors, speed_errors]
        index = index + 1


10552
17510
20038
21356
22388
25858
31493
48765
61352
63191
75187
80131
86884
89175
89194
99983


In [68]:
data

Unnamed: 0,id,study_condition,scenario,Q_errors,Y_errors,speed_errors
0,10552,1,low_5,0,3,0
1,10552,1,low_6,2,0,2
2,10552,1,low_4,1,1,1
3,10552,1,low_2,2,1,1
4,17510,2,low_5,0,3,0
...,...,...,...,...,...,...
59,89194,1,low_2,2,1,1
60,99983,2,low_5,0,3,0
61,99983,2,low_6,2,0,2
62,99983,2,low_4,1,1,1


# Analyze Data

In [81]:
study_condition_1 = data[data['study_condition'] == 1]
study_condition_2 = data[data['study_condition'] == 2]

print(study_condition_2)

       id  study_condition scenario  Q_errors  Y_errors  speed_errors
4   17510                2    low_5         0         3             0
5   17510                2    low_6         2         0             2
6   17510                2    low_4         1         1             1
7   17510                2    low_2         2         1             1
8   20038                2    low_5         0         3             0
9   20038                2    low_6         2         0             2
10  20038                2    low_4         1         1             1
11  20038                2    low_2         2         1             1
16  22388                2    low_5         0         3             0
17  22388                2    low_6         2         0             2
18  22388                2    low_4         1         1             1
19  22388                2    low_2         2         1             1
20  25858                2    low_5         0         3             0
21  25858           

In [76]:
print(np.average(study_condition_1['Q_errors'].tolist()))
print(np.average(study_condition_1['Y_errors'].tolist()))
print(np.average(study_condition_1['speed_errors'].tolist()))

1.25
1.25
1.0


In [77]:
print(np.average(study_condition_2['Q_errors'].tolist()))
print(np.average(study_condition_2['Y_errors'].tolist()))
print(np.average(study_condition_2['speed_errors'].tolist()))

1.25
1.25
1.0


In [None]:
for scenario in 