# Import libraries

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from IPython.core.debugger import set_trace
import json
from matplotlib import colors

# Load data

In [3]:
# Directory that holds the exported tables. The trailing slash is required
# because file names are appended to it directly, without a path join.
PATH = "data/rivka-necklace-rep-data/psynet/data/"

# low_memory=False makes pandas read each file in one pass instead of in chunks.
nodes = pd.read_csv(f"{PATH}node.csv", low_memory=False)
networks = pd.read_csv(f"{PATH}network.csv", low_memory=False)

# Preprocess data

In [4]:
# --- Filter networks -------------------------------------------------------
# Keep networks that are part of the experiment proper, were not failed, and
# were created by the "graph_experiment" trial maker. The three conditions are
# combined into one mask so the selection is expressed (and evaluated) once.
network_mask = (
    (networks["role"] == "experiment")
    & (networks["failed"] == "f")
    & (networks["trial_maker_id"] == "graph_experiment")
)
network_data = networks[network_mask]

# Ids of the surviving networks; used below to restrict the node table.
experiment_net_id = list(network_data["id"].to_numpy())

# --- Filter nodes ----------------------------------------------------------
# Keep non-failed graph-chain nodes that belong to one of the networks above.
# Every condition is computed from `nodes` itself, so the mask and the frame
# it indexes always share the same index (the previous version mixed a mask
# built from `nodes` with a frame named `node_data`).
node_columns = [
    "id",
    "network_id",
    "degree",
    "definition",
    "seed",
    "vertex_id",
    "dependent_vertex_ids",
]
node_mask = (
    (nodes["type"] == "graph_chain_node")
    & (nodes["failed"] == "f")
    & nodes["network_id"].isin(experiment_net_id)
)

# Select rows and columns in a single step, then order by network and degree.
node_data = nodes.loc[node_mask, node_columns].sort_values(["network_id", "degree"])

In [5]:
# Show the filtered node table, ordered by network_id and then degree.
node_data

Unnamed: 0,id,network_id,degree,definition,seed,vertex_id,dependent_vertex_ids
3,2,2,1.0,"[{""content"": [0, 0, 0, 0, 1, 1, 1, 0, 2], ""is_...","[{""content"": [0, 0, 0, 0, 1, 1, 1, 0, 2], ""is_...",0.0,"[1, 42, 6, 7]"
180,144,2,2.0,"[{""content"": [0, 0, 0, 0, 1, 1, 1, 0, 2], ""is_...","[{""content"": [0, 0, 0, 0, 1, 1, 1, 0, 2], ""is_...",0.0,"[1, 42, 6, 7]"
84,178,2,3.0,"[{""content"": [1, 1, 1, 1, 1, 1, 1, 1, 1], ""is_...","[{""content"": [1, 1, 1, 1, 1, 1, 1, 1, 1], ""is_...",0.0,"[1, 42, 6, 7]"
297,211,2,4.0,"[{""content"": [1, 1, 1, 1, 1, 1, 1, 1, 1], ""is_...","[{""content"": [1, 1, 1, 1, 1, 1, 1, 1, 1], ""is_...",0.0,"[1, 42, 6, 7]"
204,273,2,5.0,"[{""content"": [2, 2, 2, 2, 1, 2, 2, 2, 2], ""is_...","[{""content"": [2, 2, 2, 2, 1, 2, 2, 2, 2], ""is_...",0.0,"[1, 42, 6, 7]"
...,...,...,...,...,...,...,...
682,880,50,17.0,"[{""content"": [2, 2, 2, 2, 2, 2, 2, 2, 2], ""is_...","[{""content"": [2, 2, 2, 2, 2, 2, 2, 2, 2], ""is_...",48.0,"[6, 47, 41, 42]"
853,922,50,18.0,"[{""content"": [1, 1, 1, 1, 2, 1, 2, 1, 1], ""is_...","[{""content"": [1, 1, 1, 1, 2, 1, 2, 1, 1], ""is_...",48.0,"[6, 47, 41, 42]"
967,950,50,19.0,"[{""content"": [1, 1, 1, 1, 1, 1, 1, 1, 1], ""is_...","[{""content"": [1, 1, 1, 1, 1, 1, 1, 1, 1], ""is_...",48.0,"[6, 47, 41, 42]"
968,1012,50,20.0,"[{""content"": [1, 1, 1, 1, 2, 1, 2, 1, 1], ""is_...","[{""content"": [1, 1, 1, 1, 2, 1, 2, 1, 1], ""is_...",48.0,"[6, 47, 41, 42]"


# Export data to csv

In [6]:
# Write the filtered node table to a CSV file in the working directory.
# index=True is the pandas default, spelled out here: the DataFrame index is
# written as the first, unnamed column of the file.
node_data.to_csv(path_or_buf="rivka-necklace-rep-data.csv", index=True)

In [8]:
# Most common necklaces that people produced
# Re-order the node table by degree only (this rebinds node_data), and start
# from an empty container for the necklace frequencies.
node_data = node_data.sort_values(by="degree")
necklace_freqs = dict()


        id  network_id  degree  \
3        2           2     1.0   
38      26          14     1.0   
87      75          32     1.0   
79      67          28     1.0   
51      36          19     1.0   
...    ...         ...     ...   
1021  1047          31    21.0   
1089  1077          27    21.0   
1083  1067          22    21.0   
1042  1063          19    21.0   
975   1059          50    21.0   

                                             definition  \
3     [{"content": [0, 0, 0, 0, 1, 1, 1, 0, 2], "is_...   
38    [{"content": [2, 2, 0, 2, 1, 0, 1, 1, 2], "is_...   
87    [{"content": [2, 0, 2, 0, 2, 2, 2, 2, 1], "is_...   
79    [{"content": [0, 2, 0, 0, 2, 0, 1, 0, 0], "is_...   
51    [{"content": [0, 1, 0, 1, 1, 1, 1, 1, 0], "is_...   
...                                                 ...   
1021  [{"content": [1, 1, 1, 1, 1, 1, 1, 1, 1], "is_...   
1089  [{"content": [0, 0, 0, 0, 0, 0, 0, 0, 0], "is_...   
1083  [{"content": [1, 1, 1, 1, 1, 1, 1, 1, 1], "is_...   
1

In [None]:
# Experiment Cost and Demographics

In [7]:
# Read the participant table from the same directory (PATH) as the other tables.
participants = pd.read_csv(f"{PATH}participant.csv", low_memory=False)

In [9]:
# A participant is counted as valid when marked complete ("t") and not failed ("f").
is_valid = (participants["complete"] == "t") & (participants["failed"] == "f")
valid_participants = participants[is_valid]

# Total payout: base pay plus bonus summed over *every* row of `participants`,
# not only the valid ones.
# NOTE(review): presumably intentional (all participants were paid) — confirm.
total_cost = participants["base_pay"].sum() + participants["bonus"].sum()

experiment_summary = dict(
    N_participants=len(valid_participants),
    cost=total_cost,
)

experiment_summary

{'N_participants': 45, 'cost': 91.71999999999998}