In [45]:
import pandas as pd
from itertools import product
import matplotlib.pyplot as plt
import numpy as np

In [46]:
# Read the Barcelona dataset from its CSV file (path relative to the working
# directory) into a DataFrame.
df = pd.read_csv("data/barcelona_data2.csv")

In [47]:
# Keep only the columns that will be useful. The explicit .copy() makes df an
# independent frame rather than a subset of the frame that was read from disk,
# so the assignments into df in later cells do not raise SettingWithCopyWarning
# on pandas versions without copy-on-write.
df = df[
    [
        "subject",
        "horizon",
        "nte",
        "trial",
        "order",
        "big",
        "right",
        "first",
        "performance",
        "stimuli",
        "presented_stimuli",
    ]
].copy()
# Preview the rows whose horizon equals 2.
df[df["horizon"] == 2]

Unnamed: 0,subject,horizon,nte,trial,order,big,right,first,performance,stimuli,presented_stimuli
300,1.0,2.0,1.0,1,4,1.0,-1.0,True,,0.605,"[0.605, 0.455]"
301,1.0,2.0,2.0,2,4,1.0,1.0,True,,0.265,"[0.265, 0.415]"
302,1.0,2.0,3.0,3,4,1.0,1.0,False,0.151515,0.075,"[0.075, 0.225]"
303,1.0,2.0,1.0,4,4,0.0,-1.0,False,,0.395,"[0.395, 0.545]"
304,1.0,2.0,2.0,5,4,1.0,-1.0,False,,0.735,"[0.355, 0.205]"
...,...,...,...,...,...,...,...,...,...,...,...
10795,18.0,2.0,2.0,86,3,0.0,1.0,False,,0.790,"[0.79, 0.59]"
10796,18.0,2.0,3.0,87,3,1.0,1.0,False,1.000000,0.780,"[0.78, 0.98]"
10797,18.0,2.0,1.0,88,3,0.0,1.0,True,,0.535,"[0.535, 0.525]"
10798,18.0,2.0,2.0,89,3,0.0,-1.0,True,,0.715,"[0.715, 0.725]"


In [48]:
# Delete the performance of every try that is not the final one, i.e. every
# row where horizon != nte - 1. Rows whose performance is already missing need
# no separate filter: writing NaN over NaN changes nothing.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
not_final_try = df["horizon"] != df["nte"] - 1
df.loc[not_final_try, "performance"] = np.nan
# Reformat the big and right columns to boolean: True where the value is 1,
# False for anything else (including missing values).
df["big"] = df["big"] == 1
df["right"] = df["right"] == 1
# Missing values in "first" are treated as False.
df.loc[df["first"].isna(), "first"] = False


In [49]:
def correct_errors(row):
    """Make ``row["stimuli"]`` agree with the row's ``big`` flag.

    ``presented_stimuli`` holds the presented values as text such as
    ``"[0.605, 0.455]"``. When ``big`` is truthy, ``stimuli`` must equal the
    largest presented value, otherwise the smallest. A row that violates this
    is reported on stdout and returned with ``stimuli`` replaced by the
    expected value. A NaN ``stimuli`` always counts as a violation because NaN
    compares unequal to every number.

    Parameters
    ----------
    row : pandas.Series
        One row providing at least the keys ``presented_stimuli`` (str),
        ``stimuli`` and ``big``.

    Returns
    -------
    pandas.Series
        ``row`` itself when it is already consistent, otherwise a corrected
        copy. The argument is never modified.

    Raises
    ------
    ValueError
        If an element of ``presented_stimuli`` cannot be parsed as a float
        (this includes the empty list ``"[]"``).
    """
    # float() ignores surrounding whitespace, so splitting on a bare comma
    # accepts both "[a, b]" and "[a,b]".
    presented_stimuli = [
        float(value) for value in row["presented_stimuli"].strip("[]").split(",")
    ]
    stimuli = row["stimuli"]

    if row["big"]:
        label, comparative, expected = "big", "bigger", max(presented_stimuli)
    else:
        label, comparative, expected = "small", "smaller", min(presented_stimuli)

    # `==` is False for NaN, so a missing stimuli falls through to the fix.
    if stimuli == expected:
        return row

    print("presented:", presented_stimuli, ", stimuli:", stimuli)
    print("not good " + label + ", stimuli:", stimuli, ", " + comparative + ":", expected)

    # DataFrame.apply does not support functions that mutate the object they
    # receive, so the correction is written to a copy.
    corrected = row.copy()
    corrected["stimuli"] = expected
    return corrected

# Run correct_errors on every row (axis=1 passes each row to the function as a
# Series) and rebuild df from the rows it returns.
df = df.apply(correct_errors, axis=1)

presented: [0.26, 0.06] , stimuli: nan
not good big, stimuli: nan , bigger: 0.26
presented: [0.175, 0.165] , stimuli: 0.175
not good small, stimuli: 0.175 , bigger: 0.165
presented: [0.745, 0.795] , stimuli: 0.745
not good big, stimuli: 0.745 , bigger: 0.795
presented: [0.36, 0.46] , stimuli: 0.36
not good big, stimuli: 0.36 , bigger: 0.46
presented: [0.52, 0.72] , stimuli: 0.52
not good big, stimuli: 0.52 , bigger: 0.72
presented: [0.215, 0.225] , stimuli: 0.215
not good big, stimuli: 0.215 , bigger: 0.225
presented: [0.095, 0.245] , stimuli: 0.095
not good big, stimuli: 0.095 , bigger: 0.245
presented: [0.07, 0.17] , stimuli: 0.07
not good big, stimuli: 0.07 , bigger: 0.17
presented: [0.47, 0.67] , stimuli: 0.47
not good big, stimuli: 0.47 , bigger: 0.67
presented: [0.385, 0.375] , stimuli: 0.385
not good small, stimuli: 0.385 , bigger: 0.375
presented: [0.785, 0.775] , stimuli: 0.785
not good small, stimuli: 0.785 , bigger: 0.775
presented: [0.24, 0.34] , stimuli: 0.24
not good big,

In [50]:
# Save the cleaned DataFrame to CSV. The row index is written too (to_csv's
# default), so it appears as an extra unnamed first column in the file.
df.to_csv("data/data_cleared_barcelona2.csv")
