In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import seaborn as sns 
from matplotlib.ticker import MaxNLocator
from matplotlib.ticker import FixedLocator
import statsmodels.api as sm

In [2]:
folder_path = 'data'
dataframes = []

# Load every Excel workbook in folder_path into the `dataframes` list.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep positional lookups such as dataframes[5] stable across runs and machines.
for file_name in sorted(os.listdir(folder_path)):
    # Skip anything that is not a workbook. Names starting with '~$' are the
    # lock files Excel leaves next to an open workbook; they end in '.xlsx'
    # but cannot be parsed by read_excel.
    if not file_name.endswith('.xlsx') or file_name.startswith('~$'):
        continue

    file_path = os.path.join(folder_path, file_name)
    df = pd.read_excel(file_path)

    # Shift every spaceRT down by 1000.
    # NOTE(review): presumably removes a fixed 1000 ms offset in the recorded
    # value -- confirm against the task code.
    df['spaceRT'] = df['spaceRT'] - 1000

    # Outlier threshold for spaceRT: mean + 2 * SD over all trials in this file.
    # Per the original author's note, very long spaceRTs are treated as rest
    # periods and are meant to be excluded later using this threshold.
    # The value is the same on every row of a file (it is computed from all of
    # that file's spaceRTs); it is stored as a column only for convenience.
    mean_spaceRT = df['spaceRT'].mean()
    std_spaceRT = df['spaceRT'].std()
    df['spaceRT_2sd'] = mean_spaceRT + 2 * std_spaceRT

    dataframes.append(df)

dataframes[5].head(3)

Unnamed: 0,arrowRT,distribution,interTrialInterval,outcome,myCard,yourCard,spaceRT,totalReward,trialIndex,trialType,choice,block,timeoutRepeat,spaceRT_2sd
0,na,uniform,823,na,6,8,58166,na,0,timeout,na,1,0,38549.421248
1,na,uniform,917,na,3,8,299866,na,1,timeout,na,1,0,38549.421248
2,1031,uniform,892,lose,9,2,1913,9.5,2,response,arrowdown,1,0,38549.421248


In [3]:
# Label every trial with the kind of block it belongs to.
# Blocks 1 and 4 get a fixed label; blocks 2 and 3 take the label from the
# trial's own 'distribution' value when that value is 'low' or 'high'.
# Rows that match none of these rules keep block_type = None.
for df in dataframes:
    df['block_type'] = None

    # Fixed labels: block 1 is uniform, block 4 is mix.
    for block_number, label in ((1, 'uniform'), (4, 'mix')):
        df.loc[df['block'] == block_number, 'block_type'] = label

    # Blocks 2 and 3: label follows the distribution column.
    for block_number in (2, 3):
        in_block = df['block'] == block_number
        for deck in ('low', 'high'):
            df.loc[in_block & (df['distribution'] == deck), 'block_type'] = deck

dataframes[0].head(3)

Unnamed: 0,arrowRT,distribution,interTrialInterval,outcome,myCard,yourCard,spaceRT,totalReward,trialIndex,trialType,choice,block,timeoutRepeat,spaceRT_2sd,block_type
0,2609,uniform,789,lose,4,2,335,9.5,0,response,arrowdown,1,0,2182.041944,uniform
1,597,uniform,853,win,9,4,407,10.0,1,response,arrowup,1,0,2182.041944,uniform
2,188,uniform,904,win,4,7,504,10.5,2,response,arrowdown,1,0,2182.041944,uniform


In [4]:
# 1) Risk lookup tables, one per deck type.
#    Each table maps a card value (1 through 9) to the risk of that card.
_card_values = range(1, 10)

risk_uniform = dict(zip(
    _card_values,
    (0.0, 0.125, 0.25, 0.375, 0.50, 0.375, 0.25, 0.125, 0.0),
))

risk_low = dict(zip(
    _card_values,
    (0.000, 0.243, 0.447, 0.385, 0.250, 0.146, 0.071, 0.023, 0.000),
))

risk_high = dict(zip(
    _card_values,
    (0.000, 0.023, 0.071, 0.146, 0.250, 0.385, 0.447, 0.243, 0.000),
))

# 2) Master table keyed by the value found in the 'distribution' column.
#    No 'mix' key is defined: any distribution value missing from this map
#    has no risk table, so decide how to handle or skip it if one appears.
risk_map = dict(uniform=risk_uniform, low=risk_low, high=risk_high)

# 3) Add a 'risk' column to every loaded DataFrame (each frame is modified in place).
def _lookup_risk(row):
    """Return the risk for one trial row, or NaN when no table entry exists.

    Looks up row['distribution'] in risk_map, then row['myCard'] in that
    deck's table. A distribution or card that is missing from the tables
    yields np.nan.
    """
    deck_table = risk_map.get(row['distribution'], {})
    return deck_table.get(row['myCard'], np.nan)


for df in dataframes:
    df['risk'] = df.apply(_lookup_risk, axis=1)


dataframes[1]


Unnamed: 0,arrowRT,distribution,interTrialInterval,outcome,myCard,yourCard,spaceRT,totalReward,trialIndex,trialType,choice,block,timeoutRepeat,spaceRT_2sd,block_type,risk
0,702,uniform,955,win,7,4,4576,10.5,0,response,arrowup,1,0,1383.158906,uniform,0.250
1,741,uniform,935,win,3,8,802,11,1,response,arrowdown,1,0,1383.158906,uniform,0.250
2,482,uniform,772,win,2,3,432,11.5,2,response,arrowdown,1,0,1383.158906,uniform,0.125
3,1356,uniform,873,lose,5,6,901,11,3,response,arrowup,1,0,1383.158906,uniform,0.500
4,480,uniform,970,lose,3,2,454,10.5,4,response,arrowdown,1,0,1383.158906,uniform,0.250
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266,1218,high,896,win,8,7,400,67,116,response,arrowup,4,0,1383.158906,mix,0.243
267,509,high,805,win,9,6,535,67.5,91,response,arrowup,4,0,1383.158906,mix,0.000
268,844,high,870,win,3,4,350,68,103,response,arrowdown,4,0,1383.158906,mix,0.071
269,870,uniform,901,lose,5,8,325,67.5,5,response,arrowup,4,0,1383.158906,mix,0.500


In [None]:
output_folder = 'data_risk_added'
# exist_ok=True creates the folder when it is missing and does nothing when it
# already exists, so no separate os.path.exists() check is needed.
os.makedirs(output_folder, exist_ok=True)


def compute_risk(row):
    """Return the risk for one trial row, or NaN when no table entry exists.

    Looks up row['distribution'] in risk_map, then row['myCard'] in that
    deck's table. A distribution or card that is missing from the tables
    yields np.nan.
    """
    dist = row['distribution']
    card = row['myCard']
    return risk_map.get(dist, {}).get(card, np.nan)


# Re-read every workbook from folder_path, add the derived columns and write
# the result under the same file name in output_folder.
# Sorting makes the processing order deterministic (os.listdir() order is arbitrary).
for file_name in sorted(os.listdir(folder_path)):
    # Skip non-workbooks and Excel '~$' lock files, which end in '.xlsx' but
    # cannot be parsed by read_excel.
    if not file_name.endswith('.xlsx') or file_name.startswith('~$'):
        continue

    file_path = os.path.join(folder_path, file_name)
    df = pd.read_excel(file_path)

    # Same preprocessing as the loading cell: shift spaceRT down by 1000, then
    # store the per-file outlier threshold (mean + 2 * SD) on every row.
    df['spaceRT'] = df['spaceRT'] - 1000

    mean_spaceRT = df['spaceRT'].mean()
    std_spaceRT = df['spaceRT'].std()
    df['spaceRT_2sd'] = mean_spaceRT + 2 * std_spaceRT

    df['risk'] = df.apply(compute_risk, axis=1)

    # NOTE(review): the 'block_type' column added to the in-memory frames
    # earlier in the notebook is not recomputed here, so the saved files do
    # not contain it -- confirm this is intended.
    out_path = os.path.join(output_folder, file_name)
    df.to_excel(out_path, index=False)

print("All files processed and saved in", output_folder)


All files processed and saved in data_risk_added


# Remember to copy the "data_risk_added" folder into the "RL_agent" folder!