In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import seaborn as sns 
from matplotlib.ticker import MaxNLocator
from matplotlib.ticker import FixedLocator
import statsmodels.api as sm

In [2]:
folder_path = 'data_epileptic'
dataframes = []

# os.listdir() returns names in arbitrary order, so the listing is sorted to
# make positional lookups such as dataframes[5] refer to the same file on
# every run and on every machine.
for file_name in sorted(os.listdir(folder_path)):
    if file_name.endswith('.csv'):
        file_path = os.path.join(folder_path, file_name)
        df = pd.read_csv(file_path)

        # Per-file outlier threshold for spaceRT: mean + 2 * std, computed
        # over every trial in this file. The value is identical on every row
        # of the file; it is stored as a column only so that everything
        # travels together in the DataFrame.
        # Author's note: unusually long spaceRTs are regarded as rest periods
        # and are meant to be excluded when spaceRT is analysed later.
        mean_spaceRT = df['spaceRT'].mean()
        std_spaceRT = df['spaceRT'].std()
        df['spaceRT_2sd'] = mean_spaceRT + 2 * std_spaceRT

        dataframes.append(df)

dataframes[5].head(3)

Unnamed: 0,arrowRT,distribution,interTrialInterval,outcome,myCard,yourCard,spaceRT,totalReward,trialIndex,trialType,choice,block,timeoutRepeat,spaceRT_2sd
0,na,uniform,960,na,2,3,3443,na,0,timeout,na,1,0,3545.971005
1,890,uniform,764,lose,4,3,4224,9.5,1,response,arrowdown,1,0,3545.971005
2,803,uniform,764,win,4,8,2262,10,2,response,arrowdown,1,0,3545.971005


In [3]:
# Tag every trial with the type of block it was played in.
for df in dataframes:
    # Start unlabelled; rows that match none of the rules below stay None.
    df['block_type'] = None

    # Blocks 1 and 4 always have the same type.
    for block_id, label in ((1, 'uniform'), (4, 'mix')):
        df.loc[df['block'] == block_id, 'block_type'] = label

    # Blocks 2 and 3 take their type from the deck distribution of the trial.
    for block_id in (2, 3):
        for skew in ('low', 'high'):
            matching_rows = (df['block'] == block_id) & (df['distribution'] == skew)
            df.loc[matching_rows, 'block_type'] = skew

dataframes[0].head(3)

Unnamed: 0,arrowRT,distribution,interTrialInterval,outcome,myCard,yourCard,spaceRT,totalReward,trialIndex,trialType,choice,block,timeoutRepeat,spaceRT_2sd,block_type
0,2390,uniform,756,win,2,7,4186,10.5,0,response,arrowdown,1,0,2927.465284,uniform
1,1262,uniform,874,win,9,4,2040,11.0,1,response,arrowup,1,0,2927.465284,uniform
2,1547,uniform,791,win,5,6,1204,11.5,2,response,arrowdown,1,0,2927.465284,uniform


In [4]:
# 1) Risk tables: for each deck type, the risk attached to each card value.
#    Every table lists its nine values in card order, card 1 first, card 9 last.
risk_uniform = dict(zip(
    range(1, 10),
    (0.0, 0.125, 0.25, 0.375, 0.5, 0.375, 0.25, 0.125, 0.0),
))

risk_low = dict(zip(
    range(1, 10),
    (0.0, 0.243, 0.447, 0.385, 0.25, 0.146, 0.071, 0.023, 0.0),
))

risk_high = dict(zip(
    range(1, 10),
    (0.0, 0.023, 0.071, 0.146, 0.25, 0.385, 0.447, 0.243, 0.0),
))

# 2) Master lookup keyed by the value of the 'distribution' column.
#    There is deliberately no 'mix' entry: decide how to handle or skip that
#    condition if it ever appears as a distribution value.
risk_map = dict(
    uniform=risk_uniform,
    low=risk_low,
    high=risk_high,
)

# 3) Add a 'risk' column to every DataFrame in the list.
def _risk_for_trial(trial):
    """Return the risk for one trial row.

    The row's 'distribution' selects a table in risk_map and its 'myCard'
    selects the entry in that table. NaN is returned when either the
    distribution or the card has no entry.
    """
    deck_table = risk_map.get(trial['distribution'], {})
    return deck_table.get(trial['myCard'], np.nan)


for df in dataframes:
    df['risk'] = df.apply(_risk_for_trial, axis=1)


dataframes[1]


Unnamed: 0,arrowRT,distribution,interTrialInterval,outcome,myCard,yourCard,spaceRT,totalReward,trialIndex,trialType,choice,block,timeoutRepeat,spaceRT_2sd,block_type,risk
0,945,uniform,778,lose,1,3,6159,9.5,0,response,arrowup,1,0,12086.380379,uniform,0.000
1,na,uniform,902,na,3,7,9084,na,1,timeout,na,1,0,12086.380379,uniform,0.250
2,1203,uniform,872,lose,2,5,1100,9,2,response,arrowup,1,0,12086.380379,uniform,0.125
3,1003,uniform,965,lose,4,9,1208,8.5,3,response,arrowup,1,0,12086.380379,uniform,0.375
4,1562,uniform,807,win,8,3,5908,9,4,response,arrowup,1,0,12086.380379,uniform,0.125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
275,642,uniform,945,win,8,4,1381,54,5,response,arrowup,4,0,12086.380379,mix,0.125
276,1267,high,897,win,4,8,1343,54.5,94,response,arrowdown,4,0,12086.380379,mix,0.146
277,731,uniform,944,lose,5,7,2914,54,0,response,arrowup,4,1,12086.380379,mix,0.500
278,803,low,916,win,3,7,1408,54.5,71,response,arrowdown,4,1,12086.380379,mix,0.447


In [5]:
output_folder = 'data_risk_added_epileptic'
# exist_ok=True makes this a no-op when the folder is already there and
# avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(output_folder, exist_ok=True)


def compute_risk(row):
    """Return the risk for one trial row.

    The row's 'distribution' selects a table in risk_map and its 'myCard'
    selects the entry in that table. NaN is returned when either the
    distribution or the card has no entry.
    """
    dist = row['distribution']
    card = row['myCard']
    return risk_map.get(dist, {}).get(card, np.nan)


for file_name in os.listdir(folder_path):
    if file_name.endswith('.csv'):
        # Re-read the raw CSV file so that the saved copy contains only the
        # original columns plus 'risk'.
        file_path = os.path.join(folder_path, file_name)
        df = pd.read_csv(file_path)

        # Compute the new 'risk' column
        df['risk'] = df.apply(compute_risk, axis=1)

        # Save under the same file name in the output folder.
        out_path = os.path.join(output_folder, file_name)
        df.to_csv(out_path, index=False)

print("All files processed and saved with 'risk' column in", output_folder)

All files processed and saved with 'risk' column in data_risk_added_epileptic


# remember to copy and paste "data_risk_added_epileptic" folder to the "RL_agent" folder!!!