# Select the same ("common") trials for every participant and save them for the color/number model analysis
# Common trials: trials with the same distribution and the same myCard number across all participants

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
os.environ["OMP_NUM_THREADS"] = "1"
import seaborn as sns 
import warnings
warnings.filterwarnings("ignore")
from functools import reduce


In [2]:
# --- Output location: one CSV per participant is written here ---
output_dir = r"10_RL_agent_TDlearn_output_same_trials_data"
os.makedirs(output_dir, exist_ok=True)

# --- Input locations ---
# Participant task data (.xlsx files).
folder_path_participants = 'data_risk_added'

# Model behaviour CSVs, one folder per model variant.
folder_path_greedy = "13_RL_agent_TDlearn_output/model_behavior"
folder_path_softmax = "13_RL_agent_TDlearn_output_softmax/model_behavior"
folder_path_rs = "13_RL_agent_TDlearn_output_risk_sensitive/model_behavior"
folder_path_wsls = "13_RL_agent_TDlearn_output_wsls/model_behavior"
folder_path_dualQ = "13_RL_agent_TDlearn_output_risk_dualQ/model_behavior"

# --- Accumulators filled by the loading loop below ---
# Entry i of every list is meant to belong to the same participant.
df_participants, clean_names = [], []
df_both_greedy, df_both_softmax, df_both_wsls = [], [], []
df_both_rs, df_both_dualQ = [], []

def find_matching_csv(folder_path, df_list, name=None):
    """Load every CSV in ``folder_path`` whose file name contains ``name``.

    Each matching file is read with ``pd.read_csv`` and appended to
    ``df_list`` in place. Files are visited in sorted file-name order so the
    result does not depend on the arbitrary order of ``os.listdir``.

    Parameters
    ----------
    folder_path : str
        Directory to scan (not recursive).
    df_list : list
        List that receives one DataFrame per matching file.
    name : str, optional
        Substring that must occur in the file name. When omitted, the
        module-level ``clean_name`` set by the calling loop is used, so the
        existing two-argument calls keep working.

    Returns
    -------
    None
        The function only mutates ``df_list``.
    """
    if name is None:
        # Legacy behaviour: rely on the loop variable in the notebook namespace.
        name = clean_name

    for csv_file in sorted(os.listdir(folder_path)):
        if name in csv_file and csv_file.endswith('.csv'):
            csv_path = os.path.join(folder_path, csv_file)
            df_list.append(pd.read_csv(csv_path))





# Load every participant workbook together with the matching CSV of each model.
# sorted(): os.listdir returns entries in arbitrary order; sorting makes the
# participant order (and everything derived from it) reproducible.
for file_name in sorted(os.listdir(folder_path_participants)):
    if not file_name.endswith('.xlsx'):
        continue

    file_path = os.path.join(folder_path_participants, file_name)
    df = pd.read_excel(file_path)
    # Drop trials whose outcome is the string 'na' (case-insensitive).
    # NOTE(review): outcome cells that pandas parsed as missing (NaN) compare
    # unequal to 'na' and are therefore kept - confirm that this is intended.
    df = df[df['outcome'].str.lower() != 'na'].reset_index(drop=True)
    df_participants.append(df)

    # find_matching_csv reads this module-level name, so it must be set before the calls.
    clean_name = file_name.removeprefix("task_data_").removesuffix(".xlsx")
    clean_names.append(clean_name)

    for model_folder, model_dfs in (
        (folder_path_greedy, df_both_greedy),
        (folder_path_softmax, df_both_softmax),
        (folder_path_wsls, df_both_wsls),
        (folder_path_rs, df_both_rs),
        (folder_path_dualQ, df_both_dualQ),
    ):
        n_before = len(model_dfs)
        find_matching_csv(model_folder, model_dfs)
        n_found = len(model_dfs) - n_before
        # Later cells pair df_participants[i] with model_dfs[i]; a missing or
        # duplicated match would silently shift every following participant.
        if n_found != 1:
            raise ValueError(
                f"Expected exactly one CSV containing '{clean_name}' in "
                f"'{model_folder}', found {n_found}"
            )
        

In [3]:

# One table per participant: number of trials for each (myCard, distribution) pair.
df_counts = [
    participant_df.groupby(['myCard', 'distribution']).size().reset_index(name=f'count_{participant_name}')
    for participant_df, participant_name in zip(df_participants, clean_names)
]

# Inner-join all tables so only pairs that occur for every participant remain.
common_trials = reduce(
    lambda merged, nxt: pd.merge(merged, nxt, on=['myCard', 'distribution']),
    df_counts,
)

# The smallest per-participant count is the number of trials everyone can contribute.
count_cols = [column for column in common_trials.columns if column.startswith('count_')]
common_trials['min_count'] = common_trials[count_cols].min(axis=1)

total_common_trials = common_trials['min_count'].sum()
print("Total number of common trials:", total_common_trials)
common_trials

Total number of common trials: 180


Unnamed: 0,myCard,distribution,count_06_06_2025_16_43_26,count_07_04_2025_22_51_04,count_07_11_2024_17_23_43,count_08_11_2024_13_03_29,count_10_06_2025_16_06_19,count_11_11_2024_16_46_44,count_12_11_2024_00_15_17,count_13_11_2024_10_46_21,...,count_25_11_2024_07_37_11,count_25_11_2024_12_11_10,count_25_11_2024_18_41_38,count_25_11_2024_20_12_41,count_26_03_2025_16_21_25,count_26_11_2024_10_53_23,count_26_11_2024_14_31_40,count_28_11_2024_12_21_16,count_28_11_2024_22_38_25,min_count
0,1,high,2,1,3,3,1,2,1,3,...,3,1,2,2,2,2,2,1,2,1
1,1,low,15,17,15,15,14,17,17,20,...,16,16,18,20,17,15,23,16,16,13
2,1,uniform,10,10,11,7,10,11,8,9,...,10,9,11,8,7,12,9,10,11,7
3,2,high,4,5,4,2,6,5,4,2,...,5,3,4,3,4,4,5,5,3,2
4,2,low,18,19,16,14,15,19,15,15,...,17,18,16,16,17,16,15,15,17,11
5,2,uniform,11,11,9,8,12,9,9,10,...,9,9,9,8,9,11,9,9,12,7
6,3,high,5,8,5,11,4,5,6,8,...,5,7,6,8,3,5,8,3,8,3
7,3,low,14,16,11,16,16,11,18,14,...,13,14,12,15,13,15,14,17,16,11
8,3,uniform,11,11,9,8,11,8,10,12,...,9,12,9,10,7,7,12,11,8,6
9,4,high,5,10,7,6,7,9,7,9,...,9,11,9,7,8,12,9,10,8,5


# Add participant trial columns and model outcomes to the model DataFrames

In [4]:
# Label every participant trial with a block type derived from block number
# and, for blocks 2 and 3, from the trial's distribution.
for df in df_participants:
    block = df['block']
    df['block_type'] = None  # rows matched by none of the rules below stay None

    df.loc[block == 1, 'block_type'] = 'uniform'   # Block 1 is uni
    df.loc[block == 4, 'block_type'] = 'mix'       # Block 4 is mix

    # Blocks 2 and 3 take the label of the trial's own distribution (low / high).
    in_block_2_or_3 = (block == 2) | (block == 3)
    for dist_label in ('low', 'high'):
        df.loc[in_block_2_or_3 & (df['distribution'] == dist_label), 'block_type'] = dist_label
    


# Copy the participant's trial information into the model frame at the same
# list position, for every model variant. Assignment is index-aligned.
for i, participant_df in enumerate(df_participants):
    # Recode the participant outcome: 'win' -> 1, 'lose' -> 0.
    participant_outcome = participant_df['outcome'].replace({'win': 1, 'lose': 0})

    for df_list in [df_both_greedy, df_both_softmax, df_both_wsls, df_both_rs, df_both_dualQ]:
        model_df = df_list[i]
        for column in ('block', 'myCard', 'yourCard', 'distribution', 'block_type'):
            model_df[column] = participant_df[column]
        model_df['participant_outcome'] = participant_outcome
        for column in ('arrowRT', 'spaceRT', 'risk'):
            model_df[column] = participant_df[column]




def calculate_outcomes(df_list):
    """Add a ``model_outcome`` column to every DataFrame in ``df_list``.

    A trial counts as a win when the model's choice agrees with the card
    comparison: ``myCard > yourCard`` with ``model_choices == 1``, or
    ``myCard < yourCard`` with ``model_choices == 0``. Every other case,
    including equal cards, counts as a loss.

    Parameters
    ----------
    df_list : iterable of pandas.DataFrame
        Frames with ``myCard``, ``yourCard`` and ``model_choices`` columns.
        Each frame is modified in place.

    Returns
    -------
    None

    Notes
    -----
    Outcomes are stored as the strings ``'1'`` (win) and ``'0'`` (lose), not
    as integers. The computation is vectorized and positional, so it works
    for any row index, not only a default 0..n-1 range index.
    """
    for df in df_list:
        my = df['myCard']
        your = df['yourCard']
        choice = df['model_choices']

        win = ((my > your) & (choice == 1)) | ((my < your) & (choice == 0))

        # Cast to object so the column holds plain Python strings.
        df['model_outcome'] = np.where(win.to_numpy(), '1', '0').astype(object)


# Derive the model_outcome column for the frames of every model variant.
for model_frames in (df_both_greedy, df_both_softmax, df_both_wsls, df_both_rs, df_both_dualQ):
    calculate_outcomes(model_frames)


In [5]:
# Spot check: show the second dualQ model frame with the columns added above.
df_both_dualQ[1]

Unnamed: 0,model_choices,participant_choices,model_total_reward,participant_total_reward,q_val,block,myCard,yourCard,distribution,block_type,participant_outcome,arrowRT,spaceRT,risk,model_outcome
0,1,1,9.5,9.5,"[[[0.17069383785767067, -0.22116849580349807],...",1,4,9,uniform,uniform,0,229,1402,0.375,0
1,1,1,9.0,9.0,"[[[0.17069383785767067, -0.22116849580349807],...",1,6,7,uniform,uniform,0,655,847,0.375,0
2,0,0,9.5,9.5,"[[[0.17069383785767067, -0.22116849580349807],...",1,2,8,uniform,uniform,1,1510,845,0.125,1
3,1,1,10.0,10.0,"[[[0.17069383785767067, -0.22116849580349807],...",1,9,6,uniform,uniform,1,610,981,0.000,1
4,0,0,10.5,10.5,"[[[0.17069383785767067, -0.22116849580349807],...",1,1,2,uniform,uniform,1,566,708,0.000,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,1,1,44.0,61.0,"[[[0.486011951019617, -0.2932012527766039], [0...",4,7,2,low,mix,1,1546,8,0.071,1
266,1,0,44.5,60.5,"[[[0.486011951019617, -0.2932012527766039], [0...",4,3,1,uniform,mix,0,518,531,0.250,1
267,0,0,45.0,61.0,"[[[0.486011951019617, -0.2932012527766039], [0...",4,2,7,uniform,mix,1,780,210,0.125,1
268,1,1,45.5,61.5,"[[[0.486011951019617, -0.2932012527766039], [0...",4,7,2,uniform,mix,1,791,808,0.250,1


# Create and save the common-trial dataset for each participant

In [6]:
final_dfs = []

for idx, name in enumerate(clean_names):
    participant_df = df_participants[idx]
    # Frames of the five model variants at the same list position as the participant.
    greedy, softmax, wsls, rs, dualQ = (
        frames[idx]
        for frames in (df_both_greedy, df_both_softmax, df_both_wsls, df_both_rs, df_both_dualQ)
    )

    filtered_rows = []

    trial_quota = zip(
        common_trials['myCard'],
        common_trials['distribution'],
        common_trials['min_count'],
    )
    for card, dist, min_count in trial_quota:
        quota = int(min_count)

        # Keep only the first `quota` trials of this (myCard, distribution) pair.
        matches = (participant_df['myCard'] == card) & (participant_df['distribution'] == dist)
        kept = participant_df[matches].index[:quota]

        for i in kept:
            # Key order defines the column order of the saved CSV.
            filtered_rows.append({
                'myCard': card,
                'distribution': dist,
                'model_choice_greedy': greedy.loc[i, 'model_choices'],
                'model_choice_softmax': softmax.loc[i, 'model_choices'],
                'model_choice_wsls': wsls.loc[i, 'model_choices'],
                'model_choice_rs': rs.loc[i, 'model_choices'],
                'model_choice_dualQ': dualQ.loc[i, 'model_choices'],
                'participant_choice': greedy.loc[i, 'participant_choices'],
                'model_outcome_greedy': greedy.loc[i, 'model_outcome'],
                'model_outcome_softmax': softmax.loc[i, 'model_outcome'],
                'model_outcome_wsls': wsls.loc[i, 'model_outcome'],
                'model_outcome_rs': rs.loc[i, 'model_outcome'],
                'model_outcome_dualQ': dualQ.loc[i, 'model_outcome'],
                'arrowRT': participant_df.loc[i, 'arrowRT'],
                'spaceRT': participant_df.loc[i, 'spaceRT'],
                'risk': participant_df.loc[i, 'risk'],
                'block_type': participant_df.loc[i, 'block_type'],
                'participant_outcome': greedy.loc[i, 'participant_outcome'],
            })

    result_df = pd.DataFrame(filtered_rows)
    final_dfs.append(result_df)

    # Write one CSV per participant, named after the cleaned file name.
    result_df.to_csv(os.path.join(output_dir, f"{name}.csv"), index=False)
