# do analysis on same trials data saving for color/number model
# common trials here: the trials with the same distribution and same myCard number

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
os.environ["OMP_NUM_THREADS"] = "1"
import seaborn as sns 
import warnings
warnings.filterwarnings("ignore")
from functools import reduce


In [2]:
output_dir = r"10_RL_agent_TDlearn_output_same_trials_data"
os.makedirs(output_dir, exist_ok=True)


folder_path_participants = 'data_risk_added'



folder_path_greedy = "13_RL_agent_TDlearn_output/model_behavior"
folder_path_softmax = "13_RL_agent_TDlearn_output_softmax/model_behavior"
folder_path_rs = "13_RL_agent_TDlearn_output_risk_sensitive/model_behavior"
folder_path_wsls = "13_RL_agent_TDlearn_output_wsls/model_behavior"
folder_path_dualQ = "13_RL_agent_TDlearn_output_risk_dualQ/model_behavior"
folder_path_dualQ_no_eta = "13_RL_agent_TDlearn_output_risk_dualQ_no_eta/model_behavior"




df_participants = []
df_both_greedy = []
df_both_softmax = []
df_both_wsls = []
df_both_rs = []
df_both_dualQ = []
df_both_dualQ_no_eta = []
clean_names= []

def find_matching_csv(folder_path, df_list):
            for csv_file in os.listdir(folder_path):
                if clean_name in csv_file and csv_file.endswith('.csv'):
                    csv_path = os.path.join(folder_path, csv_file)
                    df_csv = pd.read_csv(csv_path)
                    df_list.append(df_csv)





for file_name in os.listdir(folder_path_participants):
    if file_name.endswith('.xlsx'):
        file_path = os.path.join(folder_path_participants, file_name)
        df = pd.read_excel(file_path)
        df = df[df['outcome'].str.lower() != 'na'].reset_index(drop=True)  
        df_participants.append(df)

        clean_name = file_name.removeprefix("task_data_").removesuffix(".xlsx")
        clean_names.append(clean_name)


        find_matching_csv(folder_path_greedy, df_both_greedy)
        find_matching_csv(folder_path_softmax, df_both_softmax)
        find_matching_csv(folder_path_wsls, df_both_wsls)
        find_matching_csv(folder_path_rs, df_both_rs)
        find_matching_csv(folder_path_dualQ, df_both_dualQ)
        find_matching_csv(folder_path_dualQ_no_eta, df_both_dualQ_no_eta)
        

In [3]:

df_counts = []
for df, name in zip(df_participants, clean_names):
    counts = df.groupby(['myCard', 'distribution']).size().reset_index(name=f'count_{name}')
    df_counts.append(counts)

common_trials = reduce(lambda left, right: pd.merge(left, right, on=['myCard', 'distribution']), df_counts)

count_cols = [col for col in common_trials.columns if col.startswith('count_')]
common_trials['min_count'] = common_trials[count_cols].min(axis=1)


total_common_trials = common_trials['min_count'].sum()
print("Total number of common trials:", total_common_trials)
common_trials

Total number of common trials: 181


Unnamed: 0,myCard,distribution,count_18_11_2024_13_31_43,count_22_03_2025_00_10_37,count_14_11_2024_21_46_47,count_18_03_2025_20_59_56,count_13_11_2024_14_45_52,count_19_11_2024_14_28_20,count_18_11_2024_15_43_17,count_26_03_2025_16_21_25,...,count_22_11_2024_15_19_47,count_17_11_2024_15_25_39,count_22_11_2024_12_34_30,count_26_11_2024_14_31_40,count_22_11_2024_14_36_42,count_17_11_2024_23_57_47,count_15_11_2024_11_43_48,count_12_11_2024_00_15_17,count_20_11_2024_09_23_29,min_count
0,1,high,4,1,3,1,1,2,2,2,...,3,2,3,2,3,2,2,1,2,1
1,1,low,17,13,16,18,17,16,15,17,...,19,18,22,23,21,19,15,17,15,13
2,1,uniform,11,8,9,7,9,11,10,7,...,8,9,7,9,10,8,8,8,10,7
3,2,high,5,3,4,5,4,6,4,4,...,6,5,4,5,3,6,4,4,2,2
4,2,low,16,18,15,12,16,16,17,17,...,18,16,14,15,21,15,18,15,16,11
5,2,uniform,13,10,8,12,9,10,11,9,...,12,9,11,9,7,11,8,9,8,7
6,3,high,7,5,6,5,6,4,7,3,...,7,6,6,8,7,8,6,6,8,3
7,3,low,13,13,13,12,14,13,15,13,...,14,11,14,14,15,12,16,18,18,11
8,3,uniform,10,10,9,12,6,10,10,7,...,12,10,8,12,8,8,11,10,12,6
9,4,high,6,7,9,9,7,10,8,8,...,9,8,7,9,9,6,6,7,8,5


# adding some columns to models

In [4]:
for df in df_participants:
    df['block_type'] = None

    df.loc[df['block'] == 1, 'block_type'] = 'uniform'     # Block 1 is uni
    df.loc[df['block'] == 4, 'block_type'] = 'mix'     # Block 4 is mix

    df.loc[(df['block'] == 2) & (df['distribution'] == 'low'), 'block_type'] = 'low'
    df.loc[(df['block'] == 2) & (df['distribution'] == 'high'), 'block_type'] = 'high'
    df.loc[(df['block'] == 3) & (df['distribution'] == 'low'), 'block_type'] = 'low'
    df.loc[(df['block'] == 3) & (df['distribution'] == 'high'), 'block_type'] = 'high'
    


for i in range(len(df_participants)):
    myCard = df_participants[i]['myCard']
    yourCard = df_participants[i]['yourCard']
    distributions = df_participants[i]['distribution']
    block_type = df_participants[i]['block_type']
    participant_outcome = df_participants[i]['outcome'].replace({'win': 1, 'lose': 0})
    arrowRT = df_participants[i]['arrowRT']
    spaceRT = df_participants[i]['spaceRT']
    risk = df_participants[i]['risk']
    
    for df_list in [df_both_greedy, df_both_softmax, df_both_wsls, df_both_rs, df_both_dualQ, df_both_dualQ_no_eta]:
        df_list[i]['block'] = df_participants[i]['block']
        df_list[i]['myCard'] = myCard
        df_list[i]['yourCard'] = yourCard
        df_list[i]['distribution'] = distributions
        df_list[i]['block_type'] = block_type
        df_list[i]['participant_outcome'] = participant_outcome
        df_list[i]['arrowRT'] = arrowRT
        df_list[i]['spaceRT'] = spaceRT
        df_list[i]['risk'] = risk




def calculate_outcomes(df_list):
    for df in df_list:
        outcomes = []
        for i in range(len(df)):
            my = df.loc[i, 'myCard']
            your = df.loc[i, 'yourCard']
            choice = df.loc[i, 'model_choices']

            if ((my > your and choice == 1) or (my < your and choice == 0)):
                outcomes.append('1') # win
            else:
                outcomes.append('0') # lose

        df['model_outcome'] = outcomes


for df_list in [df_both_greedy, df_both_softmax, df_both_wsls, df_both_rs, df_both_dualQ, df_both_dualQ_no_eta]:
    calculate_outcomes(df_list)


In [5]:
df_both_dualQ[1]

Unnamed: 0,model_choices,participant_choices,model_total_reward,participant_total_reward,q_val,block,myCard,yourCard,distribution,block_type,participant_outcome,arrowRT,spaceRT,risk,model_outcome
0,0,0,9.5,9.5,"[[[0.09769494872300748, -0.027194159319305494]...",1,4,3,uniform,uniform,0,2827,3434,0.375,0
1,0,0,10.0,10.0,"[[[0.09769494872300748, -0.027194159319305494]...",1,3,9,uniform,uniform,1,1936,2410,0.250,1
2,1,1,10.5,10.5,"[[[0.09769494872300748, -0.027194159319305494]...",1,9,2,uniform,uniform,1,1545,1105,0.000,1
3,0,1,10.0,11.0,"[[[0.09769494872300748, -0.027194159319305494]...",1,7,6,uniform,uniform,1,961,1582,0.250,0
4,1,1,10.5,11.5,"[[[0.09769494872300748, -0.027194159319305494]...",1,7,1,uniform,uniform,1,648,1541,0.250,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,0,0,56.0,76.0,"[[[0.4771400691841942, -0.027194159319305494],...",4,3,1,low,mix,0,852,1481,0.447,0
266,0,0,56.5,76.5,"[[[0.4771400691841942, -0.027194159319305494],...",4,4,7,uniform,mix,1,454,1313,0.375,1
267,0,0,57.0,77.0,"[[[0.4771400691841942, -0.027194159319305494],...",4,3,4,low,mix,1,374,1667,0.447,1
268,0,0,56.5,76.5,"[[[0.4771400691841942, -0.027194159319305494],...",4,5,4,high,mix,0,166,1665,0.250,0


# create data

In [6]:
final_dfs = []

for idx, name in enumerate(clean_names):
    participant_df = df_participants[idx]
    greedy_df = df_both_greedy[idx]
    softmax_df = df_both_softmax[idx]
    wsls_df = df_both_wsls[idx]
    rs_df = df_both_rs[idx]
    dualQ_df = df_both_dualQ[idx]
    dualQ_no_eta_df = df_both_dualQ_no_eta[idx]

    filtered_rows = []

    for _, row in common_trials.iterrows():
        myCard = row['myCard']
        dist = row['distribution']
        n = int(row['min_count'])

        mask = (participant_df['myCard'] == myCard) & (participant_df['distribution'] == dist)
        selected_indices = participant_df[mask].index[:n]

        for i in selected_indices:
            filtered_rows.append({
                'myCard': myCard,
                'distribution': dist,
                'model_choice_greedy': greedy_df.loc[i, 'model_choices'],
                'model_choice_softmax': softmax_df.loc[i, 'model_choices'],
                'model_choice_wsls': wsls_df.loc[i, 'model_choices'],
                'model_choice_rs': rs_df.loc[i, 'model_choices'],
                'model_choice_dualQ': dualQ_df.loc[i, 'model_choices'],
                'model_choice_dualQ_no_eta': dualQ_no_eta_df.loc[i, 'model_choices'],
                'participant_choice': greedy_df.loc[i, 'participant_choices'],
                'model_outcome_greedy': greedy_df.loc[i, 'model_outcome'],
                'model_outcome_softmax': softmax_df.loc[i, 'model_outcome'],
                'model_outcome_wsls': wsls_df.loc[i, 'model_outcome'],
                'model_outcome_rs': rs_df.loc[i, 'model_outcome'],
                'model_outcome_dualQ': dualQ_df.loc[i, 'model_outcome'],
                'model_outcome_dualQ_no_eta': dualQ_no_eta_df.loc[i, 'model_outcome'],
                'arrowRT': participant_df.loc[i, 'arrowRT'],
                'spaceRT': participant_df.loc[i, 'spaceRT'],
                'risk': participant_df.loc[i, 'risk'],
                'block_type': participant_df.loc[i, 'block_type'],
                'participant_outcome': greedy_df.loc[i, 'participant_outcome']
            })

    result_df = pd.DataFrame(filtered_rows)
    final_dfs.append(result_df)

    # Save each result
    output_path = os.path.join(output_dir, f"{name}.csv")
    result_df.to_csv(output_path, index=False)
