# This is a risk-sensitive model based on the risk value assigned to each card number.
# Here the Q-table is the table of risk evaluation: a reward table `Qr` and a risk table `Qs` are learned and combined as `Qr - eta * Qs`.

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import scipy.stats as stats
import seaborn as sns
from sklearn.metrics import confusion_matrix
np.random.seed(42)
from joblib import Parallel, delayed
import matplotlib.tri as tri
import matplotlib.colors as mcolors
from scipy.interpolate import griddata
from scipy.interpolate import RBFInterpolator
import matplotlib.ticker as mticker
import itertools
from sklearn.metrics import r2_score


# important directories

In [2]:
# Folder that holds the participant CSV files.
folder_path = 'data_risk_added_epileptic'

# Output tree: the root folder plus one sub-folder for figures and one for
# the per-participant model-behavior CSVs.
output_dir_model_evaluation = "13_RL_agent_TDlearn_output_risk_dualQ"
output_dir_plots = os.path.join(output_dir_model_evaluation, "plots")
output_dir_model_behavior = os.path.join(output_dir_model_evaluation, "model_behavior")

# Create every output directory up front; directories that already exist are left as they are.
for _out_dir in (output_dir_model_evaluation, output_dir_plots, output_dir_model_behavior):
    os.makedirs(_out_dir, exist_ok=True)

In [3]:

# Read every CSV in the data folder into its own DataFrame (one per participant),
# in the order os.listdir returns the files.
csv_file_names = [name for name in os.listdir(folder_path) if name.endswith('.csv')]
dataframes = [pd.read_csv(os.path.join(folder_path, name)) for name in csv_file_names]

n_participant = len(dataframes)
print(f"there are {n_participant} participants.")

# Preview the first participant's trials.
dataframes[0]

there are 7 participants.


Unnamed: 0,arrowRT,distribution,interTrialInterval,outcome,myCard,yourCard,spaceRT,totalReward,trialIndex,trialType,choice,block,timeoutRepeat,is_within_IQR,risk
0,2390,uniform,756,win,2,7,3186,10.5,0,response,arrowdown,1,0,0,0.125
1,1262,uniform,874,win,9,4,1040,11,1,response,arrowup,1,0,0,0.000
2,1547,uniform,791,win,5,6,204,11.5,2,response,arrowdown,1,0,1,0.500
3,1627,uniform,828,win,9,1,872,12,3,response,arrowup,1,0,0,0.000
4,459,uniform,894,win,6,4,790,12.5,4,response,arrowup,1,0,0,0.375
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
267,514,high,847,win,8,7,429,85,134,response,arrowup,4,0,1,0.243
268,179,high,902,win,4,8,211,85.5,115,response,arrowdown,4,0,1,0.146
269,419,low,815,lose,8,9,353,85,81,response,arrowup,4,0,1,0.023
270,731,low,941,lose,3,1,212,84.5,59,response,arrowdown,4,0,1,0.447


### The participant name used in `models_evaluation.csv` is taken from the data file name: for a file such as `task_data_07_11_2024_17_23_43.csv`, the part "07_11_2024_17_23_43" is extracted and used as the participant name in the dataset.

In [4]:
# Participant IDs: the CSV file name without its extension and with "task_data_" removed.
# NOTE: this relies on os.listdir returning the files in the same order as when
# `dataframes` was built, so that participants[i] belongs to dataframes[i].
participants = []
for csv_name in os.listdir(folder_path):
    if csv_name.endswith('.csv'):
        stem = os.path.splitext(csv_name)[0]
        participants.append(stem.replace("task_data_", ""))

# Policy initialization for the model
Next, the prior policy values are needed. To get them, the percentage of `arrowdown` and `arrowup` choices is computed for each distribution. (The code for this step is currently commented out.)

In [5]:
# df_combined = pd.concat(dataframes, ignore_index=True)

# df_combined = df_combined[df_combined['outcome'].str.lower() != 'na'].reset_index(drop=True)
 

# desired_order = ["uniform", "low", "high"]  


# cards_sorted = sorted(df_combined["myCard"].unique())
# dist_sorted = [d for d in desired_order if d in df_combined["distribution"].unique()]
# choice_sorted = sorted(df_combined["choice"].unique())


# card_idx = {card: i for i, card in enumerate(cards_sorted)}
# dist_idx = {dist: i for i, dist in enumerate(dist_sorted)}
# choice_idx = {choice: i for i, choice in enumerate(choice_sorted)}


# matrix_3d = np.zeros((len(cards_sorted), len(dist_sorted), len(choice_sorted)))


# for _, row in df_combined.iterrows():
#     i = card_idx[row["myCard"]]-1
#     j = dist_idx[row["distribution"]]
#     k = choice_idx[row["choice"]]
#     matrix_3d[i, j, k] += 1  


# total_per_card_dist = matrix_3d.sum(axis=2, keepdims=True)

# # compute percentages, avoiding division by zero
# with np.errstate(divide='ignore', invalid='ignore'):
#     percentage_matrix = np.divide(matrix_3d, total_per_card_dist, where=total_per_card_dist != 0)

# # convert to a DataFrame for easy visualization
# percentage_list = []
# for i, card in enumerate(cards_sorted):
#     for j, dist in enumerate(dist_sorted):
#         for k, choice in enumerate(choice_sorted):
#             percentage_list.append({
#                 "myCard": card,
#                 "distribution": dist,  # Now follows "uniform", "low", "high" order
#                 "choice": choice,
#                 "percentage": percentage_matrix[i, j, k]
#             })

# df_percentages = pd.DataFrame(percentage_list)
# df_percentages

In [6]:
# np.shape(percentage_matrix)

In [7]:
# Label -> array-index encodings used to address the Q-tables.
actions = {"arrowdown": 0, "arrowup": 1}
distributions_map = {"uniform": 0, "low": 1, "high": 2}
card_numbers = list(range(1, 10))

# Q-table layout: (card, distribution, action).
_q_shape = (len(card_numbers), len(distributions_map), len(actions))

# Starting values are small Gaussian noise (mean 0, sd 0.1).
# Two alternatives are kept here for reference but are disabled:
#   * policy-based:  policy_table = percentage_matrix
#                    Q_table_init = policy_table * np.mean(Q_table_init)
#   * all zeros:     Q_table = np.zeros((len(distributions_map), len(actions)))
#                    (3 distributions x 2 actions; judged not to be a good initialization)
Q_table_init = np.random.normal(loc=0, scale=0.1, size=_q_shape)

# Working copy, so Q_table_init keeps the untouched starting values.
Q_table = Q_table_init.copy()

# print("policy: \n",np.shape(policy_table))
print("\n Q_table: \n",np.shape(Q_table))




 Q_table: 
 (9, 3, 2)


In [8]:
def softmax(Q_values, beta):
    """Turn Q-values into action probabilities with a softmax over the last axis.

    Parameters
    ----------
    Q_values : array_like
        Q-values whose last axis indexes the actions. This can be a full
        (card, distribution, action) table or any other shape, including a
        single state's 1-D action vector.
    beta : float
        Inverse temperature. ``beta = 0`` gives equal probabilities for all
        actions; larger values put more probability on the highest Q-value.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``Q_values``; the entries along the last
        axis sum to 1.
    """
    Q_values = np.asarray(Q_values, dtype=float)

    # Subtract each state's maximum before exponentiating: the probabilities are
    # unchanged, but the largest exponent becomes 0, which avoids overflow.
    Q_shifted = Q_values - np.max(Q_values, axis=-1, keepdims=True)
    exps = np.exp(beta * Q_shifted)
    sums = np.sum(exps, axis=-1, keepdims=True)
    return exps / sums




def train_dualQ_risk_sensitive(df, alpha_r, alpha_s, beta, eta, Qr_init=None, Qs_init=None):
    """Replay a participant's trials through the dual-Q risk-sensitive learner.

    Two tables indexed by (card, distribution, action) are maintained: ``Qr``
    is moved toward the trial's reward (+0.5 for a "win" outcome, -0.5
    otherwise) and ``Qs`` is moved toward the trial's ``risk`` value. On every
    trial the choice probabilities are a softmax over ``Qr - eta * Qs``,
    computed *before* that trial's update, and only the entry of the action
    the participant actually chose is updated.

    Parameters
    ----------
    df : pandas.DataFrame
        Trials in presentation order, with the columns ``choice``,
        ``distribution``, ``myCard``, ``outcome`` and ``risk``.
    alpha_r : float
        Learning rate of the reward table ``Qr``.
    alpha_s : float
        Learning rate of the risk table ``Qs``.
    beta : float
        Inverse temperature passed to ``softmax``.
    eta : float
        Weight of the risk table in the combined value ``Qr - eta * Qs``.
    Qr_init, Qs_init : numpy.ndarray, optional
        Starting tables. When omitted, a copy of the module-level ``Q_table``
        is used. The arrays passed in are never modified.

    Returns
    -------
    tuple of numpy.ndarray
        ``(q_value_pairs, choices, predicted_probs, distributions, card_numbers)``:
        the combined table used on each trial, the participant's action index,
        the model probability of that action, the distribution index and the
        zero-based card index (``myCard - 1``), one entry per trial.
    """
    # Work on private copies so neither the caller's arrays nor Q_table change.
    Qr = (Q_table if Qr_init is None else Qr_init).copy()
    Qs = (Q_table if Qs_init is None else Qs_init).copy()

    q_value_pairs = []
    choices = []
    predicted_probs = []
    distribution_indices = []
    card_indices = []

    # Iterate over the needed columns directly; DataFrame.iterrows builds a new
    # Series for every row, which is costly because the grid search calls this
    # function once per parameter combination.
    trial_columns = zip(df["choice"], df["distribution"], df["myCard"], df["outcome"], df["risk"])
    for choice, distribution_name, my_card, outcome, risk in trial_columns:
        action = actions[choice]
        distribution = distributions_map[distribution_name]
        card_number = my_card - 1  # cards are 1-based, table rows are 0-based
        reward = 0.5 if outcome == "win" else -0.5

        # Probability the model assigns to the observed choice (pre-update values).
        Q_combined = Qr - eta * Qs
        probs = softmax(Q_combined, beta)
        predicted_probs.append(probs[card_number][distribution][action])

        # Delta-rule update of the chosen action's entry in both tables.
        Qr[card_number][distribution][action] += alpha_r * (reward - Qr[card_number][distribution][action])
        Qs[card_number][distribution][action] += alpha_s * (risk - Qs[card_number][distribution][action])

        # Q_combined is a fresh array on every trial, so it can be stored as is.
        q_value_pairs.append(Q_combined)
        choices.append(action)
        distribution_indices.append(distribution)
        card_indices.append(card_number)

    return (
        np.array(q_value_pairs),
        np.array(choices),
        np.array(predicted_probs),
        np.array(distribution_indices),
        np.array(card_indices),
    )



def compute_log_likelihood(alpha_r, alpha_s, beta, eta, df_all):
    """Log-likelihood of a participant's choices under one parameter combination.

    Runs ``train_dualQ_risk_sensitive`` on ``df_all`` with the given
    parameters and sums the log of the probabilities the model assigned to the
    observed choices. Probabilities are clipped to [1e-6, 1] first so the log
    stays finite.

    Returns
    -------
    tuple
        ``(alpha_r, alpha_s, beta, eta, log_likelihood)`` - the parameters are
        echoed back so each result identifies its own parameter combination.
    """
    fit = train_dualQ_risk_sensitive(df_all, alpha_r, alpha_s, beta, eta)
    chosen_action_probs = fit[2]  # third element: probability of the observed choice

    safe_probs = np.clip(chosen_action_probs, 1e-6, 1)
    log_likelihood = np.sum(np.log(safe_probs))

    return (alpha_r, alpha_s, beta, eta, log_likelihood)



In [9]:
# Number of random values drawn for each of the four parameters.
num_of_samples = 30

# Sampling range of each parameter.
alpha_min, alpha_max = 0, 1
beta_min, beta_max = 0, 10
eta_min, eta_max = -1, 1

# np.random.uniform draws from the half-open interval [low, high); the upper
# bound is nudged up by machine epsilon.
_eps = np.finfo(float).eps

# Draw order matters for reproducibility under the global seed:
# alpha_r, alpha_s, beta, eta.
alpha_r_samples = np.random.uniform(low=alpha_min, high=alpha_max + _eps, size=num_of_samples)
alpha_s_samples = np.random.uniform(low=alpha_min, high=alpha_max + _eps, size=num_of_samples)
beta_samples = np.random.uniform(low=beta_min, high=beta_max + _eps, size=num_of_samples)
eta_samples = np.random.uniform(low=eta_min, high=eta_max + _eps, size=num_of_samples)

In [10]:
# Per-participant results; every list gets one entry per fitted participant.

# Best-fitting parameters
best_alpha_r_models, best_alpha_s_models, best_eta_models, best_beta_models = [], [], [], []

# Information criteria
BIC_models, AIC_models = [], []

# Classification metrics of model choices vs. participant choices
accuracy_models, precision_models, sensitivity_recall_models = [], [], []
specificity_models, f1_score_models = [], []

# Goodness-of-fit measures
mcFadden_r2_models, r2_models = [], []

# Colormap of the confusion-matrix heatmap; identical for every participant.
heatmap_cmap_color = mcolors.LinearSegmentedColormap.from_list("warm_red", ["#fff5e6", "#ff5733"])

for idx, df_all in enumerate(dataframes):
    # Keep only trials that have an outcome. Besides the literal string 'na',
    # missing cells are excluded too: pandas may already have parsed markers
    # such as "NA" as NaN while reading the CSV, and NaN != 'na' would
    # otherwise let those rows through.
    outcome_col = df_all['outcome']
    has_outcome = outcome_col.notna() & (outcome_col.str.lower() != 'na')
    df_all = df_all[has_outcome].reset_index(drop=True)

    ###########################################################################################
    ## parameter fit: evaluate every combination of the sampled parameter values
    ###########################################################################################
    results = Parallel(n_jobs=-1, backend='loky')(
        delayed(compute_log_likelihood)(alpha_r, alpha_s, beta, eta, df_all)
        for alpha_r, alpha_s, beta, eta in itertools.product(
            alpha_r_samples, alpha_s_samples, beta_samples, eta_samples
        )
    )

    # Pick the combination with the highest log-likelihood (first one wins on ties).
    # The best_* values are reset for every participant so that a failed fit can
    # never silently reuse the previous participant's parameters.
    best_log_likelihood = -np.inf
    best_alpha_r = best_alpha_s = best_beta = best_eta = None
    for alpha_r, alpha_s, beta, eta, log_likelihood in results:
        if log_likelihood > best_log_likelihood:
            best_log_likelihood = log_likelihood
            best_alpha_r = alpha_r
            best_alpha_s = alpha_s
            best_beta = beta
            best_eta = eta
    if best_alpha_r is None:
        raise ValueError(
            f"no parameter combination produced a valid log-likelihood for participant {participants[idx]}"
        )

    ###########################################################################################
    ## model prediction with the best parameters
    ###########################################################################################
    # The per-trial card indices are stored as `trial_cards` so the module-level
    # `card_numbers` list (used to size the Q-table) is not overwritten.
    q_values, choices, predicted_probs, distributions, trial_cards = train_dualQ_risk_sensitive(
        df_all, best_alpha_r, best_alpha_s, best_beta, best_eta
    )

    predicted_choices = []
    for trial in range(len(trial_cards)):
        test_action_probs = softmax(q_values[trial], best_beta)
        state_probs = test_action_probs[trial_cards[trial]][distributions[trial]]
        p_arrowup = state_probs[actions["arrowup"]]
        p_arrow_down = state_probs[actions["arrowdown"]]
        # sample 1 (arrowup) or 0 (arrowdown) according to the softmax probabilities
        predicted_choices.append(np.random.choice([1, 0], p=[p_arrowup, p_arrow_down]))

    # Running total reward the model would have earned with its own choices:
    # +0.5 when it picks "up" with the higher card or "down" with the lower card,
    # -0.5 otherwise (equal cards therefore count as -0.5).
    total_reward = []
    running_reward = 10  # initial reward is $10
    for i, predicted in enumerate(predicted_choices):
        my_card = df_all.loc[i, 'myCard']
        your_card = df_all.loc[i, 'yourCard']
        model_wins = ((my_card > your_card and predicted == 1) or
                      (my_card < your_card and predicted == 0))
        running_reward = running_reward + 0.5 if model_wins else running_reward - 0.5
        total_reward.append(running_reward)

    ###########################################################################################
    ## evaluation metrics
    ###########################################################################################
    # labels=[0, 1] guarantees a 2x2 matrix even if only one class occurs,
    # so the four-way unpacking below cannot fail.
    conf_matrix = confusion_matrix(choices, predicted_choices, labels=[0, 1])
    TN, FP, FN, TP = conf_matrix.ravel()

    accuracy = (TP + TN) / (TP + TN + FP + FN)
    # precision: of the trials predicted as "up", how many were really "up"?
    precision = TP / (TP + FP) if (TP + FP) != 0 else 0
    # recall / sensitivity: true positive rate
    sensitivity_recall = TP / (TP + FN) if (TP + FN) != 0 else 0
    # specificity: true negative rate
    specificity = TN / (TN + FP) if (TN + FP) != 0 else 0
    f1_score = 2 * (precision * sensitivity_recall) / (precision + sensitivity_recall) if (precision + sensitivity_recall) != 0 else 0

    # Bayesian information criterion, from the best log-likelihood found above
    n_trials = len(df_all)
    k = 4  # number of free parameters: alpha_r, alpha_s, beta, eta
    BIC = k * np.log(n_trials) - 2 * best_log_likelihood

    # Akaike information criterion
    AIC = 2 * k - 2 * best_log_likelihood

    # McFadden pseudo r-squared against a null model that always predicts the
    # participant's overall rate of choosing "1"
    p_null = np.mean(choices)
    log_likelihood_null = np.sum(choices * np.log(p_null) + (1 - choices) * np.log(1 - p_null))
    mcFadden_r2 = 1 - (best_log_likelihood / log_likelihood_null)

    # r-squared between participant choices and sampled model choices
    r2 = r2_score(choices, predicted_choices)

    # store this participant's evaluation values
    best_alpha_r_models.append(best_alpha_r)
    best_alpha_s_models.append(best_alpha_s)
    best_eta_models.append(best_eta)
    best_beta_models.append(best_beta)
    BIC_models.append(BIC)
    AIC_models.append(AIC)
    accuracy_models.append(accuracy)
    precision_models.append(precision)
    sensitivity_recall_models.append(sensitivity_recall)
    specificity_models.append(specificity)
    f1_score_models.append(f1_score)
    mcFadden_r2_models.append(mcFadden_r2)
    r2_models.append(r2)

    ###########################################################################################
    ## visualization: confusion-matrix heatmap
    ###########################################################################################
    fig, ax = plt.subplots(figsize=(6, 6))

    sns.heatmap(
        conf_matrix, annot=True, fmt="d", cmap=heatmap_cmap_color,
        xticklabels=["arrowdown", "arrowup"],
        yticklabels=["arrowdown", "arrowup"],
        ax=ax,
        cbar=False
    )

    ax.set_xlabel("prediction", fontsize=14, fontweight='bold')
    ax.set_ylabel("true label", fontsize=14, fontweight='bold')
    ax.set_title("confusion matrix", fontsize=16, fontweight='bold')
    ax.tick_params(axis='both', labelsize=14)

    # leave room at the top for the figure-level title, then save and close
    plt.tight_layout(rect=[0, 0, 1, 0.9])
    fig.suptitle(f'participant {idx}', fontsize=18, fontweight='bold', y=0.95)

    filename = os.path.join(output_dir_plots, f"plot_{participants[idx]}.pdf")
    plt.savefig(filename, format='pdf')
    plt.close(fig)  # avoid accumulating open figures across participants

    print(f"saved: {filename}")

    ###########################################################################################
    ## saving model behavior (one row per trial)
    ###########################################################################################
    # each trial's Q-table is stored as a nested list so it fits into one CSV cell
    q_values_reshaped = [q_values[i].tolist() for i in range(n_trials)]

    df_model_behavior = pd.DataFrame({
        "model_choices": predicted_choices,
        "participant_choices": choices,
        "model_total_reward": total_reward,
        "participant_total_reward": df_all["totalReward"],
        "q_val": q_values_reshaped
    })

    file_path = os.path.join(output_dir_model_behavior, f"model_behavior_{participants[idx]}.csv")
    df_model_behavior.to_csv(file_path, index=False)



saved: 13_RL_agent_TDlearn_output_risk_dualQ\plots\plot_02_01_2025_13_21_03.pdf
saved: 13_RL_agent_TDlearn_output_risk_dualQ\plots\plot_03_02_2025_15_29_50.pdf
saved: 13_RL_agent_TDlearn_output_risk_dualQ\plots\plot_03_04_2025_13_57_44.pdf
saved: 13_RL_agent_TDlearn_output_risk_dualQ\plots\plot_07_12_2024_13_02_50.pdf
saved: 13_RL_agent_TDlearn_output_risk_dualQ\plots\plot_14_03_2025_16_05_47.pdf
saved: 13_RL_agent_TDlearn_output_risk_dualQ\plots\plot_20_05_2025_13_31_58.pdf
saved: 13_RL_agent_TDlearn_output_risk_dualQ\plots\plot_28_02_2025_12_02_47.pdf


# now saving the model evaluation values

In [11]:
# Summary table: one row per participant, with the fitted parameters first
# and the fit-quality metrics after them.
df_models_evaluation = pd.DataFrame(dict(
    participants=participants,
    best_alpha_r=best_alpha_r_models,
    best_alpha_s=best_alpha_s_models,
    best_beta=best_beta_models,
    best_eta=best_eta_models,
    BIC=BIC_models,
    AIC=AIC_models,
    accuracy=accuracy_models,
    precision=precision_models,
    sensitivity_recall=sensitivity_recall_models,
    specificity=specificity_models,
    f1_score=f1_score_models,
    mcFadden_r2=mcFadden_r2_models,
    r2=r2_models,
))

# Write the table next to the other model outputs, without the index column.
file_path = os.path.join(output_dir_model_evaluation, "models_evaluation.csv")
df_models_evaluation.to_csv(file_path, index=False)