# win stay lose shift

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import scipy.stats as stats
import seaborn as sns
from sklearn.metrics import confusion_matrix
np.random.seed(42)
from joblib import Parallel, delayed
import matplotlib.tri as tri
import matplotlib.colors as mcolors
from scipy.interpolate import griddata
from scipy.interpolate import RBFInterpolator
import matplotlib.ticker as mticker
import itertools
from sklearn.metrics import r2_score


# important directories

In [2]:
# Input folder that is scanned for participant CSV files.
folder_path = 'data_risk_added_epileptic'

# Output tree: a root folder with "plots" and "model_behavior" sub-folders.
output_dir_model_evaluation = "13_RL_agent_TDlearn_output_wsls"
output_dir_plots = os.path.join(output_dir_model_evaluation, "plots")
output_dir_model_behavior = os.path.join(output_dir_model_evaluation, "model_behavior")

# Create the root first, then each sub-folder; existing folders are left alone.
for _out_dir in (output_dir_model_evaluation, output_dir_plots, output_dir_model_behavior):
    os.makedirs(_out_dir, exist_ok=True)

In [3]:

# Read every ".csv" entry of the data folder into its own DataFrame,
# in the order os.listdir() returns them.
dataframes = list(map(
    pd.read_csv,
    (
        os.path.join(folder_path, entry)
        for entry in os.listdir(folder_path)
        if entry.endswith('.csv')
    ),
))

# One DataFrame per file, so the list length is the participant count.
n_participant = len(dataframes)
print(f"there are {n_participant} participants.")
# Notebook display of the first participant's table.
dataframes[0]


there are 7 participants.


Unnamed: 0,arrowRT,distribution,interTrialInterval,outcome,myCard,yourCard,spaceRT,totalReward,trialIndex,trialType,choice,block,timeoutRepeat,is_within_IQR,risk
0,2390,uniform,756,win,2,7,3186,10.5,0,response,arrowdown,1,0,0,0.125
1,1262,uniform,874,win,9,4,1040,11,1,response,arrowup,1,0,0,0.000
2,1547,uniform,791,win,5,6,204,11.5,2,response,arrowdown,1,0,1,0.500
3,1627,uniform,828,win,9,1,872,12,3,response,arrowup,1,0,0,0.000
4,459,uniform,894,win,6,4,790,12.5,4,response,arrowup,1,0,0,0.375
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
267,514,high,847,win,8,7,429,85,134,response,arrowup,4,0,1,0.243
268,179,high,902,win,4,8,211,85.5,115,response,arrowdown,4,0,1,0.146
269,419,low,815,lose,8,9,353,85,81,response,arrowup,4,0,1,0.023
270,731,low,941,lose,3,1,212,84.5,59,response,arrowdown,4,0,1,0.447


### Participant names for models_evaluation.csv: take each data file name (e.g. task_data_07_11_2024_17_23_43.csv), strip the extension and the "task_data_" prefix, and use the remainder ("07_11_2024_17_23_43") as the participant name in the dataset.

In [4]:
# Participant id = CSV file name without its extension and without the
# "task_data_" prefix. Entries are taken in os.listdir() order.
participants = [
    os.path.splitext(csv_name)[0].replace("task_data_", "")
    for csv_name in filter(
        lambda entry: entry.endswith('.csv'),
        os.listdir(folder_path),
    )
]

### alpha = p_stay_after_win
### beta = p_shift_after_lose

In [5]:
# Number of random draws per parameter.
num_of_samples = 100
# num_of_samples = 1000

# Sampling bounds for alpha (p_stay_after_win) and beta (p_shift_after_lose).
alpha_min, alpha_max = 0, 1
beta_min, beta_max = 0, 1

# The upper bound is nudged by machine epsilon because np.random.uniform
# samples from a half-open interval [low, high).
alpha_samples = np.random.uniform(
    low=alpha_min,
    high=alpha_max + np.finfo(float).eps,
    size=num_of_samples,
)
beta_samples = np.random.uniform(
    low=beta_min,
    high=beta_max + np.finfo(float).eps,
    size=num_of_samples,
)

# Integer codes for the categorical columns of the task data.
actions = dict(arrowdown=0, arrowup=1)
distributions_map = dict(uniform=0, low=1, high=2)

In [6]:
def get_wsls_probability(prev_choice, current_choice, prev_reward, p_stay_win, p_shift_loss):
    """Return the win-stay/lose-shift probability of the observed choice.

    This is the probability the model assigns to the choice actually made on
    a trial, given the model parameters and the previous trial.

    Parameters
    ----------
    prev_choice : choice made on the previous trial.
    current_choice : choice made on the current trial.
    prev_reward : reward of the previous trial; a value > 0 counts as a win,
        anything else as a loss.
    p_stay_win : probability of repeating the choice after a win.
    p_shift_loss : probability of changing the choice after a loss.

    Returns
    -------
    The probability of ``current_choice`` under the model. A ``prev_choice``
    that compares unequal to everything (e.g. NaN) is scored as a shift.
    """
    repeated = current_choice == prev_choice
    shifted = current_choice != prev_choice

    if prev_reward > 0:
        # After a win: staying has probability p_stay_win.
        if repeated:
            return p_stay_win
        return 1 - p_stay_win

    # After a loss: shifting has probability p_shift_loss.
    if shifted:
        return p_shift_loss
    return 1 - p_shift_loss



def simulate_wsls_choices(prev_choice, rewards, p_stay_win, p_shift_loss):
    """Simulate a sequence of binary choices from a win-stay/lose-shift agent.

    Parameters
    ----------
    prev_choice : choice (0 or 1) used as-is for the first trial.
    rewards : per-trial rewards; ``rewards[t - 1] > 0`` counts as a win when
        drawing the choice for trial ``t``.
    p_stay_win : probability of repeating the last choice after a win.
    p_shift_loss : probability of changing the last choice after a loss.

    Returns
    -------
    numpy array with one simulated choice per entry of ``rewards``.
    Draws use the global NumPy random state (``np.random.choice``).
    """
    simulated = []
    for trial in range(len(rewards)):
        if trial == 0:
            # The first trial has no history: the given choice is kept.
            simulated.append(prev_choice)
            continue

        won_last_trial = rewards[trial - 1] > 0
        if won_last_trial:
            stay_prob = p_stay_win
        else:
            stay_prob = 1 - p_shift_loss
        switch_prob = 1 - stay_prob

        # Order the probabilities as [P(choice 0), P(choice 1)].
        if prev_choice == 0:
            probs = [stay_prob, switch_prob]
        else:
            probs = [switch_prob, stay_prob]

        prev_choice = np.random.choice([0, 1], p=probs)
        simulated.append(prev_choice)

    return np.array(simulated)

In [7]:
# Per-participant result accumulators; each name gets its own empty list
# and receives one entry per participant.
(
    BIC_models,
    AIC_models,
    best_alpha_models,
    best_beta_models,
    accuracy_models,
    precision_models,
    sensitivity_recall_models,
    specificity_models,
    f1_score_models,
    mcFadden_r2_models,
    r2_models,
) = ([] for _ in range(11))



# participants loop: for every participant, grid-search alpha/beta by
# log-likelihood, evaluate the best pair, plot, and save the model behavior.
for idx, df_all in enumerate(dataframes):
    # Drop trials whose outcome is the string 'na' and renumber the rows so
    # that positional indices match the row labels.
    df_all = df_all[df_all['outcome'].str.lower() != 'na'].reset_index(drop=True)
    rewards = df_all['outcome'].apply(lambda x: 1 if x.lower() == 'win' else 0).values
    true_choices = df_all['choice'].map(actions).values
    trials_myCard = df_all["myCard"].values
    trials_yourCard = df_all["yourCard"].values
    trials_distribution = df_all['distribution'].map(distributions_map).values

    # The state tables are indexed by (distribution code, card - 1), so they
    # must be as wide as the largest card value. Sizing them by the number of
    # unique cards raises IndexError when a card value is missing from the data.
    n_cards = int(trials_myCard.max())
    state_shape = (len(distributions_map), n_cards)

    best_alpha, best_beta = None, None
    best_log_likelihood = -np.inf

    # (alpha, beta, log_likelihood) for every evaluated pair; used for the heatmap.
    results = []
    for alpha in alpha_samples:
        for beta in beta_samples:
            log_likelihood = 0

            # Last choice / last reward remembered per (distribution, card) state.
            prev_choice = np.full(state_shape, np.nan)
            prev_choice[trials_distribution[0], trials_myCard[0] - 1] = true_choices[0]
            prev_reward = np.full(state_shape, np.nan)

            for t in range(1, len(rewards)):  # trial by trial
                prev_state = (trials_distribution[t - 1], trials_myCard[t - 1] - 1)
                prev_reward[prev_state] = rewards[t - 1]

                prob = get_wsls_probability(
                    prev_choice[prev_state],
                    true_choices[t],
                    prev_reward[prev_state],
                    alpha,
                    beta,
                )
                prob = np.clip(prob, 1e-6, 1 - 1e-6)  # avoid log(0)
                log_likelihood += np.log(prob)

                # Store the current choice under the CURRENT trial's state, which
                # is exactly the cell the next iteration reads as "previous".
                # (Using the previous trial's distribution here made the lookup
                # return NaN or a stale choice whenever the distribution changed.)
                prev_choice[trials_distribution[t], trials_myCard[t] - 1] = true_choices[t]

            results.append((alpha, beta, log_likelihood))

            if log_likelihood > best_log_likelihood:
                best_alpha, best_beta = alpha, beta
                best_log_likelihood = log_likelihood

    # One stochastic simulation of the best-fitting agent, driven by the
    # participant's own reward sequence and first choice.
    best_predicted_choices = simulate_wsls_choices(true_choices[0], rewards, best_alpha, best_beta)

    # labels=[0, 1] forces a 2x2 matrix even if one class never occurs, so
    # the four-way unpacking below cannot fail.
    conf_matrix = confusion_matrix(true_choices, best_predicted_choices, labels=[0, 1])
    TN, FP, FN, TP = conf_matrix.ravel()
    accuracy = (TP + TN) / (TP + TN + FP + FN)
    precision = TP / (TP + FP) if (TP + FP) else 0
    recall = TP / (TP + FN) if (TP + FN) else 0
    specificity = TN / (TN + FP) if (TN + FP) else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) else 0

    # The likelihood above covers trials 1..n-1 (the first trial has no
    # history), so the sample size and the null model use the same trials.
    scored_choices = true_choices[1:]
    n_trials = len(scored_choices)
    k = 2  # free parameters: alpha and beta
    BIC = k * np.log(n_trials) - 2 * best_log_likelihood
    AIC = 2 * k - 2 * best_log_likelihood
    # Null model: a constant probability of choosing "up" (code 1).
    p_null = np.mean(scored_choices)
    p_null = np.clip(p_null, 1e-6, 1 - 1e-6)
    log_likelihood_null = np.sum(
        scored_choices * np.log(p_null) + (1 - scored_choices) * np.log(1 - p_null)
    )
    mcFadden_r2 = 1 - (best_log_likelihood / log_likelihood_null)
    r2 = r2_score(true_choices, best_predicted_choices)

    best_alpha_models.append(best_alpha)
    best_beta_models.append(best_beta)
    BIC_models.append(BIC)
    AIC_models.append(AIC)
    accuracy_models.append(accuracy)
    precision_models.append(precision)
    sensitivity_recall_models.append(recall)
    specificity_models.append(specificity)
    f1_score_models.append(f1)
    mcFadden_r2_models.append(mcFadden_r2)
    r2_models.append(r2)

    # Running reward of the simulated agent: starts at 10, +0.5 when the
    # simulated choice is "up" (1) with myCard > yourCard or "down" (0) with
    # myCard < yourCard, otherwise -0.5 (ties included).
    total_reward = []
    running_reward = 10
    for my_card, your_card, predicted in zip(trials_myCard, trials_yourCard, best_predicted_choices):
        model_wins = ((my_card > your_card and predicted == 1) or
                      (my_card < your_card and predicted == 0))
        running_reward = running_reward + 0.5 if model_wins else running_reward - 0.5
        total_reward.append(running_reward)

    ###########################################################################################
    ## visualization
    ###########################################################################################

    fig, axes = plt.subplots(1, 3, figsize=(19, 6))
    plots_smooth_level = 20

    #############################################
    # Density Plot (KDE) of the sampled (alpha, beta) pairs. This shows how
    # the search points are spread, not how well they fit.
    sns.kdeplot(
        x= alpha_samples,
        y= beta_samples,
        fill=True,
        cmap="viridis",
        ax=axes[0],
        bw_adjust=1.8,  # Increase for smoother density
        levels=plots_smooth_level,  # More contour levels
        thresh=0  # Ensure density is plotted across all values
    )
    mappable = axes[0].collections[0]
    cbar = fig.colorbar(mappable, ax=axes[0], label="density", fraction=0.046, pad=0.04)
    cbar.ax.yaxis.set_major_formatter(mticker.FormatStrFormatter('%.2f'))  # 2 decimal places
    cbar.ax.set_ylabel("density", fontsize=12, fontweight='bold')
    cbar.ax.tick_params(labelsize=12)

    axes[0].set_xlim(alpha_min, alpha_max)
    axes[0].set_ylim(beta_min, beta_max)
    axes[0].set_xlabel("p_stay_win (α)", fontsize=14, fontweight='bold')
    axes[0].set_ylabel("p_shift_lose (β)", fontsize=14, fontweight='bold')
    axes[0].set_title("density of α and β joint probability", fontsize=16, fontweight='bold')
    axes[0].tick_params(axis='both', labelsize=14)

    #############################################
    # Log Likelihood: mean log-likelihood of the sampled pairs inside each
    # 0.1 x 0.1 (alpha, beta) bin; bins are labelled by their lower edge.

    alpha_step = 0.1
    beta_step = 0.1

    alpha_bins = np.arange(alpha_min, alpha_max+ alpha_step, alpha_step)
    beta_bins = np.arange(beta_min, beta_max + beta_step, beta_step)

    results_df = pd.DataFrame(results, columns=["alpha", "beta", "log_likelihood"])
    results_df["alpha_binned"] = pd.cut(results_df["alpha"], bins=alpha_bins, labels=alpha_bins[:-1], include_lowest=True)
    results_df["beta_binned"] = pd.cut(results_df["beta"], bins=beta_bins, labels=beta_bins[:-1], include_lowest=True)

    # Rows = beta bins, columns = alpha bins.
    heatmap_data = results_df.groupby(
        ["beta_binned", "alpha_binned"], observed=False)["log_likelihood"].mean().unstack()

    heatmap_data.index = heatmap_data.index.astype(float)
    heatmap_data.columns = heatmap_data.columns.astype(float)

    sns.heatmap(
        heatmap_data,
        cmap="Blues",
        cbar=True,
        ax=axes[1]
    )
    axes[1].set_xticks(np.arange(len(heatmap_data.columns)))
    axes[1].set_xticklabels([f"{x:.1f}" for x in heatmap_data.columns], rotation=45)
    axes[1].set_yticks(np.arange(len(heatmap_data.index)))
    axes[1].set_yticklabels([f"{y:.1f}" for y in heatmap_data.index])

    axes[1].set_xlabel("p_stay_win (α)", fontsize=14, fontweight='bold')
    axes[1].set_ylabel("p_shift_lose (β)", fontsize=14, fontweight='bold')
    axes[1].set_title("Log Likelihood for Combinations of α and β", fontsize=16, fontweight='bold')
    axes[1].tick_params(axis='both', labelsize=14)
    axes[1].invert_yaxis()  # put the smallest beta bin at the bottom

    #############################################
    # Confusion Matrix of participant choices vs. the simulated choices.

    sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Oranges", xticklabels=["down", "up"], yticklabels=["down", "up"], ax=axes[2])
    axes[2].set_xlabel("prediction")
    axes[2].set_ylabel("true")
    axes[2].set_title(f"confusion matrix\nα={best_alpha:.2f}, β={best_beta:.2f}")
    plt.tight_layout()
    filename = os.path.join(output_dir_plots, f"plot_{participants[idx]}.pdf")
    plt.savefig(filename, format='pdf')
    plt.close()

    ###########################################################################################
    # saving model behavior
    ###########################################################################################

    df_model_behavior = pd.DataFrame({
        "model_choices": best_predicted_choices,
        "participant_choices": true_choices,
        "model_total_reward": total_reward,
        "participant_total_reward": df_all["totalReward"]
    })
    behavior_path = os.path.join(output_dir_model_behavior, f"model_behavior_{participants[idx]}.csv")
    df_model_behavior.to_csv(behavior_path, index=False)

    print(f"saved: {filename}")



saved: 13_RL_agent_TDlearn_output_wsls\plots\plot_02_01_2025_13_21_03.pdf
saved: 13_RL_agent_TDlearn_output_wsls\plots\plot_03_02_2025_15_29_50.pdf
saved: 13_RL_agent_TDlearn_output_wsls\plots\plot_03_04_2025_13_57_44.pdf
saved: 13_RL_agent_TDlearn_output_wsls\plots\plot_07_12_2024_13_02_50.pdf
saved: 13_RL_agent_TDlearn_output_wsls\plots\plot_14_03_2025_16_05_47.pdf
saved: 13_RL_agent_TDlearn_output_wsls\plots\plot_20_05_2025_13_31_58.pdf
saved: 13_RL_agent_TDlearn_output_wsls\plots\plot_28_02_2025_12_02_47.pdf


# now saving the model evaluation values

In [8]:
# One row per participant: best-fitting parameters plus every evaluation
# metric collected in the participants loop.
summary_df = pd.DataFrame(dict(
    participants=participants,
    best_alpha=best_alpha_models,
    best_beta=best_beta_models,
    BIC=BIC_models,
    AIC=AIC_models,
    accuracy=accuracy_models,
    precision=precision_models,
    sensitivity_recall=sensitivity_recall_models,
    specificity=specificity_models,
    f1_score=f1_score_models,
    mcFadden_r2=mcFadden_r2_models,
    r2=r2_models,
))

# Write the table next to the other model-evaluation outputs, without the index column.
summary_path = os.path.join(output_dir_model_evaluation, "models_evaluation.csv")
summary_df.to_csv(summary_path, index=False)


## debug