# win stay lose shift

In [13]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import scipy.stats as stats
import seaborn as sns
from sklearn.metrics import confusion_matrix
np.random.seed(42)
from joblib import Parallel, delayed
import matplotlib.tri as tri
import matplotlib.colors as mcolors
from scipy.interpolate import griddata
from scipy.interpolate import RBFInterpolator
import matplotlib.ticker as mticker
import itertools
from sklearn.metrics import r2_score


# important directories

In [14]:
folder_path = 'data_risk_added'

output_dir_model_evaluation = "11_RL_agent_TDlearn_output_wsls"
os.makedirs(output_dir_model_evaluation, exist_ok=True)

output_dir_plots = os.path.join(output_dir_model_evaluation, "plots")
os.makedirs(output_dir_plots, exist_ok=True)

output_dir_model_behavior = os.path.join(output_dir_model_evaluation, "model_behavior")
os.makedirs(output_dir_model_behavior, exist_ok=True)

In [15]:
dataframes = [pd.read_excel(os.path.join(folder_path, file)) for file in os.listdir(folder_path) if file.endswith('.xlsx')]

n_participant = len(dataframes)
print(f"there are {n_participant} participants.")
dataframes[0]


there are 2 participants.


Unnamed: 0,arrowRT,distribution,interTrialInterval,outcome,myCard,yourCard,spaceRT,totalReward,trialIndex,trialType,choice,block,timeoutRepeat,risk
0,2609,uniform,789,lose,4,2,1335,9.5,0,response,arrowdown,1,0,0.375
1,597,uniform,853,win,9,4,1407,10,1,response,arrowup,1,0,0.000
2,188,uniform,904,win,4,7,1504,10.5,2,response,arrowdown,1,0,0.375
3,423,uniform,916,win,2,4,1434,11,3,response,arrowdown,1,0,0.125
4,549,uniform,806,win,5,7,1287,11.5,4,response,arrowdown,1,0,0.500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
272,761,high,913,win,7,8,1382,80,125,response,arrowdown,4,0,0.447
273,596,low,921,win,4,3,1318,80.5,83,response,arrowup,4,0,0.385
274,414,low,950,win,2,7,1335,81,77,response,arrowdown,4,0,0.243
275,1371,uniform,842,win,6,4,1615,81.5,35,response,arrowup,4,0,0.375


### I want to make participant file name for the model_evaluation.csv and that is I'm gonna take each data name task_data_07_11_2024_17_23_43.xlsx and extract "07_11_2024_17_23_43" and this should be the participant name in the dataset. 

In [16]:
participants = [os.path.splitext(file)[0].replace("task_data_", "")
    for file in os.listdir(folder_path) if file.endswith('.xlsx')]

### alpha = p_stay_after_win
### beta = p_shift_after_lose

In [17]:
num_of_samples = 100
# num_of_samples = 1000
alpha_min = 0
alpha_max = 1
beta_min = 0
beta_max  = 10
alpha_samples = np.random.uniform(alpha_min, alpha_max + np.finfo(float).eps, num_of_samples)
beta_samples = np.random.uniform(beta_min, beta_max + np.finfo(float).eps, num_of_samples)

actions = { "arrowdown": 0, "arrowup": 1}
distributions_map = { "uniform": 0, "low": 1,  "high": 2}

## p in the following code is:
##### The probability that the model assigns to the actual choice the participant made on a given trial, based on the model's current parameters and the outcome of the previous trial.


In [18]:
actions = { "arrowdown": 0, "arrowup": 1}
distributions_map = { "uniform": 0, "low": 1,  "high": 2}

In [19]:

def get_wsls_probability(prev_choice, current_choice, prev_reward, p_stay_win, p_shift_loss):
    if prev_reward >= 0:  # WIN
        return p_stay_win if current_choice == prev_choice else 1 - p_stay_win
    else:  # LOSE
        return p_shift_loss if current_choice != prev_choice else 1 - p_shift_loss

def simulate_wsls_choices(rewards, p_stay_win, p_shift_loss, actions=(0, 1), seed=None):
    if seed is not None:
        np.random.seed(seed)

    n_trials = len(rewards)
    predicted_choices = []
    prev_choice = np.random.choice(actions)
    predicted_choices.append(prev_choice)

    for t in range(1, n_trials + 1):
        prev_reward = rewards[t - 1]
        other_choice = 1 - prev_choice

        if prev_reward >= 0:
            probs = {
                prev_choice: p_stay_win,
                other_choice: 1 - p_stay_win
            }
        else:
            probs = {
                prev_choice: 1 - p_shift_loss,
                other_choice: p_shift_loss
            }

        current_choice = np.random.choice([0, 1], p=[probs[0], probs[1]])
        predicted_choices.append(current_choice)
        prev_choice = current_choice

    return predicted_choices[:-1]



If the previous outcome was a loss:

The model assigns 1 - p_shift_loss to staying (because it's less likely)

And assigns p_shift_loss to switching

probs = {
    'A': 0.8,
    'B': 0.2
}


What’s it doing?
The key prev_choice (e.g., 'A') gets assigned p_stay_win → probability of staying

The other action (e.g., 'B') gets 1 - p_stay_win → probability of switching

In [21]:
BIC_models = []
AIC_models = []
best_alpha_models = []
best_beta_models = []
accuracy_models = []
precision_models = []
sensitivity_recall_models = []
specificity_models = []
f1_score_models = []
mcFadden_r2_models = []
r2_models = []


# participants loop
for idx, df_all in enumerate(dataframes):
    df_all = df_all[df_all['outcome'].str.lower() != 'na'].reset_index(drop=True)
    rewards = df_all['outcome'].apply(lambda x: 0.5 if x.lower() == 'win' else -0.5).values
    true_choices = df_all['choice'].map(actions).values
    best_log_likelihood = -np.inf

    results = []
    for alpha in alpha_samples:
        for beta in beta_samples:
            pred_choices = simulate_wsls_choices(rewards, alpha, beta)
            log_likelihood = 0
            prev_choice = pred_choices[0]
            for t in range(1, len(rewards)):
                prob = get_wsls_probability(prev_choice, true_choices[t], rewards[t - 1], alpha, beta)
                prob = np.clip(prob, 1e-6, 1 - 1e-6)
                log_likelihood += np.log(prob)
                prev_choice = true_choices[t]

            results.append((alpha, beta, log_likelihood))

            if log_likelihood > best_log_likelihood:
                best_alpha, best_beta = alpha, beta
                best_log_likelihood = log_likelihood
                best_predicted_choices = pred_choices


    conf_matrix = confusion_matrix(true_choices, best_predicted_choices)
    TN, FP, FN, TP = conf_matrix.ravel()
    accuracy = (TP + TN) / (TP + TN + FP + FN)
    precision = TP / (TP + FP) if (TP + FP) else 0
    recall = TP / (TP + FN) if (TP + FN) else 0
    specificity = TN / (TN + FP) if (TN + FP) else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) else 0
    n_trials = len(df_all)
    k = 2
    BIC = k * np.log(n_trials) - 2 * best_log_likelihood
    AIC = 2 * k - 2 * best_log_likelihood
    p_null = np.mean(true_choices)
    log_likelihood_null = np.sum(true_choices * np.log(p_null) + (1 - true_choices) * np.log(1 - p_null))
    mcFadden_r2 = 1 - (best_log_likelihood / log_likelihood_null)
    r2 = r2_score(true_choices, best_predicted_choices)

    best_alpha_models.append(best_alpha)
    best_beta_models.append(best_beta)
    BIC_models.append(BIC)
    AIC_models.append(AIC)
    accuracy_models.append(accuracy)
    precision_models.append(precision)
    sensitivity_recall_models.append(recall)
    specificity_models.append(specificity)
    f1_score_models.append(f1)
    mcFadden_r2_models.append(mcFadden_r2)
    r2_models.append(r2)

    # Total reward
    total_reward = []
    for i in range(len(best_predicted_choices)):
        last_reward = total_reward[-1] if total_reward else 10
        if ((df_all.loc[i, 'myCard'] > df_all.loc[i, 'yourCard'] and best_predicted_choices[i] == 1) or
            (df_all.loc[i, 'myCard'] < df_all.loc[i, 'yourCard'] and best_predicted_choices[i] == 0)):
            total_reward.append(last_reward + 0.5)
        else:
            total_reward.append(last_reward - 0.5)

    # Save behavior
    df_model_behavior = pd.DataFrame({
        "model_choices": best_predicted_choices,
        "participant_choices": true_choices,
        "model_total_reward": total_reward,
        "participant_total_reward": df_all["totalReward"]
    })
    behavior_path = os.path.join(output_dir_model_behavior, f"model_behavior_{participants[idx]}.csv")
    df_model_behavior.to_csv(behavior_path, index=False)

    # Plot
    fig, axes = plt.subplots(1, 1, figsize=(5, 4))
    sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Oranges", xticklabels=["down", "up"], yticklabels=["down", "up"], ax=axes)
    axes.set_xlabel("prediction")
    axes.set_ylabel("true")
    axes.set_title(f"confusion matrix\nα={best_alpha:.2f}, β={best_beta:.2f}")
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir_plots, f"plot_{participants[idx]}.pdf"))
    plt.close()


ValueError: probabilities are not non-negative

In [None]:
summary_df = pd.DataFrame({
    "participants": participants,
    "best_alpha": best_alpha_models,
    "best_beta": best_beta_models,
    "BIC": BIC_models,
    "AIC": AIC_models,
    "accuracy": accuracy_models,
    "precision": precision_models,
    "sensitivity_recall": sensitivity_recall_models,
    "specificity": specificity_models,
    "f1_score": f1_score_models,
    "mcFadden_r2": mcFadden_r2_models,
    "r2": r2_models
})

summary_path = os.path.join(output_dir_model_evaluation, "models_evaluation.csv")
summary_df.to_csv(summary_path, index=False)


## debug

In [None]:
# def get_wsls_probability(prev_choice, current_choice, prev_reward, p_stay_win, p_shift_lose):
#     if prev_reward >= 0:  # win
#         if current_choice == prev_choice:
#             P = p_stay_win
#         else:
#             P = 1 - p_stay_win
#     else:  # lose
#         if current_choice == prev_choice:
#             P = 1 - p_shift_lose
#         else:
#             P = p_shift_lose
#     return P



# log_likelihood += np.log(P + epsilon)






# def simulate_wsls_choices(rewards, p_stay_win, p_shift_loss, actions=(0, 1), seed=None):
#     if seed is not None:
#         np.random.seed(seed)

#     n_trials = len(rewards)
#     predicted_choices = []

#     # Random first choice (0 or 1)
#     prev_choice = np.random.choice(actions)
#     predicted_choices.append(prev_choice)

#     for t in range(1, n_trials + 1):
#         prev_reward = rewards[t - 1]

#         # Determine the other action (0 -> 1, 1 -> 0)
#         other_choice = 1 - prev_choice

#         # Assign probabilities to each action
#         if prev_reward >= 0:  # WIN
#             probs = {
#                 prev_choice: p_stay_win,
#                 other_choice: 1 - p_stay_win
#             }
#         else:  # LOSS
#             probs = {
#                 prev_choice: 1 - p_shift_loss,
#                 other_choice: p_shift_loss
#             }

#         # Choose next action based on probabilities
#         current_choice = np.random.choice([0, 1], p=[probs[0], probs[1]])
#         predicted_choices.append(current_choice)

#         prev_choice = current_choice  # update for next trial

#     return predicted_choices[:-1]  # we predicted n+1 choices for n trials
