# TD learning

This model tests whether the TD value estimates can predict the trial outcome (win/lose). Predicting the participant's action (their choice) would probably be more informative than predicting the outcome, so the next notebook (version 8) changes the regression target accordingly.

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm

# reading data

In [2]:

folder_path = 'data_risk_added'

# os.listdir() returns entries in arbitrary order, so sort the names to keep the
# participant index (used later for plot file names) stable across runs and machines.
# Excel writes temporary lock files named "~$<name>.xlsx" while a workbook is open;
# they match the extension but are not readable workbooks, so skip them.
excel_files = sorted(
    file_name
    for file_name in os.listdir(folder_path)
    if file_name.endswith('.xlsx') and not file_name.startswith('~$')
)

# One DataFrame per participant file, in sorted file-name order.
dataframes = [
    pd.read_excel(os.path.join(folder_path, file_name))
    for file_name in excel_files
]


n_participant = len(dataframes)
print(f"There are {n_participant} participants.")


dataframes[0].head(5)


There are 31 participants.


Unnamed: 0,arrowRT,distribution,interTrialInterval,outcome,myCard,yourCard,spaceRT,totalReward,trialIndex,trialType,choice,block,timeoutRepeat,risk
0,570,uniform,831,lose,5,2,2209,9.5,0,response,arrowdown,1,0,0.5
1,1162,uniform,901,lose,4,3,5755,9.0,1,response,arrowdown,1,0,0.375
2,355,uniform,939,win,4,6,1209,9.5,2,response,arrowdown,1,0,0.375
3,1163,uniform,828,win,7,5,1997,10.0,3,response,arrowup,1,0,0.25
4,299,uniform,776,win,3,9,1324,10.5,4,response,arrowdown,1,0,0.25


In [3]:
def _td_value_trace(input_data, alpha):
    """Run one pass of the TD update over input_data with learning rate alpha.

    The value starts at input_data[0]; the estimate stored for trial t is the
    value after updating on the input of trial t - 1.

    Returns:
        values (np.ndarray): Value estimate per trial (values[0] == input_data[0]).
        errors (np.ndarray): Prediction error per trial (errors[0] is left at 0).
    """
    n_trials = len(input_data)
    values = np.zeros(n_trials)
    errors = np.zeros(n_trials)

    V = input_data[0]
    values[0] = V
    for t in range(1, n_trials):
        PE = input_data[t - 1] - V
        V = V + alpha * PE
        values[t] = V
        errors[t] = PE

    return values, errors


def fit_td_model_general(input_data, yBinary, alpha_values=None):
    """
    Generalized TD-learning model for any input data (Reward or Risk).
    Runs TD updates for each alpha in alpha_values, then fits a logistic
    regression of yBinary on the resulting value estimates. The "best" alpha is
    the one with the largest absolute regression slope.

    Args:
        input_data: 1-D sequence of per-trial inputs (e.g. reward or risk).
        yBinary: Binary target per trial, same length as input_data.
        alpha_values: Learning rates to test; defaults to 0.01, 0.02, ..., 1.00.

    Returns:
        alpha_values (np.ndarray): Tested alpha values
        inverseTemps (np.ndarray): Inverse temperature for each alpha
            (|slope|; 0 where the fit failed or the slope was not finite)
        bestAlpha (float): Best alpha value (the first alpha if every fit failed)
        bestValEstimate (np.ndarray): Value estimates using bestAlpha
        bestValEstimate_PE (np.ndarray): Prediction errors for bestAlpha
        bestModelParams (statsmodels parameters): Logistic regression parameters
            ([nan, nan] if the final fit failed)

    Raises:
        ValueError: If input_data is empty or not one-dimensional.
    """

    if alpha_values is None:
        alpha_values = np.arange(0.01, 1.01, 0.01)  # Alpha values from 0.01 to 1.0

    # Work on a plain float array so positional indexing is safe even when a
    # pandas Series with a non-zero-based index is passed in.
    input_data = np.asarray(input_data, dtype=float)
    if input_data.ndim != 1 or input_data.size == 0:
        raise ValueError("input_data must be a non-empty 1-D sequence")

    inverseTemps = np.zeros(len(alpha_values))

    # Loop through each alpha and perform TD updates
    for a_idx, alpha in enumerate(alpha_values):
        val_estimates, _ = _td_value_trace(input_data, alpha)

        # Fit logistic regression: outcome ~ val_estimates
        X = sm.add_constant(val_estimates)
        model = sm.Logit(yBinary, X)
        try:
            result = model.fit(disp=0)
            slope = np.asarray(result.params)[1]  # Slope used as inverse temperature
            # A NaN slope would otherwise be picked by np.argmax below.
            inverseTemps[a_idx] = abs(slope) if np.isfinite(slope) else 0
        except Exception:
            # Best-effort: a failed fit scores 0 instead of aborting the sweep.
            # (Exception, not a bare except, so Ctrl-C still interrupts.)
            inverseTemps[a_idx] = 0

    # Identify best alpha
    best_idx = np.argmax(inverseTemps)
    bestAlpha = alpha_values[best_idx]

    # Compute final value estimates using bestAlpha. Uses the same trace as the
    # sweep, so trial 0 holds input_data[0] here too (it was previously left at 0,
    # which made the final regressor differ from the one that selected bestAlpha).
    best_val_est, best_val_est_PE = _td_value_trace(input_data, bestAlpha)

    # Fit logistic regression again with bestAlpha
    X_best = sm.add_constant(best_val_est)
    best_model = sm.Logit(yBinary, X_best)
    try:
        best_result = best_model.fit(disp=0)
        best_params = best_result.params
    except Exception:
        best_params = [np.nan, np.nan]  # Handle regression failures

    return alpha_values, inverseTemps, bestAlpha, best_val_est, best_val_est_PE, best_params


In [4]:

# Define the output directory
output_dir = "7_RL_agent_TDlearn_output"
os.makedirs(output_dir, exist_ok=True)

# Reward assigned to each outcome; identical for every participant, so set once.
reward_win = 0.5
reward_lose = -0.5

best_alpha_across_participants_reward = []
best_alpha_across_participants_risk = []


def _plot_td_row(row_axes, signal_name, signal, alphas, inverse_temps, best_alpha, val_est, pred_err):
    """Draw the four diagnostic panels of one TD model on one row of axes.

    Panels, left to right: inverse temperature vs alpha (best alpha marked),
    value estimate vs the observed signal, prediction error over trials, and a
    horizontal histogram of the prediction errors. signal_name ('reward' or
    'risk') is used in the labels and titles.
    """
    ax_alpha, ax_value, ax_pe, ax_hist = row_axes

    ax_alpha.plot(alphas, inverse_temps, '-o', ms=4)
    ax_alpha.set_xlabel('alpha')
    ax_alpha.set_ylabel('inverse temperature')
    ax_alpha.set_title('inverse temperature vs alpha')
    ax_alpha.axvline(best_alpha, color='r', linestyle='--', label=f'best alpha = {best_alpha:.2f}')
    ax_alpha.legend()

    ax_value.plot(val_est, label='estimated value')
    ax_value.plot(signal, drawstyle='steps-mid', alpha=0.5, label=signal_name)
    ax_value.set_xlabel('trial')
    ax_value.set_ylabel(f'value / {signal_name}')
    ax_value.set_title(f'best value estimate vs actual {signal_name}')
    ax_value.legend()

    ax_pe.plot(pred_err)
    ax_pe.set_xlabel('trial')
    ax_pe.set_ylabel('PE')
    ax_pe.set_title('prediction error over trials')

    ax_hist.hist(pred_err, bins=20, orientation='horizontal', alpha=0.7)
    ax_hist.set_xlabel('count')
    ax_hist.set_ylabel('PE')
    ax_hist.set_title('prediction error hist.')


# Process each participant's data
for idx, df in enumerate(dataframes):
    # Keep only trials with a real outcome. Comparing against the string 'na'
    # alone is not enough: read_excel turns cells such as "NA" into NaN by
    # default, and NaN != 'na' is True, so those rows used to slip through and
    # were then coded as reward 0 / yBinary 0 (i.e. as a loss).
    outcome_labels = df['outcome'].astype(str).str.lower()
    has_outcome = outcome_labels.isin(['win', 'lose'])
    df = df[has_outcome]
    outcome_labels = outcome_labels[has_outcome]

    outcomes = df['outcome'].astype(str).values
    nTrials = len(df)

    if nTrials == 0:
        # Nothing to fit; record NaN so both lists stay aligned with the participant index.
        print(f"skipped participant {idx}: no win/lose trials")
        best_alpha_across_participants_reward.append(np.nan)
        best_alpha_across_participants_risk.append(np.nan)
        continue

    # Reward Model Inputs
    is_win = (outcome_labels == 'win').to_numpy()
    reward = np.where(is_win, reward_win, reward_lose)
    yBinary = is_win.astype(int)

    # Risk Model Inputs
    risk = df['risk'].values  # Risk values from the dataset

    # Fit TD-learning models for Reward and Risk
    alpha_values, inverseTemps_reward, bestAlpha_reward, best_val_est_reward, best_val_est_PE_reward, _ = fit_td_model_general(reward, yBinary)
    alpha_values, inverseTemps_risk, bestAlpha_risk, best_val_est_risk, best_val_est_PE_risk, _ = fit_td_model_general(risk, yBinary)

    best_alpha_across_participants_reward.append(bestAlpha_reward)
    best_alpha_across_participants_risk.append(bestAlpha_risk)

    # Create the visualization: one row per model, narrower last column for the histogram
    fig, axs = plt.subplots(2, 4, figsize=(14, 8), gridspec_kw={'width_ratios': [1, 1, 1, 0.5]})

    # Add row titles (lowercase); positions are in figure coordinates
    fig.text(0.5, 0.85, "reward", fontsize=18, fontweight='bold', ha='center')
    fig.text(0.5, 0.42, "risk", fontsize=18, fontweight='bold', ha='center')

    # Adjust spacing to move the second row lower
    # NOTE(review): tight_layout() below recomputes the subplot spacing, so this
    # hspace probably has no effect on the saved figure - confirm before removing.
    plt.subplots_adjust(hspace=2)

    # Reward model on the top row, risk model on the bottom row
    _plot_td_row(axs[0], 'reward', reward, alpha_values, inverseTemps_reward,
                 bestAlpha_reward, best_val_est_reward, best_val_est_PE_reward)
    _plot_td_row(axs[1], 'risk', risk, alpha_values, inverseTemps_risk,
                 bestAlpha_risk, best_val_est_risk, best_val_est_PE_risk)

    # Title
    fig.suptitle(f'participant {idx}', fontsize=16, fontweight='bold')

    plt.tight_layout(rect=[0, 0, 1, 0.9])  # Adjust layout to fit row titles

    filename = os.path.join(output_dir, f"plot_{idx}.pdf")
    plt.savefig(filename, format='pdf')
    plt.close(fig)  # free the figure; one is created per participant

    print(f"saved: {filename}")


saved: 7_RL_agent_TDlearn_output/plot_0.pdf
saved: 7_RL_agent_TDlearn_output/plot_1.pdf
saved: 7_RL_agent_TDlearn_output/plot_2.pdf
saved: 7_RL_agent_TDlearn_output/plot_3.pdf
saved: 7_RL_agent_TDlearn_output/plot_4.pdf
saved: 7_RL_agent_TDlearn_output/plot_5.pdf
saved: 7_RL_agent_TDlearn_output/plot_6.pdf
saved: 7_RL_agent_TDlearn_output/plot_7.pdf
saved: 7_RL_agent_TDlearn_output/plot_8.pdf
saved: 7_RL_agent_TDlearn_output/plot_9.pdf
saved: 7_RL_agent_TDlearn_output/plot_10.pdf
saved: 7_RL_agent_TDlearn_output/plot_11.pdf
saved: 7_RL_agent_TDlearn_output/plot_12.pdf
saved: 7_RL_agent_TDlearn_output/plot_13.pdf
saved: 7_RL_agent_TDlearn_output/plot_14.pdf
saved: 7_RL_agent_TDlearn_output/plot_15.pdf
saved: 7_RL_agent_TDlearn_output/plot_16.pdf
saved: 7_RL_agent_TDlearn_output/plot_17.pdf
saved: 7_RL_agent_TDlearn_output/plot_18.pdf
saved: 7_RL_agent_TDlearn_output/plot_19.pdf
saved: 7_RL_agent_TDlearn_output/plot_20.pdf
saved: 7_RL_agent_TDlearn_output/plot_21.pdf
saved: 7_RL_agent_TD