# TD learning

In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm

# Reading data

In [3]:

# Load one DataFrame per participant from every .xlsx workbook in folder_path.
folder_path = 'data_risk_added'
dataframes = []

# os.listdir returns entries in arbitrary order; sorting makes the participant
# index (and therefore the plot_{idx}.pdf file names written later) reproducible.
for file_name in sorted(os.listdir(folder_path)):
    # Files starting with '~$' are lock files Excel creates next to an open
    # workbook; they also end in .xlsx but are not participant data.
    if file_name.endswith('.xlsx') and not file_name.startswith('~$'):
        file_path = os.path.join(folder_path, file_name)
        df = pd.read_excel(file_path)
        dataframes.append(df)


n_participant = len(dataframes)
print(f"There are {n_participant} participants.")


# Preview the first participant's data as the cell output.
dataframes[0].head(5)


There are 31 participants.


Unnamed: 0,arrowRT,distribution,interTrialInterval,outcome,myCard,yourCard,spaceRT,totalReward,trialIndex,trialType,choice,block,timeoutRepeat,risk
0,570,uniform,831,lose,5,2,2209,9.5,0,response,arrowdown,1,0,0.5
1,1162,uniform,901,lose,4,3,5755,9.0,1,response,arrowdown,1,0,0.375
2,355,uniform,939,win,4,6,1209,9.5,2,response,arrowdown,1,0,0.375
3,1163,uniform,828,win,7,5,1997,10.0,3,response,arrowup,1,0,0.25
4,299,uniform,776,win,3,9,1324,10.5,4,response,arrowdown,1,0,0.25


# Reward model function

In [None]:
def _td_value_trace(signal, alpha):
    """Run the delta-rule update over `signal` with learning rate `alpha`.

    The value is initialised to signal[0]; the estimate for trial t is the
    value after updating with signal[t-1].

    Returns:
        values (np.ndarray): value estimate per trial (values[0] == signal[0]).
        errors (np.ndarray): prediction error used for each update
            (errors[0] is 0 because no update precedes the first trial).
    """
    signal = np.asarray(signal, dtype=float)
    n_trials = len(signal)
    values = np.zeros(n_trials)
    errors = np.zeros(n_trials)

    V = signal[0]
    values[0] = V
    for t in range(1, n_trials):
        pe = signal[t - 1] - V
        V = V + alpha * pe
        values[t] = V
        errors[t] = pe
    return values, errors


def fit_td_model(Reward, yBinary, alpha_values=None):
    """
    Grid-search the learning rate of a simple TD-learning model.

    For each alpha in alpha_values a value trace is computed from Reward and a
    logistic regression (yBinary ~ value estimate) is fitted. The absolute
    slope is stored as that alpha's inverse temperature, and the alpha with
    the largest inverse temperature is selected.

    Args:
        Reward: per-trial reward sequence (array-like of numbers).
        yBinary: per-trial binary outcome used as the regression target.
        alpha_values: learning rates to evaluate; defaults to 0.01..1.0 in
            steps of 0.01.

    Returns:
        alpha_values (np.ndarray): the learning rates that were evaluated
        inverseTemps (np.ndarray): |slope| per alpha (0 where the fit failed)
        bestAlpha (float): alpha with the largest inverse temperature
        best_val_est (np.ndarray): value estimates under bestAlpha
        best_val_est_PE (np.ndarray): prediction errors under bestAlpha
        best_params: logistic-regression parameters for bestAlpha
            ([nan, nan] if that fit fails)
    """

    if alpha_values is None:
        alpha_values = np.arange(0.01, 1.01, 0.01)  # 0.01 to 1.0 in steps of 0.01

    inverseTemps = np.zeros(len(alpha_values))

    # 1) a TD update, then fit logistic regression for each alpha
    for a_idx, alpha in enumerate(alpha_values):
        val_estimates, _ = _td_value_trace(Reward, alpha)

        # fit logistic regression: outcome ~ val_estimates
        X = sm.add_constant(val_estimates)
        model = sm.Logit(yBinary, X)
        try:
            result = model.fit(disp=0)
            # the second parameter is the slope, used as inverse temperature
            slope = np.asarray(result.params)[1]
            # a NaN slope would otherwise be picked by np.argmax below,
            # so treat a non-finite slope like a failed fit
            inverseTemps[a_idx] = abs(slope) if np.isfinite(slope) else 0
        except Exception:
            # fit failed for this alpha; score it 0 so it is never selected
            inverseTemps[a_idx] = 0

    # 2) pick best alpha
    best_idx = np.argmax(inverseTemps)
    bestAlpha = alpha_values[best_idx]

    # 3) recompute value estimates for bestAlpha with the same routine as the
    #    grid search, so trial 0 holds Reward[0] here as well
    best_val_est, best_val_est_PE = _td_value_trace(Reward, bestAlpha)

    # 4) fit logistic regression for bestAlpha
    X_best = sm.add_constant(best_val_est)
    best_model = sm.Logit(yBinary, X_best)
    try:
        best_result = best_model.fit(disp=0)
        best_params = best_result.params
    except Exception:
        best_params = [np.nan, np.nan]  # fit failed for the selected alpha

    return alpha_values, inverseTemps, bestAlpha, best_val_est, best_val_est_PE, best_params

# Run reward model

In [None]:


# Fit the reward TD model for every participant and save one diagnostic
# figure (plot_{idx}.pdf) per participant.
output_dir = "6_RL_agent_TDlearn_output/reward"
# savefig does not create missing directories, so make sure the target exists
os.makedirs(output_dir, exist_ok=True)
best_alpha_accros_participants = []

# reward assigned to each outcome
reward_win = 0.5
reward_lose = -0.5

for idx, df in enumerate(dataframes):

    # Keep only trials with a definite result. Comparing against the string
    # 'na' alone is not enough: cells that pandas parsed as missing (NaN) pass
    # that comparison and would enter the model as reward 0 / outcome 0.
    outcome_lc = df['outcome'].astype(str).str.lower()
    valid = outcome_lc.isin(['win', 'lose'])
    df = df[valid]

    outcomes = df['outcome'].astype(str).values
    nTrials = len(df)

    # win -> (reward_win, 1), lose -> (reward_lose, 0)
    is_win = (outcome_lc[valid] == 'win').values
    reward = np.where(is_win, reward_win, reward_lose)
    yBinary = is_win.astype(int)

    alpha_values, inverseTemps, bestAlpha, best_val_est, best_val_est_PE, best_params = fit_td_model(reward, yBinary)

    best_alpha_accros_participants.append(bestAlpha)

    fig, axs = plt.subplots(1, 4, figsize=(14, 4), gridspec_kw={'width_ratios': [1, 1, 1, 0.5]})

    # panel 0: inverse temperature across the alpha grid, best alpha marked
    axs[0].plot(alpha_values, inverseTemps, '-o', ms=4)
    axs[0].set_xlabel('alpha')
    axs[0].set_ylabel('inverse temperature')
    axs[0].axvline(bestAlpha, color='r', linestyle='--', label=f'best alpha = {bestAlpha:.2f}')
    axs[0].legend()

    # panel 1: value estimate under the best alpha against the reward sequence
    axs[1].plot(best_val_est, label='estimated value')
    axs[1].plot(reward, drawstyle='steps-mid', alpha=0.5, label='reward')
    axs[1].set_xlabel('trial')
    axs[1].set_ylabel('value / reward')
    axs[1].set_title('best value estimate vs. actual reward')
    axs[1].set_ylim(-0.6, 0.6)
    axs[1].legend()

    # panel 2: prediction error per trial
    axs[2].plot(best_val_est_PE)
    axs[2].set_xlabel('trial')
    axs[2].set_ylabel('PE')
    axs[2].set_ylim(-1, 1)

    # panel 3: distribution of prediction errors, sharing the PE axis range
    axs[3].hist(best_val_est_PE, bins=20, orientation='horizontal', alpha=0.7)
    axs[3].set_xlabel('count')
    axs[3].set_ylabel('PE')
    axs[3].set_ylim(-1, 1)
    axs[3].set_title('PE histogram')

    plt.tight_layout()

    filename = os.path.join(output_dir, f"plot_{idx}.pdf")
    plt.savefig(filename, format='pdf')

    # close the figure so figures do not accumulate across participants
    plt.close(fig)

    print(f"Saved: {filename}")


# Risk model

In [None]:
def _risk_value_trace(signal, alpha):
    """Run the delta-rule update over `signal` with learning rate `alpha`.

    The estimate starts at signal[0]; the estimate for trial t is the value
    after updating with signal[t-1].

    Returns:
        values (np.ndarray): estimate per trial (values[0] == signal[0]).
        errors (np.ndarray): prediction error used for each update
            (errors[0] is 0 because no update precedes the first trial).
    """
    signal = np.asarray(signal, dtype=float)
    n_trials = len(signal)
    values = np.zeros(n_trials)
    errors = np.zeros(n_trials)

    V = signal[0]
    values[0] = V
    for t in range(1, n_trials):
        pe = signal[t - 1] - V
        V = V + alpha * pe
        values[t] = V
        errors[t] = pe
    return values, errors


def fit_td_model_risk(Risk, yBinary, alpha_values=None):
    """
    Grid-search the learning rate of a simple TD-learning model on risk.

    For each alpha in alpha_values a running estimate is computed from Risk
    and a logistic regression (yBinary ~ estimate) is fitted. The absolute
    slope is stored as that alpha's inverse temperature, and the alpha with
    the largest inverse temperature is selected.

    Args:
        Risk: per-trial risk sequence (array-like of numbers).
        yBinary: per-trial binary outcome used as the regression target.
        alpha_values: learning rates to evaluate; defaults to 0.01..1.0 in
            steps of 0.01.

    Returns:
        alpha_values (np.ndarray): the learning rates that were evaluated
        inverseTemps (np.ndarray): |slope| per alpha (0 where the fit failed)
        bestAlpha (float): alpha with the largest inverse temperature
        best_val_est (np.ndarray): estimates under bestAlpha
        best_val_est_PE (np.ndarray): prediction errors under bestAlpha
        best_params: logistic-regression parameters for bestAlpha
            ([nan, nan] if that fit fails)
    """

    if alpha_values is None:
        alpha_values = np.arange(0.01, 1.01, 0.01)  # 0.01 to 1.0 in steps of 0.01

    inverseTemps = np.zeros(len(alpha_values))

    # 1) a TD update, then fit logistic regression for each alpha
    for a_idx, alpha in enumerate(alpha_values):
        val_estimates, _ = _risk_value_trace(Risk, alpha)

        # fit logistic regression: outcome ~ val_estimates
        X = sm.add_constant(val_estimates)
        model = sm.Logit(yBinary, X)
        try:
            result = model.fit(disp=0)
            # the second parameter is the slope, used as inverse temperature
            slope = np.asarray(result.params)[1]
            # a NaN slope would otherwise be picked by np.argmax below,
            # so treat a non-finite slope like a failed fit
            inverseTemps[a_idx] = abs(slope) if np.isfinite(slope) else 0
        except Exception:
            # fit failed for this alpha; score it 0 so it is never selected
            inverseTemps[a_idx] = 0

    # 2) pick best alpha
    best_idx = np.argmax(inverseTemps)
    bestAlpha = alpha_values[best_idx]

    # 3) recompute estimates for bestAlpha with the same routine as the grid
    #    search, so trial 0 holds Risk[0] here as well
    best_val_est, best_val_est_PE = _risk_value_trace(Risk, bestAlpha)

    # 4) fit logistic regression for bestAlpha
    X_best = sm.add_constant(best_val_est)
    best_model = sm.Logit(yBinary, X_best)
    try:
        best_result = best_model.fit(disp=0)
        best_params = best_result.params
    except Exception:
        best_params = [np.nan, np.nan]  # fit failed for the selected alpha

    return alpha_values, inverseTemps, bestAlpha, best_val_est, best_val_est_PE, best_params



In [None]:
# Fit the risk TD model for every participant and save one diagnostic
# figure (plot_{idx}.pdf) per participant.
output_dir = "6_RL_agent_TDlearn_output/risk"
# savefig does not create missing directories, so make sure the target exists
os.makedirs(output_dir, exist_ok=True)
# NOTE: this reuses the list name from the reward cell, so running this cell
# replaces the reward results held under that name.
best_alpha_accros_participants = []

for idx, df in enumerate(dataframes):
    # Keep only trials with a definite result. Comparing against the string
    # 'na' alone is not enough: cells that pandas parsed as missing (NaN) pass
    # that comparison and would enter the model as outcome 0.
    outcome_lc = df['outcome'].astype(str).str.lower()
    valid = outcome_lc.isin(['win', 'lose'])
    df = df[valid]

    outcomes = df['outcome'].astype(str).values
    nTrials = len(df)
    risk = df['risk'].values  # Directly using the risk column
    # win -> 1, lose -> 0
    yBinary = (outcome_lc[valid] == 'win').values.astype(int)

    # use the risk-specific fitter defined for this analysis
    alpha_values, inverseTemps, bestAlpha, best_val_est, best_val_est_PE, best_params = fit_td_model_risk(risk, yBinary)

    best_alpha_accros_participants.append(bestAlpha)

    fig, axs = plt.subplots(1, 4, figsize=(14, 4), gridspec_kw={'width_ratios': [1, 1, 1, 0.5]})

    # panel 0: inverse temperature across the alpha grid, best alpha marked
    axs[0].plot(alpha_values, inverseTemps, '-o', ms=4)
    axs[0].set_xlabel('alpha')
    axs[0].set_ylabel('inverse temperature')
    axs[0].axvline(bestAlpha, color='r', linestyle='--', label=f'best alpha = {bestAlpha:.2f}')
    axs[0].legend()

    # panel 1: estimate under the best alpha against the risk sequence
    axs[1].plot(best_val_est, label='estimated value')
    axs[1].plot(risk, drawstyle='steps-mid', alpha=0.5, label='risk')
    axs[1].set_xlabel('trial')
    axs[1].set_ylabel('value / risk')
    axs[1].set_title('best value estimate vs. actual risk')
    axs[1].set_ylim(0, 0.5)
    axs[1].legend()

    # panel 2: prediction error per trial (y-range left to autoscale)
    axs[2].plot(best_val_est_PE)
    axs[2].set_xlabel('trial')
    axs[2].set_ylabel('PE')

    # panel 3: distribution of prediction errors (y-range left to autoscale)
    axs[3].hist(best_val_est_PE, bins=20, orientation='horizontal', alpha=0.7)
    axs[3].set_xlabel('count')
    axs[3].set_ylabel('PE')
    axs[3].set_title('PE histogram')

    plt.tight_layout()

    filename = os.path.join(output_dir, f"plot_{idx}.pdf")
    plt.savefig(filename, format='pdf')
    # close the figure so figures do not accumulate across participants
    plt.close(fig)

    print(f"Saved: {filename}")
