In [2]:
import hddm
import pandas as pd
import pickle
import numpy as np
import os
import ssms.basic_simulators 
from ssms.basic_simulators import simulator as sim

In [22]:
import pandas as pd
import ssms.basic_simulators 
from ssms.basic_simulators import simulator as sim

def simulate_subject_data(subjectIndex, test, modelParamsDF, num_simulations=100):
    """
    Generate simulated DDM choices and reaction times for one subject.

    For every row of `test` that belongs to the subject, a trial-specific
    drift rate is computed from the subject's coefficients and the row's
    'gain' and 'loss' values. `num_simulations` samples are then drawn from
    the 'ddm' simulator using that drift together with the subject's
    a, z and t values.

    Parameters:
    subjectIndex (int): Subject identifier. It is matched against
        test['subj_idx'] and substituted into the '<param>_subj.<index>'
        row labels of `modelParamsDF`.
    test (pd.DataFrame): Trial data with 'subj_idx', 'gain' and 'loss' columns.
    modelParamsDF (pd.DataFrame): Parameter table indexed by node label,
        with a 'mean' column (presumably posterior means - confirm against
        the fitted model).
    num_simulations (int): The number of simulated samples per trial.

    Returns:
    pd.DataFrame: One row per simulated sample with the columns 'subj_idx',
        'response' (1 for a simulated choice of 1, otherwise 0), 'rt',
        'gain' and 'loss'. Rows are grouped by trial in the order the
        subject's trials appear in `test`.

    Raises:
    KeyError: If one of the subject's parameter rows is missing from
        `modelParamsDF`.
    """
    def subject_mean(label_template):
        # Fill the subject index into the row label and read its 'mean' entry.
        return modelParamsDF.loc[label_template.format(subjectIndex), 'mean']

    # NOTE: other model variants need different lookups here:
    #   - no-lambda models share a single 'v_I(gain + loss)_subj.{}' coefficient
    #     for both gain and loss;
    #   - no-gamma models fix z at 0.5 instead of reading 'z_subj.{}';
    #   - no-alpha models drop the intercept from the drift rate.
    intercept_coef = subject_mean('v_Intercept_subj.{}')
    gain_coef = subject_mean('v_gain_subj.{}')
    loss_coef = subject_mean('v_loss_subj.{}')
    a_value = subject_mean('a_subj.{}')
    z_value = subject_mean('z_subj.{}')
    t_value = subject_mean('t_subj.{}')

    # Only this subject's trials are simulated.
    trials = test[test['subj_idx'] == subjectIndex]

    rt_samples = []
    choice_samples = []
    for _, trial in trials.iterrows():
        # Trial-wise drift rate: linear in the trial's gain and loss.
        drift = intercept_coef + gain_coef * trial['gain'] + loss_coef * trial['loss']
        draws = sim.simulator(
            model='ddm',
            theta=[drift, a_value, z_value, t_value],
            n_samples=num_simulations,
        )
        # Each sample is indexed with [0] to pull out the scalar value.
        rt_samples.extend(sample[0] for sample in draws['rts'])
        choice_samples.extend(sample[0] for sample in draws['choices'])

    # Recode choices to 1/0: anything that is not 1 (i.e. -1) becomes 0.
    responses = [1 if picked == 1 else 0 for picked in choice_samples]

    # Repeat each trial's gain/loss once per simulated sample so the columns
    # line up with the flattened simulator output.
    return pd.DataFrame({
        'subj_idx': [subjectIndex] * len(rt_samples),
        'response': responses,
        'rt': rt_samples,
        'gain': np.repeat(trials['gain'].values, num_simulations),
        'loss': np.repeat(trials['loss'].values, num_simulations),
    })


In [23]:
# Load the saved HDDM model and take its node table, which
# simulate_subject_data reads the per-subject 'mean' values from.
modelName = 'Abundance'
m = hddm.load('savedModels/{}/{}'.format(modelName, modelName))
modelParamsDF = m.nodes_db

# Trial data; the subject column is renamed to the 'subj_idx' name that
# simulate_subject_data filters on.
test = pd.read_csv('allparticipant_GTdata_10.csv')
test.rename(columns={'subject': 'subj_idx'}, inplace=True)


total_subjects = 60
num_simulations_per_trial = 100

# Collect one simulated DataFrame per subject and concatenate once at the end.
# Calling pd.concat inside the loop re-copies every previously accumulated row
# on each iteration (quadratic in the number of subjects), and seeding it with
# an empty DataFrame triggers a FutureWarning in recent pandas versions.
per_subject_frames = []

# Subjects are assumed to be numbered 1..total_subjects (inclusive).
for subject_index in range(1, total_subjects + 1):
    simulated_df = simulate_subject_data(subject_index, test, modelParamsDF, num_simulations=num_simulations_per_trial)
    per_subject_frames.append(simulated_df)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# subjects were simulated.
if per_subject_frames:
    all_simulated_data = pd.concat(per_subject_frames, ignore_index=True)
else:
    all_simulated_data = pd.DataFrame()

# Once all subjects are processed, save the combined DataFrame to CSV.
# The file name follows modelName ('Sim_Abundance.csv' here) so that switching
# models does not overwrite another model's simulations.
all_simulated_data.to_csv('Sim_{}.csv'.format(modelName), index=False)