In [None]:
# data_generation.py
import random
import pandas as pd
import numpy as np
import hddm

In [None]:
# Choice-recoding helper
def get_choice(row):
    """Return a 0/1 flag telling whether a trial's response equals its condition code.

    Args:
        row: Any object exposing ``condition`` and ``response`` attributes
            (``simulate_data`` passes DataFrame rows via ``apply(axis=1)``).

    Returns:
        1 when ``condition`` is 1 and ``response`` is 1, or when ``condition``
        is 0 and ``response`` is 0; 0 when ``condition`` is 1 or 0 but the
        response differs; ``None`` when ``condition`` is neither 1 nor 0.
    """
    cond = row.condition
    if cond == 1:
        return int(bool(row.response == 1))
    if cond == 0:
        return int(bool(row.response == 0))
    # Unknown condition codes fall through without a value.
    return None

In [None]:
# Data-simulation helper
def simulate_data(a, v, t, z, dc, sv=0, sz=0, st=0, condition=0, nr_trials1=1000, nr_trials2=1000, seed=420):
    """Simulate trials for two stimulus codes with HDDM and merge them into one frame.

    Two calls to ``hddm.generate.gen_rand_data`` are made, each with one
    subject and no subject noise:

    * stimulus code 1: ``v + dc`` and ``z``, ``nr_trials1`` trials, ``seed``
    * stimulus code 0: ``v - dc`` and ``1 - z``, ``nr_trials2`` trials, ``seed + 1``

    Args:
        a, v, t, z, sv, sz, st: Values placed under the same keys in the
            parameter dict handed to the generator (presumably the usual DDM
            parameters - confirm against the HDDM documentation).
        dc: Amount added to ``v`` for stimulus 1 and subtracted for stimulus 0.
        condition: Label written into the ``condition`` column of the result.
        nr_trials1: Number of trials generated for stimulus code 1.
        nr_trials2: Number of trials generated for stimulus code 0.
        seed: Seed forwarded to the first generator call; the second call
            receives ``seed + 1``.

    Returns:
        DataFrame holding the generator's columns plus ``condition``, ``v``,
        ``dc``, ``z`` (the unmodified arguments), ``acc`` (the generator's
        original ``response`` cast to int), a recoded ``response`` (1 when the
        generated response equals the stimulus code, see ``get_choice``) and
        ``stimulus`` (1 where recoded response and ``acc`` are both 1 or both
        0, otherwise 0).
    """
    # One entry per generator call: (stimulus code, drift, start point, trials, seed).
    stimulus_specs = (
        (1, v + dc, z, nr_trials1, seed),
        (0, v - dc, 1 - z, nr_trials2, seed + 1),
    )

    frames = []
    for stim_code, drift, start_point, n_trials, stim_seed in stimulus_specs:
        # NOTE(review): `seed` is passed straight through to gen_rand_data;
        # confirm the installed HDDM/kabuki version accepts that keyword.
        frame, _ = hddm.generate.gen_rand_data(
            params={'a': a, 'v': drift, 't': t, 'z': start_point, 'sv': sv, 'sz': sz, 'st': st},
            size=n_trials, subjs=1, subj_noise=0, seed=stim_seed
        )
        # Until the final overwrite below, `condition` carries the stimulus code
        # that get_choice needs for recoding.
        frame['condition'] = stim_code
        frame['v'] = v
        frame['dc'] = dc
        frame['z'] = z  # keep the z argument as passed (not 1 - z)
        frames.append(frame)

    trials = pd.concat(frames, ignore_index=True)

    # Recode first, then stash the generator's response as `acc` before
    # replacing `response` with the recoded value.
    trials['bias_response'] = trials.apply(get_choice, axis=1)
    trials['acc'] = trials['response'].astype(int)
    trials['response'] = trials['bias_response'].astype(int)

    recoded, generated = trials['response'], trials['acc']
    both_one = (recoded == 1) & (generated == 1)
    both_zero = (recoded == 0) & (generated == 0)
    trials['stimulus'] = np.array(both_one | both_zero, dtype=int)

    # The stimulus code has served its purpose; store the caller's label instead.
    trials['condition'] = condition
    return trials.drop(columns=['bias_response'])

In [None]:
# Multi-subject data generation (v, a, t and z vary between subjects)
def generate_multi_subject_data(n_subjects=20, trials_per_level=150, z_bias=True, z_mean=0.5,
                                base_seed=420, z_shift=0.1):
    """Simulate a three-condition data set for several subjects.

    For every subject, ``v``, ``a``, ``t`` and a base ``z`` are drawn from
    normal distributions around the group-level values defined below. Each
    subject is then simulated with ``simulate_data`` in the conditions
    ``'neutral'``, ``'big_bias'`` and ``'small_bias'``. When ``z_bias`` is
    true, ``z`` is raised by ``z_shift`` in ``'big_bias'`` and lowered by
    ``z_shift`` in ``'small_bias'``; otherwise all three conditions share the
    subject's base ``z``. ``dc``, ``sv``, ``sz`` and ``st`` are always 0.

    Args:
        n_subjects: Number of subjects to simulate (at least 1).
        trials_per_level: Passed as both ``nr_trials1`` and ``nr_trials2`` to
            ``simulate_data`` for every condition.
        z_bias: Whether the bias conditions shift ``z``; also selects which
            group mean is used for ``t``.
        z_mean: Group mean of the base ``z``.
        base_seed: First seed of the run. Subject ``i`` draws its parameters
            with seed ``base_seed + i``.
        z_shift: Size of the ``z`` shift applied when ``z_bias`` is true.

    Returns:
        One DataFrame with the rows of all subjects and conditions (subjects
        in order, conditions in the order listed above), with ``subj_idx``
        set to the subject number, ``choice`` copied from ``response`` and
        ``stimulus2`` mapping ``stimulus`` 1 -> 1 and 0 -> -1.

    Raises:
        ValueError: If ``n_subjects`` is smaller than 1.
    """
    if n_subjects < 1:
        raise ValueError(f"n_subjects must be at least 1, got {n_subjects!r}")

    # Group-level parameters (they control the spread between subjects).
    group_params = {
        'v_mean': 1,          # group mean of v
        'v_std': 0.02,        # between-subject SD of v
        'a_mean': 1.0,        # group mean of a
        'a_std': 0.02,        # between-subject SD of a
        't_mean_zbias_true': 0.28,  # group mean of t when z_bias=True
        't_mean_zbias_false': 0.1,  # group mean of t when z_bias=False
        't_std': 0.02,        # between-subject SD of t
        'z_std': 0.01,        # between-subject SD of z
    }
    # The t mean depends only on z_bias, so pick it once.
    t_mean = group_params['t_mean_zbias_true'] if z_bias else group_params['t_mean_zbias_false']

    # Offset added to the subject's base z in each condition.
    shift = z_shift if z_bias else 0
    condition_offsets = (('neutral', 0), ('big_bias', shift), ('small_bias', -shift))

    # simulate_data forwards `seed` and `seed + 1` to its two generator calls,
    # so every (subject, condition) pair is given its own block of two seeds.
    # Reusing one seed for all conditions would hand conditions with identical
    # parameters (z_bias=False) identically seeded generator calls, and
    # consecutive subjects would share seed values.
    seeds_per_subject = 2 * len(condition_offsets)

    condition_frames = []
    for subject_id in range(n_subjects):
        subject_seed = base_seed + subject_id
        np.random.seed(subject_seed)
        random.seed(subject_seed)

        # Subject-level parameters; the draw order (v, a, t, z) is kept fixed
        # so a given seed always yields the same subject.
        subj_v = np.random.normal(loc=group_params['v_mean'], scale=group_params['v_std'])
        subj_a = np.random.normal(loc=group_params['a_mean'], scale=group_params['a_std'])
        subj_t = np.random.normal(loc=t_mean, scale=group_params['t_std'])
        subj_z_base = np.random.normal(loc=z_mean, scale=group_params['z_std'])

        for cond_index, (cond_name, z_offset) in enumerate(condition_offsets):
            df_cond = simulate_data(
                z=subj_z_base + z_offset, a=subj_a, v=subj_v, dc=0,
                t=subj_t, sv=0, st=0, sz=0,
                condition=cond_name, nr_trials1=trials_per_level, nr_trials2=trials_per_level,
                seed=base_seed + subject_id * seeds_per_subject + 2 * cond_index
            )
            df_cond['subj_idx'] = subject_id
            condition_frames.append(df_cond)

    # A single concatenation keeps the subject-then-condition row order.
    data = pd.concat(condition_frames, ignore_index=True)
    data['choice'] = data['response']
    data['stimulus2'] = data['stimulus'].map({1: 1, 0: -1})
    return data