# Split Half

In [1]:
import pandas
import numpy
import multiprocess

In [2]:
# Load the data set that the split-half cells below group by
# Subject / Matching / Identity / Session and split into random halves.
df = pandas.read_csv('./DATA/df1.csv')

In [3]:
# Single-process split-half.
# For every seed 0..9, each (Subject, Matching, Identity, Session) cell of `df`
# is randomly split into two equally sized halves; shl[seed] ends up holding
# [first_half, second_half] as two DataFrames.

# The grouping does not depend on the seed, so it is built once instead of
# once per iteration.
grouped = df.groupby(['Subject', 'Matching', 'Identity', 'Session'])


def split(group):
    """Randomly split the rows of one group into two equally sized halves.

    Uses the global numpy random state, so the caller controls
    reproducibility via ``numpy.random.seed``.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows belonging to a single group.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        Two frames of ``len(group) // 2`` rows each, with a fresh 0..n-1
        index. When the group has an odd number of rows, one randomly
        chosen row is left out so that both halves have the same size.
    """
    indices = numpy.random.permutation(len(group))  # shuffled row positions
    half = len(indices) // 2
    first = group.iloc[indices[:half]].reset_index(drop=True)
    # Stopping at 2 * half drops the last shuffled row when the count is odd.
    second = group.iloc[indices[half:2 * half]].reset_index(drop=True)
    return first, second


# One [first_half, second_half] entry per random seed.
shl = []

for i in range(10):

    # Seed the global generator so that split number i is reproducible.
    numpy.random.seed(i)

    # Iterate over the groups directly (same sorted key order that
    # groupby.apply uses). This keeps the grouping columns in every piece
    # regardless of how the installed pandas treats them inside apply().
    halves = [split(group) for _, group in grouped]
    first_parts = [first for first, _ in halves]
    second_parts = [second for _, second in halves]

    # Concatenate once per half instead of growing a DataFrame inside the
    # loop (which is quadratic and starts from an empty, dtype-less frame).
    result1 = pandas.concat(first_parts, ignore_index=True) if first_parts else pandas.DataFrame()
    result2 = pandas.concat(second_parts, ignore_index=True) if second_parts else pandas.DataFrame()

    shl.append([result1, result2])

# Drop the temporaries so that only `shl` (and `split`) stay in the namespace.
del grouped, i, halves, first_parts, second_parts, result1, result2

In [3]:
# Multi-process version (worker function)
def process_iteration(i):
    """Produce one random split-half of ``./DATA/df1.csv`` using seed ``i``.

    The data are grouped by Subject, Matching, Identity and Session. Within
    every group the rows are shuffled and divided into two equally sized
    halves; when a group has an odd number of rows, one randomly chosen row
    is left out.

    Parameters
    ----------
    i : int
        Seed passed to ``numpy.random.seed``; the same ``i`` always yields
        the same split for the same file.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(result1, result2)``: the first and second halves of all groups,
        stacked in group order with a fresh 0..n-1 index. Two empty
        DataFrames are returned when the file contains no groups.
    """
    # Imported and loaded locally — presumably so the function is
    # self-contained when it runs inside a pool worker process.
    import pandas
    import numpy
    df = pandas.read_csv('./DATA/df1.csv')

    # Group by the four design variables.
    grouped = df.groupby(['Subject', 'Matching', 'Identity', 'Session'])

    # Seed the generator so that split number i is reproducible.
    numpy.random.seed(i)

    first_parts = []
    second_parts = []
    # Iterate over the groups directly (same sorted key order that
    # groupby.apply uses). This keeps the grouping columns in every piece
    # regardless of how the installed pandas treats them inside apply().
    for _, group in grouped:
        indices = numpy.random.permutation(len(group))  # shuffled row positions
        half = len(indices) // 2
        first_parts.append(group.iloc[indices[:half]])
        # Stopping at 2 * half drops the last shuffled row when the count is odd.
        second_parts.append(group.iloc[indices[half:2 * half]])

    # pandas.concat raises on an empty list, so handle "no groups" explicitly.
    if not first_parts:
        return pandas.DataFrame(), pandas.DataFrame()

    # Concatenate once per half instead of growing a DataFrame inside the
    # loop; ignore_index=True yields the final 0..n-1 index directly.
    result1 = pandas.concat(first_parts, ignore_index=True)
    result2 = pandas.concat(second_parts, ignore_index=True)

    return result1, result2

In [4]:
# Multi-process version (execution)
# Runs process_iteration for seeds 0..num_iterations-1 in a worker pool and
# collects the results into shl, where shl[i] == [first_half, second_half].
if __name__ == '__main__':
    num_iterations = 10

    # The context manager shuts the worker processes down once map() has
    # returned, instead of leaving the pool open for the rest of the session.
    with multiprocess.Pool(processes=16) as pool:
        results = pool.map(process_iteration, range(num_iterations))

    # map() preserves input order, so entry i belongs to seed i.
    shl = [[result1, result2] for result1, result2 in results]

    # Clean up inside the guard: these names only exist when the guard ran.
    del num_iterations, pool, results

# PyDDM

In [5]:
import pyddm

In [7]:
from pyddm import Sample

# Build a PyDDM Sample from shl[0][0], i.e. the first half of the first split.
# Response times are read from the "RT_sec" column and correctness from "ACC".
# NOTE(review): the "roitman" name looks inherited from a PyDDM example; the
# data here come from shl, not from a Roitman data set — confirm and consider renaming.
roitman_sample = Sample.from_pandas_dataframe(shl[0][0], rt_column_name="RT_sec", correct_column_name="ACC")

In [32]:
import pyddm as ddm
class DriftCoherence(ddm.models.Drift):
    """Drift component whose rate is ``driftcoh`` multiplied by the ``Session`` condition.

    NOTE(review): the class name and the display name below still talk about
    "coherence", while the condition actually used is ``Session`` — confirm
    that scaling drift by the Session value is what is intended.
    """
    name = "Drift depends linearly on coherence"
    # Model parameter(s) this component exposes.
    required_parameters = ["driftcoh"]
    # Condition(s) read for each trial; must match a column name of the sample.
    required_conditions = ["Session"]

    def get_drift(self, conditions, **kwargs):
        """Return the drift value for the given trial conditions.

        ``conditions`` is indexed by condition name; extra keyword arguments
        are accepted and ignored.
        """
        slope = self.driftcoh
        session_value = conditions['Session']
        return slope * session_value

In [33]:
from pyddm import Model, Fittable
from pyddm.functions import fit_adjust_model, display_model
from pyddm.models import NoiseConstant, BoundConstant, OverlayChain, OverlayNonDecision, OverlayPoissonMixture
# Assemble the model that is fitted to roitman_sample:
#   - drift: DriftCoherence, with driftcoh fitted within [0, 20]
#   - noise: constant, fixed at 1
#   - bound: constant, with B fitted within [0.1, 1.5]
#   - overlay: non-decision time fitted within [0, 0.4], followed by a
#     Poisson mixture with fixed pmixturecoef=.02 and rate=1
# NOTE(review): dx / dt / T_dur are presumably the spatial step, the time step
# and the simulated duration in seconds — confirm against the PyDDM docs.
# NOTE(review): the model name still refers to "Roitman data" and "coherence",
# whereas the drift component here uses the Session condition.
model_rs = Model(name='Roitman data, drift varies with coherence',
                 drift=DriftCoherence(driftcoh=Fittable(minval=0, maxval=20)),
                 noise=NoiseConstant(noise=1),
                 bound=BoundConstant(B=Fittable(minval=.1, maxval=1.5)),
                 # Only one overlay can be given, so OverlayChain is used to
                 # string several overlays together; they are applied
                 # sequentially, in list order. OverlayNonDecision
                 # implements a non-decision time by shifting the
                 # resulting distribution of response times by
                 # `nondectime` seconds.
                 overlay=OverlayChain(overlays=[OverlayNonDecision(nondectime=Fittable(minval=0, maxval=.4)),
                                                OverlayPoissonMixture(pmixturecoef=.02,
                                                                      rate=1)]),
                 dx=.001, dt=.01, T_dur=2)

# Fit the model to the sample; verbose=False keeps the output short.
# The recorded output below reports differential_evolution as the fitting
# method and negative log likelihood as the loss.
# NOTE(review): the original comment claimed the fit is fast because PyDDM can
# detect an analytical solution for DriftCoherence — not verifiable from this notebook.
fit_model_rs = fit_adjust_model(sample=roitman_sample, model=model_rs, verbose=False)

Info: Params [0.56361487 0.60989595 0.34492846] gave -7941.3121146826425


In [34]:
# Print a summary of the fitted model: its components, fitted and fixed
# parameters, and fit information.
display_model(fit_model_rs)

Model Roitman data, drift varies with coherence information:
Drift component DriftCoherence:
    Drift depends linearly on coherence
    Fitted parameters:
    - driftcoh: 0.563615
Noise component NoiseConstant:
    constant
    Fixed parameters:
    - noise: 1.000000
Bound component BoundConstant:
    constant
    Fitted parameters:
    - B: 0.609896
IC component ICPointSourceCenter:
    point_source_center
    (No parameters)
Overlay component OverlayChain:
    Overlay component OverlayNonDecision:
        Add a non-decision by shifting the histogram
        Fitted parameters:
        - nondectime: 0.344928
    Overlay component OverlayPoissonMixture:
        Poisson distribution mixture model (lapse rate)
        Fixed parameters:
        - pmixturecoef: 0.020000
        - rate: 1.000000
Fit information:
    Loss function: Negative log likelihood
    Loss function value: -7941.3121146826425
    Fitting method: differential_evolution
    Solver: auto
    Other properties:
        - n

In [35]:
# Show the model parameters as a nested dict keyed by component
# (drift / noise / bound / IC / overlay).
fit_model_rs.parameters()

{'drift': {'driftcoh': Fitted(0.5636148656719929, minval=0, maxval=20)},
 'noise': {'noise': 1},
 'bound': {'B': Fitted(0.6098959509158732, minval=0.1, maxval=1.5)},
 'IC': {},
 'overlay': {'nondectime': Fitted(0.3449284578730313, minval=0, maxval=0.4),
  'pmixturecoef': 0.02,
  'rate': 1}}