In [None]:
from numba import jit, njit

@njit
def np_reduce_along_axis(func1d, axis, arr):
    """Collapse a 2-D array to 1-D by applying ``func1d`` to each slice.

    Parameters
    ----------
    func1d : callable
        Takes a 1-D slice of ``arr`` and returns a scalar.
    axis : int
        ``0`` feeds each column ``arr[:, j]`` to ``func1d`` (one result per
        column); ``1`` feeds each row ``arr[i, :]`` (one result per row).
    arr : 2-D array

    Returns
    -------
    1-D array created with ``np.empty`` (default dtype), of length
    ``arr.shape[1]`` for ``axis == 0`` and ``arr.shape[0]`` for ``axis == 1``.

    Raises
    ------
    AssertionError
        If ``arr`` is not 2-D or ``axis`` is neither 0 nor 1.
    """
    assert arr.ndim == 2
    assert axis == 0 or axis == 1
    n_rows, n_cols = arr.shape

    if axis == 1:
        # One value per row.
        per_row = np.empty(n_rows)
        for row in range(n_rows):
            per_row[row] = func1d(arr[row, :])
        return per_row

    # axis == 0: one value per column.
    per_col = np.empty(n_cols)
    for col in range(n_cols):
        per_col[col] = func1d(arr[:, col])
    return per_col

@njit
def np_apply_along_axis(func1d, axis, arr):
    """Apply a length-preserving 1-D function to every slice of a 2-D array.

    Parameters
    ----------
    func1d : callable
        Takes a 1-D slice of ``arr`` and returns a 1-D array of the same
        length (e.g. ``np.cumsum``).
    axis : int
        ``0`` applies ``func1d`` to each column ``arr[:, j]``; ``1`` applies
        it to each row ``arr[i, :]``.
    arr : 2-D array

    Returns
    -------
    Array with the same shape as ``arr`` (allocated with ``np.empty``, so it
    has the default dtype) holding the transformed slices.

    Raises
    ------
    AssertionError
        If ``arr`` is not 2-D or ``axis`` is neither 0 nor 1.
    """
    assert arr.ndim == 2
    assert axis in [0, 1]
    result = np.empty(arr.shape)
    if axis == 0:
        # Walk over the columns, i.e. shape[1]. Looping over len(result)
        # (== shape[0]) is only correct for square input: otherwise it either
        # leaves columns unwritten or indexes past the last column.
        for i in range(arr.shape[1]):
            result[:, i] = func1d(arr[:, i])
    else:
        for i in range(arr.shape[0]):
            result[i, :] = func1d(arr[i, :])
    return result

@njit
def np_mean(array, axis):
    """Return the mean of each slice of a 2-D ``array`` along ``axis`` (0 or 1).

    Delegates to ``np_reduce_along_axis`` with ``np.mean`` as the per-slice
    reducer, so the result is a 1-D array.
    """
    reducer = np.mean
    return np_reduce_along_axis(reducer, axis, array)

@njit
def np_std(array, axis):
    """Return the standard deviation of each slice of a 2-D ``array`` along ``axis`` (0 or 1).

    Delegates to ``np_reduce_along_axis`` with ``np.std`` as the per-slice
    reducer, so the result is a 1-D array.
    """
    reducer = np.std
    return np_reduce_along_axis(reducer, axis, array)

@njit
def np_cumsum(array, axis):
    """Return the cumulative sum of a 2-D ``array`` along ``axis`` (0 or 1).

    Delegates to ``np_apply_along_axis`` with ``np.cumsum`` applied to each
    1-D slice; the result has the same shape as ``array``.
    """
    transform = np.cumsum
    return np_apply_along_axis(transform, axis, array)


@njit()
def _simulate_trials(ndt, drift, s2, n_trials=1000, max_t=5., dt=.001):
    """Simulate ``n_trials`` drift-plus-diffusion trajectories on a regular time grid.

    The deterministic component is the cumulative sum of a step signal that
    is 0 while ``t <= ndt`` and ``drift`` afterwards, multiplied by ``dt``.
    The stochastic component is the per-trial cumulative sum of Gaussian
    noise multiplied by ``sqrt(dt)``.

    Parameters
    ----------
    ndt : float
        Time after which the drift signal switches on.
    drift : float
        Value of the signal once ``t > ndt``.
    s2 : float
        Passed as the scale argument of ``np.random.normal``.
        NOTE(review): the name suggests a variance, but it is used as a
        standard deviation here -- confirm which is intended.
    n_trials : int
        Number of independent trajectories (rows of the result).
    max_t : float
        End of the time grid (exclusive, as in ``np.arange``).
    dt : float
        Time step of the grid.

    Returns
    -------
    Array of shape ``(n_trials, len(np.arange(0, max_t, dt)))``: the shared
    deterministic path broadcast across trials plus each trial's diffusion.
    """
    t = np.arange(0, max_t, dt)
    signal = np.where(t > ndt, drift, 0)
    # Use a separate name instead of rebinding the scalar ``drift`` argument
    # to an array, so the variable keeps a single type.
    mean_path = np.cumsum(signal * dt)
    noise = np.random.normal(0, s2, (n_trials, len(t)))
    diffusion = np_cumsum(noise * np.sqrt(dt), 1)
    # Return the trajectories; without this the work above is discarded and
    # callers receive None.
    return mean_path + diffusion

def simulate_trials(pars, n_trials=1000, max_t=5., dt=.001):
    """Unpack a ``(ndt, drift, s2)`` triple and forward it to ``_simulate_trials``.

    Parameters
    ----------
    pars : sequence of exactly three values
        Unpacked as ``ndt, drift, s2``.
    n_trials, max_t, dt
        Passed through unchanged as keyword arguments.

    Returns
    -------
    Whatever ``_simulate_trials`` returns.
    """
    ndt, drift, s2 = pars
    sim_options = dict(n_trials=n_trials, max_t=max_t, dt=dt)
    return _simulate_trials(ndt, drift, s2, **sim_options)
# Run the simulation once and throw the result away.
# NOTE(review): presumably a warm-up call so numba's JIT compilation cost is
# not included in the timings further down -- confirm. ``true_pars`` is
# defined outside this part of the file.
_ = simulate_trials(true_pars)

# Number of points in the time grid for max_t=5, dt=.001 (the defaults above).
len(np.arange(0, 5, .001))

@njit()
def r():
    """Draw a 1000 x 500 array of samples from a normal distribution (mean 0, scale 1)."""
    shape = (1000, 500)
    return np.random.normal(0, 1, shape)
# Call r() once before timing it.
# NOTE(review): presumably so the numba compile happens outside %timeit -- confirm.
_ = r()
# Time the random-number draw on its own.
%timeit r()

# Wall-clock time of one full simulate_trials call.
%time _ = simulate_trials(true_pars)

# Line-by-line profile of the inner function while running the wrapper.
# NOTE(review): _simulate_trials is numba-compiled; confirm that %lprun can
# actually report per-line timings for it.
%lprun -f _simulate_trials simulate_trials(true_pars)

%lprun -f simulate_trials simulate_trials(true_pars[0], true_pars[1], true_pars[2])

# def _simulate_trials(ndt, drift, s2, n_trials=1000, max_t=5., dt=.01):
#     threshold = 1.
#     t = np.arange(0, max_t, dt)
#     signal = np.where(t > ndt, drift, 0)
#     drift = np.cumsum(signal*dt)
#     noise = np.random.normal(0, s2, (n_trials, len(t)))
#     diffusion = np_cumsum(noise * np.sqrt(dt), 1)
#     X = drift + diffusion
#     return X

# def simulate_trials(pars, n_trials=1000, max_t=5., dt=.01):
#     ndt, drift, s2 = pars
#     return _simulate_trials(ndt, drift, s2, n_trials=n_trials, max_t=max_t, dt=dt)
# %time simulate_trials(true_pars)