In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import seaborn as sns

# DDM Performance Comparison

## Objective
Compare the performance of different DDM implementations and explore how hardware backends affect simulation speed.

## Implementations Available
You have three implementations to compare:
- Pure Python: Simple but slow (uses interpreted loops)
- NumPy: Vectorized operations (faster)
- TensorFlow: GPU-accelerated (potentially fastest)

## Assignment Tasks

### Part 1: Basic Performance Comparison
1. Run each implementation with the same parameters (e.g., `n_trials=10000`)
2. Measure and compare execution times
3. Verify that all implementations produce similar results by comparing the RT and response distributions.

### Part 2: Hardware Backend Exploration

If possible (e.g., on a MacBook with an M1/M2/M3 chip), measure how much the GPU helps compared to the CPU.

#### Hints
```python
# CPU execution
with tf.device('/CPU:0'):
    # Your code here

# GPU execution (if available)
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    tf.config.experimental.set_memory_growth(gpus[0], True)  # Prevents memory issues
    with tf.device('/GPU:0'):
        # Your code here
```

**Key Tips:**
- Metal (Apple Silicon M1/M2/M3) needs no extra device configuration once the `tensorflow-metal` plugin is installed
- Always clear session between tests: `tf.keras.backend.clear_session()`
- First run includes compilation time - discard it
- Take the minimum of 3+ runs for accurate timing

### Part 3: Performance Analysis
1. Test with different numbers of trials: [100, 1000, 10000, 100000]
2. Create performance plots showing:
   - Execution time vs. number of trials
   - Speedup factors between implementations
3. Answer these questions:
   - Which implementation is fastest for small vs. large trial counts?
   - How does performance scale with the number of trials?
   - What are the tradeoffs between the different implementations?

### Part 4: Advanced Exploration (Bonus)
1. Test different time steps (`dt` values) and their effect on:
   - Performance
   - Result accuracy
2. Compare single (float32) vs. double (float64) precision
3. Investigate memory usage differences between implementations

### Part 5: GPUs on Sciencecluster
If you have an account on Sciencecluster and feel like it, you can try
running the different implementations on the high-end GPUs
(T4/A100/V100/H100) available there.
https://docs.s3it.uzh.ch/cluster/resources/

# Functions

In [None]:
import numpy as np
import tensorflow as tf
import time

def ddm_python(n_trials=1000, max_t=10.0, dt=0.001, drift=0.5, noise=0.1, bound=1.0):
    """
    Basic Drift-Diffusion Model (DDM) simulation in pure Python.
    This is the most intuitive implementation but slowest due to Python's interpreted loops.

    Each trial starts with zero evidence and takes at most ``int(max_t / dt)``
    Euler steps. A trial ends as soon as ``abs(evidence) >= bound``; its
    reaction time is the time *after* the step that crossed the boundary
    (``step * dt``), and trials that never cross are reported as timeouts.

    Parameters:
    -----------
    n_trials : int
        Number of simulated trials
    max_t : float
        Maximum decision time in seconds
    dt : float
        Time step for simulation in seconds
    drift : float
        Drift rate (evidence accumulation rate)
    noise : float
        Noise standard deviation (diffusion coefficient)
    bound : float
        Decision boundary (±bound)

    Returns:
    --------
    responses : ndarray (int)
        1=upper bound, 0=lower bound, -1=no decision (timeout)
    rts : ndarray (float)
        Reaction times in seconds; ``max_t`` for timeout trials
    """
    # Count steps with an integer instead of accumulating `t += dt`, which
    # drifts by floating-point error and makes the number of steps unstable.
    n_steps = int(max_t / dt)

    # Loop-invariant per-step quantities, computed once.
    drift_step = drift * dt              # systematic evidence per step
    noise_scale = noise * np.sqrt(dt)    # std. dev. of the Brownian increment

    responses = []  # One entry per trial: 1, 0 or -1
    rts = []        # One entry per trial, in seconds

    for _ in range(n_trials):
        evidence = 0.0   # Accumulated evidence starts at 0
        response = -1    # Assume timeout until a boundary is crossed
        rt = max_t

        for step in range(1, n_steps + 1):
            # Wiener-process update: deterministic drift + Gaussian noise
            evidence += drift_step + noise_scale * np.random.randn()

            if abs(evidence) >= bound:
                response = 1 if evidence > 0 else 0
                # The evidence crossed at the END of this step, i.e. at step*dt
                rt = step * dt
                break

        responses.append(response)
        rts.append(rt)

    return np.array(responses, dtype=int), np.array(rts, dtype=float)

def ddm_optimal(n_trials=1000, max_t=10.0, dt=0.001, drift=0.5, noise=0.1, bound=1.0):
    """
    Optimized NumPy DDM simulation that:
    1. Runs all trials in parallel for full duration
    2. Uses vectorized operations for speed
    3. Finds first crossing time for each trial
    4. Reports the boundary that was hit at that FIRST crossing

    Because every trajectory is simulated for the full duration, a trial may
    cross one boundary and later wander across the other one. Only the first
    crossing counts as the decision, so the response is read off at the first
    crossing step rather than from "was the upper bound ever reached".

    Parameters:
    -----------
    n_trials : int
        Number of simulated trials
    max_t : float
        Maximum decision time in seconds
    dt : float
        Time step for simulation in seconds
    drift : float
        Drift rate (evidence accumulation rate)
    noise : float
        Noise standard deviation (diffusion coefficient)
    bound : float
        Decision boundary (±bound)

    Returns:
    --------
    responses : ndarray (int)
        1=upper bound, 0=lower bound, -1=no decision (timeout)
    rts : ndarray (float)
        Reaction times in seconds; ``max_t`` for timeout trials
    """
    n_steps = int(max_t / dt)  # Number of time steps

    # Generate all noise terms at once for all trials and time steps
    noise_terms = noise * np.sqrt(dt) * np.random.randn(n_trials, n_steps)

    # The drift increment is the same scalar everywhere, so let it broadcast
    # instead of materializing a second (n_trials, n_steps) matrix.
    increments = drift * dt + noise_terms

    # Evidence trajectories; column 0 is the initial condition (0 at t=0),
    # so column k holds the evidence at time k*dt.
    evidence = np.zeros((n_trials, n_steps + 1))
    evidence[:, 1:] = np.cumsum(increments, axis=1)

    # Find boundary crossings
    crossed_up = evidence >= bound      # Upper boundary crossings
    crossed_down = evidence <= -bound   # Lower boundary crossings
    crossed = crossed_up | crossed_down  # Any boundary crossing

    # Trials that reached a boundary at all
    decided = crossed.any(axis=1)

    # First crossing step for each trial (argmax gives the first True index;
    # it is 0 for trials without any crossing, which `decided` masks out).
    crossing_steps = np.argmax(crossed, axis=1)

    # Reaction times: crossing_step * dt, or max_t if no crossing
    rts = np.where(decided, crossing_steps * dt, max_t)

    # Which boundary was hit at the first crossing step of each trial
    hit_upper_first = crossed_up[np.arange(n_trials), crossing_steps]

    # Responses: 1=upper, 0=lower, -1=no decision
    responses = np.where(decided, np.where(hit_upper_first, 1, 0), -1)

    return responses, rts

def ddm_tensorflow(n_trials=1000, max_t=10.0, dt=0.001, drift=0.5, noise=0.1, bound=1.0):
    """
    TensorFlow DDM implementation with scoped operations for visualization.
    Uses GPU acceleration if available.

    All trajectories are simulated for the full duration in float32. The
    decision of a trial is the boundary reached at its FIRST crossing; a later
    crossing of the opposite boundary is ignored.

    Parameters:
    -----------
    n_trials : int
        Number of simulated trials
    max_t : float
        Maximum decision time in seconds
    dt : float
        Time step for simulation in seconds
    drift : float
        Drift rate (evidence accumulation rate)
    noise : float
        Noise standard deviation (diffusion coefficient)
    bound : float
        Decision boundary (±bound)

    Returns:
    --------
    A TensorFlow function that returns (responses, rts) when called:
    responses is an int32 tensor (1=upper bound, 0=lower bound, -1=no decision)
    and rts is a float32 tensor of reaction times in seconds (max_t on timeout).
    """
    n_steps = int(max_t / dt)  # Number of time steps

    @tf.function
    def simulate():
        with tf.name_scope('DDM_Simulation'):
            # Create constants for parameters with descriptive names
            with tf.name_scope('Parameters'):
                # n_trials/n_steps constants are not used in the computation;
                # they are retained from the original as named graph constants.
                n_trials_const = tf.constant(n_trials, dtype=tf.int32, name='n_trials')
                n_steps_const = tf.constant(n_steps, dtype=tf.int32, name='n_steps')
                dt_const = tf.constant(dt, dtype=tf.float32, name='dt')
                drift_const = tf.constant(drift, dtype=tf.float32, name='drift')
                noise_const = tf.constant(noise, dtype=tf.float32, name='noise')
                bound_const = tf.constant(bound, dtype=tf.float32, name='bound')
                max_t_const = tf.constant(max_t, dtype=tf.float32, name='max_t')

            # Generate random noise for all trials and time steps
            with tf.name_scope('Noise_Generation'):
                noise_terms = noise_const * tf.sqrt(dt_const) * tf.random.normal([n_trials, n_steps])

            # Drift increment v * dt: a scalar that broadcasts over the noise
            # matrix, so no dense [n_trials, n_steps] drift tensor is needed.
            with tf.name_scope('Drift_Terms'):
                drift_step = drift_const * dt_const

            # Accumulate evidence over time: X(t) = Σ(v*dt + σ*dW)
            with tf.name_scope('Evidence_Accumulation'):
                evidence = tf.cumsum(drift_step + noise_terms, axis=1)

            # Add initial condition: evidence starts at 0 at t=0
            with tf.name_scope('Initial_Condition'):
                evidence = tf.pad(evidence, [[0, 0], [1, 0]], constant_values=0.0)

            # Check which trials crossed each boundary
            with tf.name_scope('Boundary_Checks'):
                crossed_up = evidence >= bound_const     # Upper boundary crossings
                crossed_down = evidence <= -bound_const  # Lower boundary crossings
                any_crossing = crossed_up | crossed_down  # Any boundary crossing

            # Calculate response times
            with tf.name_scope('Response_Time_Calculation'):
                decided = tf.reduce_any(any_crossing, axis=1)
                # argmax over a 0/1 tensor yields the first index holding a 1
                first_crossing = tf.argmax(tf.cast(any_crossing, tf.int32), axis=1)
                rts = tf.where(
                    decided,                                        # If any crossing occurred
                    dt_const * tf.cast(first_crossing, tf.float32),  # RT = first crossing time
                    max_t_const                                     # Timeout value if no crossing
                )

            # Determine responses: 1=upper, 0=lower, -1=no decision
            with tf.name_scope('Response_Determination'):
                # The upper bound is the decision only when its first crossing
                # coincides with the first crossing of ANY bound; otherwise the
                # lower bound was reached earlier and must win.
                first_up = tf.argmax(tf.cast(crossed_up, tf.int32), axis=1)
                hit_upper_first = tf.reduce_any(crossed_up, axis=1) & tf.equal(first_up, first_crossing)
                responses = tf.where(
                    hit_upper_first,
                    tf.constant(1, dtype=tf.int32),        # Response = 1
                    tf.where(
                        decided,
                        tf.constant(0, dtype=tf.int32),    # Lower bound reached first
                        tf.constant(-1, dtype=tf.int32)    # No decision
                    )
                )
        return responses, rts

    return simulate

In [None]:
# import matplotlib.pyplot as plt
# # # Force CPU
# tf.config.set_visible_devices([], 'GPU')

# # Use GPU if available
# # tf.config.set_visible_devices(tf.config.list_physical_devices('GPU'), 'GPU')
# tf_simulate = ddm_tensorflow(10000, drift=0.2, bound=.5)
# start = time.time()
# tf_resp, tf_rts = tf_simulate()
# print(f'Time taken: {time.time() - start:.4f}s')