
## 5. Implementation Code

Here is the exact benchmarking script described in the transcript, proving the 300x speed difference.



In [1]:
import numpy as np
import time

def demo_vectorization_speed():
    # 1. Create massive arrays (1 Million elements)
    a = np.random.rand(1000000)
    b = np.random.rand(1000000)
    
    # --- VECTORIZED VERSION (The Right Way) ---
    tic = time.time()
    c = np.dot(a, b)
    toc = time.time()
    
    print(f"Vectorized Result: {c:.4f}")
    print(f"Vectorized Time:   {1000 * (toc - tic):.4f} ms")
    
    # --- FOR LOOP VERSION (The Wrong Way) ---
    c = 0
    tic = time.time()
    for i in range(1000000):
        c += a[i] * b[i]
    toc = time.time()
    
    print(f"For-Loop Result:   {c:.4f}")
    print(f"For-Loop Time:     {1000 * (toc - tic):.4f} ms")
    
    # --- RATIO ---
    # On typical hardware, this prints ~300-500x speedup
    print("\nVectorization allows us to train models in minutes instead of days.")

if __name__ == "__main__":
    demo_vectorization_speed()

Vectorized Result: 249840.0777
Vectorized Time:   4.9579 ms
For-Loop Result:   249840.0777
For-Loop Time:     293.2701 ms

Vectorization allows us to train models in minutes instead of days.


In [3]:

import numpy as np

def half_vectorized_step(w, b, X, Y, learning_rate):
    """
    Removes the inner loop over features.
    Still has the loop over examples (m).
    """
    m = X.shape[1]
    n_x = X.shape[0]
    
    # Initialize gradient vector (Not separate scalars!)
    dw = np.zeros((n_x, 1)) 
    db = 0
    
    # Loop over examples (We will remove this next!)
    for i in range(m):
        # 1. Select single example
        # Reshape ensures it keeps (n_x, 1) dimension
        x_i = X[:, i].reshape(-1, 1) 
        y_i = Y[:, i]
        
        # 2. Forward Pass
        z_i = np.dot(w.T, x_i) + b
        a_i = 1 / (1 + np.exp(-z_i))
        
        # 3. Backward Pass
        dz_i = a_i - y_i
        
        # 4. VECTORIZED ACCUMULATION
        # Instead of dw1+=..., dw2+=...
        # We update the entire vector at once.
        dw += x_i * dz_i 
        db += dz_i

    # Average
    dw = dw / m
    db = db / m
    
    # Update
    w = w - learning_rate * dw
    b = b - learning_rate * db
    
    return w, b