<a href="https://colab.research.google.com/github/2303A51531/23CSBTB39-40/blob/main/hpc_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import numpy as np
import time

def serial_vector_compute(A, B, alpha):
    N = len(A)
    C = np.zeros(N)
    for i in range(N):
        C[i] = alpha * A[i] + B[i]
    return C


from numba import njit, prange

@njit(parallel=True)
def parallel_vector_compute(A, B, alpha):
    N = len(A)
    C = np.zeros(N)
    for i in prange(N):
        C[i] = alpha * A[i] + B[i]
    return C
def benchmark(N):
    A = np.random.rand(N)
    B = np.random.rand(N)
    alpha = 2.5

    # Serial timing
    start = time.time()
    serial_vector_compute(A, B, alpha)
    t_serial = time.time() - start

    # Parallel timing
    start = time.time()
    parallel_vector_compute(A, B, alpha)
    t_parallel = time.time() - start

    return t_serial, t_parallel
sizes = [10**5, 10**6, 5*10**6]

for N in sizes:
    ts, tp = benchmark(N)
    print(f"N = {N}")
    print("Serial time   :", round(ts, 6), "seconds")
    print("Parallel time :", round(tp, 6), "seconds")
    print("Speedup       :", round(ts / tp, 2))
    print("-" * 40)


N = 100000
Serial time   : 0.029032 seconds
Parallel time : 0.255562 seconds
Speedup       : 0.11
----------------------------------------
N = 1000000
Serial time   : 0.275817 seconds
Parallel time : 0.001148 seconds
Speedup       : 240.26
----------------------------------------
N = 5000000
Serial time   : 1.906098 seconds
Parallel time : 0.019722 seconds
Speedup       : 96.65
----------------------------------------


In [12]:
import numpy as np
import time
from numba import njit, prange

# -------------------------------
# Serial Matrix Multiplication
# -------------------------------
def serial_matrix_multiply(A, B):
    N = A.shape[0]
    C = np.zeros((N, N))
    for i in range(N):
        for j in range(N):
            for k in range(N):
                C[i, j] += A[i, k] * B[k, j]
    return C


# --------------------------------
# Parallel Outer Loop Version
# --------------------------------
@njit(parallel=True)
def parallel_outer_matrix_multiply(A, B):
    N = A.shape[0]
    C = np.zeros((N, N))
    for i in prange(N):
        for j in range(N):
            for k in range(N):
                C[i, j] += A[i, k] * B[k, j]
    return C


# --------------------------------
# Parallel Collapsed Loop Version
# --------------------------------
@njit(parallel=True)
def parallel_collapsed_matrix_multiply(A, B):
    N = A.shape[0]
    C = np.zeros((N, N))
    for idx in prange(N * N):
        i = idx // N
        j = idx % N
        for k in range(N):
            C[i, j] += A[i, k] * B[k, j]
    return C


# --------------------------------
# MAIN (This ensures OUTPUT comes)
# --------------------------------
if __name__ == "__main__":

    N = 200   # keep small first (important!)
    A = np.random.rand(N, N)
    B = np.random.rand(N, N)

    print("Matrix size:", N, "x", N)
    print("-" * 40)

    # Serial timing
    start = time.time()
    serial_matrix_multiply(A, B)
    t_serial = time.time() - start
    print("Serial Time:", round(t_serial, 4), "seconds")

    # Parallel outer (first call compiles)
    start = time.time()
    parallel_outer_matrix_multiply(A, B)
    t_outer = time.time() - start
    print("Parallel Outer Loop Time:", round(t_outer, 4), "seconds")

    # Parallel collapsed
    start = time.time()
    parallel_collapsed_matrix_multiply(A, B)
    t_collapsed = time.time() - start
    print("Parallel Collapsed Loop Time:", round(t_collapsed, 4), "seconds")

    print("-" * 40)
    print("Speedup (Outer):", round(t_serial / t_outer, 2))
    print("Speedup (Collapsed):", round(t_serial / t_collapsed, 2))



Matrix size: 200 x 200
----------------------------------------
Serial Time: 3.8277 seconds
Parallel Outer Loop Time: 0.6979 seconds
Parallel Collapsed Loop Time: 0.4653 seconds
----------------------------------------
Speedup (Outer): 5.48
Speedup (Collapsed): 8.23


In [15]:
import time
import random

def serial_irregular_workload(N):
    result = 0
    for i in range(N):
        work = random.randint(1000, 10000)  # variable work
        for j in range(work):
            result += j % 7
    return result
from numba import njit, prange

@njit(parallel=True)
def parallel_irregular_workload(workloads):
    result = 0
    for i in prange(len(workloads)):
        temp = 0
        for j in range(workloads[i]):
            temp += j % 7
        result += temp
    return result
import numpy as np
import time
import random
from numba import njit, prange

def serial_irregular_workload(workloads):
    result = 0
    for i in range(len(workloads)):
        for j in range(workloads[i]):
            result += j % 7
    return result

@njit(parallel=True)
def parallel_irregular_workload(workloads):
    result = 0
    for i in prange(len(workloads)):
        temp = 0
        for j in range(workloads[i]):
            temp += j % 7
        result += temp
    return result


if __name__ == "__main__":

    N = 50  # number of images
    workloads = np.array([random.randint(5_000, 50_000) for _ in range(N)])

    print("Number of images:", N)
    print("Workload variation (sample):", workloads[:5])
    print("-" * 40)

    # Serial timing
    start = time.time()
    serial_irregular_workload(workloads)
    t_serial = time.time() - start
    print("Serial Execution Time:", round(t_serial, 4), "seconds")

    # Parallel timing (first run includes compilation)
    start = time.time()
    parallel_irregular_workload(workloads)
    t_parallel = time.time() - start
    print("Parallel Execution Time:", round(t_parallel, 4), "seconds")

    print("-" * 40)
    print("Speedup:", round(t_serial / t_parallel, 2))


Number of images: 50
Workload variation (sample): [12703 27777 36476 40446 47627]
----------------------------------------
Serial Execution Time: 0.0524 seconds
Parallel Execution Time: 0.3343 seconds
----------------------------------------
Speedup: 0.16


In [16]:
import numpy as np
import time

def serial_sum_max(arr):
    total = 0.0
    max_val = arr[0]
    for i in range(len(arr)):
        total += arr[i]
        if arr[i] > max_val:
            max_val = arr[i]
    return total, max_val
from numba import njit, prange

@njit(parallel=True)
def parallel_sum(arr):
    total = 0.0
    for i in prange(len(arr)):
        total += arr[i]
    return total

@njit(parallel=True)
def parallel_max(arr):
    max_val = arr[0]
    for i in prange(len(arr)):
        if arr[i] > max_val:
            max_val = arr[i]
    return max_val
if __name__ == "__main__":

    N = 5_000_000   # large dataset
    data = np.random.rand(N)

    print("Dataset size:", N)
    print("-" * 40)

    # Serial
    start = time.time()
    s_sum, s_max = serial_sum_max(data)
    t_serial = time.time() - start

    print("Serial Sum :", round(s_sum, 4))
    print("Serial Max :", round(s_max, 4))
    print("Serial Time:", round(t_serial, 4), "seconds")
    print("-" * 40)

    # Parallel Sum
    start = time.time()
    p_sum = parallel_sum(data)
    t_psum = time.time() - start

    # Parallel Max
    start = time.time()
    p_max = parallel_max(data)
    t_pmax = time.time() - start

    print("Parallel Sum :", round(p_sum, 4))
    print("Parallel Max :", round(p_max, 4))
    print("Parallel Time:", round(t_psum + t_pmax, 4), "seconds")
    print("-" * 40)

    print("Sum Correct :", abs(s_sum - p_sum) < 1e-6)
    print("Max Correct :", abs(s_max - p_max) < 1e-6)
    print("Speedup    :", round(t_serial / (t_psum + t_pmax), 2))



Dataset size: 5000000
----------------------------------------
Serial Sum : 2499675.3066
Serial Max : 1.0
Serial Time: 1.0159 seconds
----------------------------------------
Parallel Sum : 2499675.3066
Parallel Max : 0.1774
Parallel Time: 0.5708 seconds
----------------------------------------
Sum Correct : True
Max Correct : False
Speedup    : 1.78


In [18]:
import numpy as np
import time

def serial_pi_estimate(x, y):
    inside = 0
    for i in range(len(x)):
        if x[i] * x[i] + y[i] * y[i] <= 1.0:
            inside += 1
    return 4.0 * inside / len(x)
from numba import njit, prange

@njit(parallel=True)
def parallel_pi_estimate(x, y):
    inside = 0
    for i in prange(len(x)):
        if x[i] * x[i] + y[i] * y[i] <= 1.0:
            inside += 1
    return 4.0 * inside / len(x)
import numpy as np
import time
from numba import njit, prange

def serial_pi_estimate(x, y):
    inside = 0
    for i in range(len(x)):
        if x[i]*x[i] + y[i]*y[i] <= 1.0:
            inside += 1
    return 4.0 * inside / len(x)

@njit(parallel=True)
def parallel_pi_estimate(x, y):
    inside = 0
    for i in prange(len(x)):
        if x[i]*x[i] + y[i]*y[i] <= 1.0:
            inside += 1
    return 4.0 * inside / len(x)

if __name__ == "__main__":

    N = 50_000_000   # ≥ 50 million (as required)
    print("Monte Carlo Samples:", N)

    # Generate random points OUTSIDE loops (important!)
    x = np.random.rand(N)
    y = np.random.rand(N)

    print("-" * 50)

    # Serial execution
    start = time.time()
    pi_serial = serial_pi_estimate(x, y)
    t_serial = time.time() - start

    print("Serial π Estimate :", pi_serial)
    print("Serial Time       :", round(t_serial, 4), "seconds")

    print("-" * 50)

    # Parallel execution (first call includes JIT compile)
    start = time.time()
    pi_parallel = parallel_pi_estimate(x, y)
    t_parallel = time.time() - start

    print("Parallel π Estimate:", pi_parallel)
    print("Parallel Time      :", round(t_parallel, 4), "seconds")

    print("-" * 50)
    print("Speedup:", round(t_serial / t_parallel, 2))


Monte Carlo Samples: 50000000
--------------------------------------------------
Serial π Estimate : 3.14179352
Serial Time       : 23.3395 seconds
--------------------------------------------------
Parallel π Estimate: 3.14179352
Parallel Time      : 0.33 seconds
--------------------------------------------------
Speedup: 70.73
