<a href="https://colab.research.google.com/github/Manikanta2804/hpc_2026_b07_2303A51458/blob/main/HCP_LAB_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#2303A51458
Batch:-7

In [None]:
#Parallel Vector Addition
import numpy as np
import time
from numba import njit, prange

def serial_vector_op(A,B,alpha):
  """Add A and B element by element using an interpreted Python loop.

  Args:
    A: array whose shape and dtype the result copies.
    B: array indexed at the same positions as A.
    alpha: accepted for signature parity with parallel_vector_op; the
      computation does not read it.

  Returns:
    A new array holding A[i]+B[i] at every index i.
  """
  out=np.empty_like(A)
  for idx,a_val in enumerate(A):
    out[idx]=a_val+B[idx]
  return out

@njit(parallel=True)
def parallel_vector_op(A,B,alpha):
  """Add A and B element by element inside a Numba prange loop.

  Args:
    A: array whose shape and dtype the result copies.
    B: array indexed at the same positions as A.
    alpha: accepted for signature parity with serial_vector_op; the
      computation does not read it.

  Returns:
    A new array holding A[k]+B[k] at every index k.
  """
  n=len(A)
  out=np.empty_like(A)
  # Each iteration writes only its own slot, so iterations are independent.
  for k in prange(n):
    out[k]=A[k]+B[k]
  return out

def main():
  """Benchmark serial vs. Numba-parallel vector addition for several sizes.

  Prints one table row per problem size with the serial time, the parallel
  time and the resulting speedup.
  """
  sizes=[10_000, 100_000, 1_000_000, 2_000_000]
  alpha=2.5

  print(f"{'Size':>10} | {'Serial(s)':>10} | {'Parallel(s)':>12} | Speedup")
  print("-"*55)

  # Compile the Numba kernel once, on a tiny input of the same dtype/layout,
  # so that no timed call below pays the JIT compilation cost.
  warmup=np.random.rand(8)
  parallel_vector_op(warmup,warmup,alpha)

  for N in sizes:
    A=np.random.rand(N)
    B=np.random.rand(N)

    # perf_counter is the high-resolution clock meant for measuring short
    # durations; time.time() can be too coarse for sub-millisecond kernels.
    start=time.perf_counter()
    serial_vector_op(A,B,alpha)
    t_serial=time.perf_counter()-start

    start=time.perf_counter()
    parallel_vector_op(A,B,alpha)
    t_parallel=time.perf_counter()-start

    # Guard against a zero reading so the report never raises ZeroDivisionError.
    speedup=t_serial/t_parallel if t_parallel>0 else float("inf")
    print(f"{N:10d} | {t_serial:10.4f} | {t_parallel:12.4f} | {speedup:6.2f}x")

if __name__ == "__main__":
  main()

      Size |  Serial(s) |  Parallel(s) | Speedup
-------------------------------------------------------
     10000 |     0.0037 |       0.0001 |  59.74x
    100000 |     0.0365 |       0.0003 | 114.22x
   1000000 |     0.3813 |       0.0021 | 183.83x
   2000000 |     0.7324 |       0.0040 | 182.64x


In [None]:
#Static Scheduling in Parallel Loop
import numpy as np
import time
from numba import njit, prange, get_num_threads, get_thread_id

#serial computation
def serial_compute(A,B):
  """Compute A[i]*B[i]+A[i] for every index in an interpreted Python loop.

  Args:
    A: input array; its length sets the length of the result.
    B: input array indexed at the same positions as A.

  Returns:
    A new float array (from np.zeros) holding the per-element results.
  """
  size=len(A)
  out=np.zeros(size)
  for idx in range(size):
    a_val=A[idx]
    out[idx]=a_val*B[idx]+a_val
  return out

#static Scheduling
@njit(parallel=True)
def parallel_statsic_compute(A,B,thread_work):
  """Compute A[i]*B[i]+A[i] in a prange loop while counting work per thread.

  Args:
    A: input array; its length sets the length of the result.
    B: input array indexed at the same positions as A.
    thread_work: array indexed by thread id; the slot of the executing
      thread is incremented once per iteration (mutated in place).

  Returns:
    A new float array (from np.zeros) holding the per-element results.
  """
  size=len(A)
  out=np.zeros(size)
  for idx in prange(size):
    # Tally this iteration against the thread that is running it.
    worker=get_thread_id()
    thread_work[worker]+=1
    a_val=A[idx]
    out[idx]=a_val*B[idx]+a_val
  return out

def main():
  """Time the serial and parallel kernels, verify them, and report thread load.

  Prints the serial time, the parallel time (excluding JIT compilation),
  whether both results agree, and the iteration count recorded per thread.
  """
  N=10_000_000
  A=np.random.rand(N)
  B=np.random.rand(N)

  #serial Execution
  start=time.perf_counter()
  C_serial=serial_compute(A,B)
  serial_time=time.perf_counter()-start
  print("Serial Execution Time:",serial_time)

  #parallel Execution
  num_threads=get_num_threads()

  # Warm-up: the first call of an njit function compiles it. Run it once on a
  # one-element slice (same dtype/layout as the real input) with a throwaway
  # counter array, so neither the timing nor the reported per-thread counts
  # are polluted by compilation.
  parallel_statsic_compute(A[:1],B[:1],np.zeros(num_threads))

  thread_work=np.zeros(num_threads)

  start=time.perf_counter()
  C_parallel=parallel_statsic_compute(A,B,thread_work)
  parallel_time=time.perf_counter()-start
  print("Parallel Execution Time:",parallel_time)

  #verify Correctness
  print("Results Match:",np.allclose(C_serial,C_parallel))

  #work distribution
  print("\n Work Distribution Among Threads: ")
  for i in range(num_threads):
    print(f"Thread{i} handled {int(thread_work[i])} iterations")

if __name__ == "__main__":
  main()


Serial Execution Time: 6.329025983810425
Parallel Execution Time: 1.118285894393921
Results Match: True

 Work Distribution Among Threads: 
Thread0 handled 5000000 iterations
Thread1 handled 5000000 iterations


In [None]:
#Load Imbalance in Parallel Execution
import numpy as np
import time
from numba import njit, prange, get_num_threads, get_thread_id

#serial computation
def serial_imbalance(arr):
  """Run a deliberately uneven per-element workload in plain Python.

  For each element x, accumulates j*x for j in range(100) when x<0.5
  ("light") and for j in range(5000) otherwise ("heavy").

  Args:
    arr: input array of values.

  Returns:
    A new float array (from np.zeros) with one accumulated sum per element.
  """
  size=len(arr)
  out=np.zeros(size)
  for idx in range(size):
    x=arr[idx]
    # Values below 0.5 take the short inner loop; everything else the long one.
    inner_steps=100 if x<0.5 else 5000
    acc=0
    for step in range(inner_steps):
      acc+=step*x
    out[idx]=acc
  return out



#parallel Computation
@njit(parallel=True)
def parallel_imbalance(arr,thread_work):
  """Run the uneven per-element workload in a prange loop, counting per thread.

  For each element x, accumulates j*x for j in range(100) when x<0.5
  ("light") and for j in range(5000) otherwise ("heavy").

  Args:
    arr: input array of values.
    thread_work: array indexed by thread id; the slot of the executing
      thread is incremented once per outer iteration (mutated in place).

  Returns:
    A new float array (from np.zeros) with one accumulated sum per element.
  """
  size=len(arr)
  out=np.zeros(size)
  for idx in prange(size):
    # Tally this iteration against the thread that is running it.
    worker=get_thread_id()
    thread_work[worker]+=1
    x=arr[idx]
    # Values below 0.5 take the short inner loop; everything else the long one.
    inner_steps=100 if x<0.5 else 5000
    acc=0
    for step in range(inner_steps):
      acc+=step*x
    out[idx]=acc
  return out

def main():
  """Time the imbalanced workload serially and in parallel, then report load.

  Prints the serial time, the parallel time (excluding JIT compilation),
  whether both results agree, and the iteration count recorded per thread.
  """
  N=200000
  #create random array
  arr=np.random.rand(N)

  #serial Execution
  start=time.perf_counter()
  serial_result=serial_imbalance(arr)
  serial_time=time.perf_counter()-start
  print("Serial Execution Time:",serial_time)

  #parallel Execution
  num_thread=get_num_threads()

  # Warm-up: the first call of an njit function compiles it. Run it once on a
  # one-element slice with a throwaway counter array so that compilation is
  # excluded from the timing and from the per-thread counts reported below.
  parallel_imbalance(arr[:1],np.zeros(num_thread))

  thread_work=np.zeros(num_thread)

  start=time.perf_counter()
  parallel_result=parallel_imbalance(arr,thread_work)
  parallel_time=time.perf_counter()-start

  print("Parallel Execution Time:",parallel_time)

  #verify correctness
  print("Results Match:",np.allclose(serial_result,parallel_result))

  #work Distribution
  print("\nWork Distribution Among Threads:")
  for i in range(num_thread):
    print(f"Thread {i} handled {int(thread_work[i])} interations")

if __name__ == "__main__":
  main()

Serial Execution Time: 156.69098162651062
Parallel Execution Time: 0.9361996650695801
Results Match: True

Work Distribution Among Threads:
Thread 0 handled 100000 interations
Thread 1 handled 100000 interations


In [None]:
#Parallel Reduction (Synchronization)
#(reuses the per-thread iteration counting from the static-scheduling example)
import numpy as np
import time
from numba import njit, prange, get_num_threads, get_thread_id

#serial computation
def serial_compute(A,B): # B is unused; the parameter only mirrors the parallel version
  """Sum the elements of A strictly left to right in a Python loop.

  Args:
    A: array of values to add up.
    B: ignored.

  Returns:
    The running total, starting from 0.0.
  """
  total=0.0
  for value in A:
    total=total+value
  return total

#parallel reduction (sum of A) with per-thread iteration counts
@njit(parallel=True)
def parallel_statsic_compute(A,B,thread_work): # B is not used, but kept for signature consistency
  """Sum the elements of A inside a Numba prange loop.

  Args:
    A: array of values to add up.
    B: ignored by the computation.
    thread_work: array indexed by thread id; the slot of the executing
      thread is incremented once per iteration (mutated in place).

  Returns:
    The accumulated sum of A as a float.

  Note:
    The scalar C is updated with += in the prange body, which is the form
    Numba documents as a supported reduction. The order in which partial
    sums are combined is not fixed by this code, so the result may differ
    from a sequential sum in the last bits; the caller compares the two with
    np.allclose.
  """
  N=len(A)
  C=0.0 # float accumulator, updated only via += inside the prange loop
  for i in prange(N):
    tid=get_thread_id()
    thread_work[tid]+=1
    C+=A[i] # reduction on the scalar C
  return C

def main():
  """Time the serial and parallel sums, verify them, and report thread load.

  Prints the serial time, the parallel time (excluding JIT compilation),
  whether both sums agree within np.allclose tolerance, and the iteration
  count recorded per thread.
  """
  N=10_000_000
  A=np.random.rand(N)
  B=np.random.rand(N)

  #serial Execution
  start=time.perf_counter()
  C_serial=serial_compute(A,B)
  serial_time=time.perf_counter()-start
  print("Serial Execution Time:",serial_time)

  #parallel Execution
  num_threads=get_num_threads()

  # Warm-up: the first call of an njit function compiles it. Run it once on a
  # one-element slice with a throwaway counter array so that compilation is
  # excluded from the timing and from the per-thread counts reported below.
  parallel_statsic_compute(A[:1],B[:1],np.zeros(num_threads))

  thread_work=np.zeros(num_threads)

  start=time.perf_counter()
  C_parallel=parallel_statsic_compute(A,B,thread_work)
  parallel_time=time.perf_counter()-start
  print("Parallel Execution Time:",parallel_time)

  #verify Correctness
  # Floating-point addition is not associative, so the parallel sum may
  # differ from the sequential one in the last bits; compare with a tolerance.
  print("Results Match:",np.allclose(C_serial,C_parallel))

  #work distribution
  print("\n Work Distribution Among Threads: ")
  for i in range(num_threads):
    print(f"Thread{i} handled {int(thread_work[i])} iterations")

if __name__ == "__main__":
  main()


Serial Execution Time: 1.9442782402038574
Parallel Execution Time: 0.5030653476715088
Results Match: True

 Work Distribution Among Threads: 
Thread0 handled 5000000 iterations
Thread1 handled 5000000 iterations


In [None]:
#Barrier Synchronization (Two-Phase Computation)
import numpy as np
import time
from numba import njit, prange

@njit(parallel=True)
def phase1(A,B,C):
  """Phase 1: store A[k]+B[k] into C[k] for every index of A.

  Args:
    A: first input array; its length sets the iteration count.
    B: second input array, indexed at the same positions as A.
    C: output array, overwritten in place.
  """
  for k in prange(len(A)):
    C[k]=A[k]+B[k]

@njit(parallel=True)
def phase2(C, D):
  """Phase 2: double every element of C in place.

  Args:
    C: array produced by phase 1; each element is replaced by twice its value.
    D: accepted but not read or written by this function.
  """
  size=len(C)
  for k in prange(size):
    C[k]=2*C[k]

def main():
  """Run the two-phase computation and report its total execution time.

  phase2 is only called after phase1 has returned, so the return from
  phase1 is the synchronization point between the two phases. Both kernels
  are compiled before the timer starts so that the reported time covers
  execution only.
  """
  N=5_000_000
  A=np.random.rand(N)
  B=np.random.rand(N)
  C=np.zeros(N)
  D=np.zeros(N)

  # Warm-up: the first call of an njit function compiles it. Trigger that on
  # one-element inputs (same dtype/layout as the real arrays) writing into a
  # scratch buffer, so C is untouched and the timed region excludes the JIT.
  scratch=np.zeros(1)
  phase1(A[:1],B[:1],scratch)
  phase2(scratch,D[:1])

  print("Sttarting phase 1")

  # perf_counter is the high-resolution clock meant for measuring durations.
  start=time.perf_counter()
  phase1(A,B,C)

  print("Barrier reached: All threads completed Phase 1")

  phase2(C,D)
  end=time.perf_counter()
  print("Phase2 Completed")
  print("Total Execution Time: ",end-start)

if __name__ == "__main__":
  main()


Sttarting phase 1
Barrier reached: All threads completed Phase 1
Phase2 Completed
Total Execution Time:  0.6877920627593994
