###                                      **Lab Assignment-03**



**Assignment 1: Parallel Vector Computation (Numba Parallel Loop)**

In [None]:
import numpy as np
import time
from numba import njit, prange

def serial_vector_op(A, B, alpha):
  """Compute ``alpha * A + B`` one element at a time in plain Python.

  This is the un-accelerated baseline that the benchmark compares against.

  Args:
    A: Input array; the result is allocated with ``np.empty_like(A)``.
    B: Array indexed in step with ``A``.
    alpha: Scalar multiplier applied to each element of ``A``.

  Returns:
    A new array holding ``alpha * A[i] + B[i]`` at every index ``i``.
  """
  out = np.empty_like(A)
  # B is indexed rather than zipped so a too-short B still raises IndexError.
  for idx, a_val in enumerate(A):
    out[idx] = alpha * a_val + B[idx]
  return out

@njit(parallel=True)
def parallel_vector_op(A, B, alpha):
  """Numba-compiled ``alpha * A + B`` with the element loop run via ``prange``.

  Args:
    A: Input array; the result is allocated with ``np.empty_like(A)``.
    B: Array indexed in step with ``A``.
    alpha: Scalar multiplier applied to each element of ``A``.

  Returns:
    A new array holding ``alpha * A[i] + B[i]`` at every index ``i``.
  """
  n_elems = len(A)
  out = np.empty_like(A)
  # Each iteration writes only its own slot, so the loop is safe under prange.
  for idx in prange(n_elems):
    out[idx] = alpha * A[idx] + B[idx]
  return out

def main():
  """Benchmark the serial and Numba-parallel vector kernels over several sizes.

  For each size, two random vectors are generated, both kernels are timed
  once, and one table row with both times and the speedup is printed.
  """
  sizes = [10_000, 100_000, 1_000_000, 2_000_000]
  alpha = 2.5
  print(f"{'Size':>10}|{'Serial(s)':>10}|{'Parallel(s)':>12}|Speedup")
  print("-" * 55)

  for N in sizes:
    A = np.random.rand(N)
    B = np.random.rand(N)

    # Warm-up call so JIT compilation is not counted in the parallel timing.
    parallel_vector_op(A, B, alpha)

    # perf_counter is monotonic and high-resolution; time.time() is neither,
    # which matters for the sub-millisecond parallel runs measured here.
    start = time.perf_counter()
    serial_vector_op(A, B, alpha)
    t_serial = time.perf_counter() - start

    start = time.perf_counter()
    parallel_vector_op(A, B, alpha)
    t_parallel = time.perf_counter() - start

    # A zero reading would otherwise raise ZeroDivisionError.
    speedup = t_serial / t_parallel if t_parallel > 0 else float("inf")
    print(f"{N:10d}|{t_serial:10.4f}|{t_parallel:12.4f}|{speedup:6.2f}x")

if __name__ == "__main__":
  main()

      Size| Serial(s)| Parallel(s)|Speedup
-------------------------------------------------------
     10000|    0.0049|      0.0001| 35.39x
    100000|    0.0514|      0.0004|134.50x
   1000000|    0.4932|      0.0022|229.20x
   2000000|    1.0195|      0.0050|203.73x



**Assignment 2: Parallel Matrix Multiplication**

In [None]:
import numpy as np
from numba import njit, prange

def serial_matmul(A,B):
  """Multiply two square matrices with a plain-Python triple loop.

  Args:
    A: Left operand; ``A.shape[0]`` is used as the size ``n`` for every loop.
    B: Right operand, indexed as ``B[k, j]`` with ``k, j < n``.

  Returns:
    An ``n x n`` float array with ``C[i, j] = sum_k A[i, k] * B[k, j]``.
  """
  dim=A.shape[0]
  product=np.zeros((dim,dim))
  # np.ndindex walks (row, col) in row-major order, same as nested i/j loops.
  for row,col in np.ndindex(dim,dim):
    for inner in range(dim):
      product[row,col]+=A[row,inner]*B[inner,col]
  return product
@njit(parallel=True)
def parallel_outer_matmul(A,B):
  """Square matrix product with only the outer (row) loop run via ``prange``.

  Args:
    A: Left operand; ``A.shape[0]`` is used as the size ``n`` for every loop.
    B: Right operand, indexed as ``B[k, j]`` with ``k, j < n``.

  Returns:
    An ``n x n`` float array with ``C[i, j] = sum_k A[i, k] * B[k, j]``.
  """
  dim=A.shape[0]
  product=np.zeros((dim,dim))
  for row in prange(dim):
    for col in range(dim):
      # acc is assigned fresh for every (row, col), so it is private to the
      # iteration rather than shared across the prange loop.
      acc=0.0
      for inner in range(dim):
        acc+=A[row,inner]*B[inner,col]
      product[row,col]=acc
  return product
@njit(parallel=True)
def parallel_collapsed_matmul(A,B):
  """Square matrix product with the row and column loops fused into one ``prange``.

  The single parallel loop runs over ``n * n`` flat indices, each of which is
  decoded back into a ``(row, col)`` pair.

  Args:
    A: Left operand; ``A.shape[0]`` is used as the size ``n`` for every loop.
    B: Right operand, indexed as ``B[k, j]`` with ``k, j < n``.

  Returns:
    An ``n x n`` float array with ``C[i, j] = sum_k A[i, k] * B[k, j]``.
  """
  dim=A.shape[0]
  product=np.zeros((dim,dim))
  for flat in prange(dim*dim):
    # Decode the flat index in row-major order.
    row=flat//dim
    col=flat%dim
    acc=0.0
    for inner in range(dim):
      acc+=A[row,inner]*B[inner,col]
    product[row,col]=acc
  return product
def main():
  """Time the serial, outer-parallel and collapsed-parallel matrix products.

  Uses two random 300 x 300 matrices, runs each parallel kernel once as a
  warm-up, then prints the three wall-clock times and the two speedups.
  """
  # Imported locally: this cell's own import list does not bring in `time`,
  # so the function would otherwise depend on an earlier cell having run.
  import time

  N=300
  A=np.random.rand(N,N)
  B=np.random.rand(N,N)

  # Warm-up calls so JIT compilation is not counted in the timings below.
  parallel_outer_matmul(A,B)
  parallel_collapsed_matmul(A,B)

  # perf_counter is monotonic and high-resolution, unlike time.time().
  start=time.perf_counter()
  serial_matmul(A,B)
  t_serial=time.perf_counter()-start

  start=time.perf_counter()
  parallel_outer_matmul(A,B)
  t_outer=time.perf_counter()-start

  start=time.perf_counter()
  parallel_collapsed_matmul(A,B)
  t_collapsed=time.perf_counter()-start

  # A zero reading would otherwise raise ZeroDivisionError.
  speedup_outer=t_serial/t_outer if t_outer>0 else float("inf")
  speedup_collapsed=t_serial/t_collapsed if t_collapsed>0 else float("inf")

  print(f"Serial time          :{t_serial:.3f}s")
  print(f"Parallel outer loop  :{t_outer:.3f}s")
  print(f"Collapsed loop       :{t_collapsed:.3f}s")
  print(f"Speedup (outer)      :{speedup_outer:.2f}x")
  print(f"Speedup (collapsed)  :{speedup_collapsed:.2f}x")

if __name__=="__main__":
  main()

Serial time          :23.113s
Parallel outer loop  :0.025s
Collapsed loop       :0.029s
Speedup (outer)      :941.53x
Speedup (collapsed)  :804.93x


**Assignment 3: Load Balancing with Irregular Workloads**

In [None]:
import numpy as np
import time
import os
from numba import njit, prange,get_num_threads

def heavy_work(size):
  """Run a synthetic workload of ``size`` steps and return its float total.

  Step ``i`` contributes ``(i % 7) * 0.5``; the cost grows linearly with
  ``size``, which is what makes the per-task workloads uneven.

  Args:
    size: Number of loop iterations to perform.

  Returns:
    The accumulated sum as a float (``0.0`` when no iterations run).
  """
  acc=0.0
  for step in range(size):
    remainder=step%7
    acc+=remainder*0.5
  return acc

def serial_processing(workloads):
  """Run ``heavy_work`` on each workload size in order, without parallelism.

  Args:
    workloads: Sequence of iteration counts, one per task.

  Returns:
    A float array whose ``i``-th entry is ``heavy_work(workloads[i])``.
  """
  # dtype=float keeps the empty-input result a float array, as np.zeros did.
  return np.array([heavy_work(task_size) for task_size in workloads],dtype=float)
@njit(parallel=True)
def parallel_processing(workloads):
  """Numba-compiled version of the workload loop, with tasks run via ``prange``.

  The per-task computation is written inline and accumulates
  ``(j % 7) * 0.5`` for ``j`` in ``range(workloads[i])``.

  Args:
    workloads: Array of iteration counts, one per task.

  Returns:
    A float array with one accumulated total per task.
  """
  n_tasks=len(workloads)
  totals=np.zeros(n_tasks)
  for task in prange(n_tasks):
    # acc is reset for every task, so it is private to the iteration.
    acc=0.0
    for step in range(workloads[task]):
      acc+=(step%7)*0.5
    totals[task]=acc
  return totals
def main():
  """Compare serial and Numba-parallel processing of 40 uneven workloads.

  Prints the CPU and Numba thread counts, then the two wall-clock times and
  the resulting speedup.
  """
  print("Load Balancing with Irregular Workloads")
  print("__________________________________")
  print(f"logical CPU cores  : {os.cpu_count()}")
  print(f"Numba threads in use : {get_num_threads()}\n")

  # 40 tasks with sizes drawn uniformly from [10_000, 500_000).
  workloads=np.random.randint(10_000,500_000,size=40)

  # Warm-up call so JIT compilation is not counted in the parallel timing.
  parallel_processing(workloads)

  # perf_counter is monotonic and high-resolution, unlike time.time().
  start=time.perf_counter()
  serial_processing(workloads)
  t_serial=time.perf_counter()-start

  start=time.perf_counter()
  parallel_processing(workloads)
  t_parallel=time.perf_counter()-start

  # A zero reading would otherwise raise ZeroDivisionError.
  speedup=t_serial/t_parallel if t_parallel>0 else float("inf")

  print(f"Serial execution time          :{t_serial:.3f}s")
  print(f"Parallel execution time        :{t_parallel:.3f}s")
  print(f"Speedup                        :{speedup:.2f}x")
if __name__=="__main__":
  main()

Load Balancing with Irregular Workloads
__________________________________
logical CPU cores  : 2
Numba threads in use : 2

Serial execution time          :0.816s
Parallel execution time        :0.018s
Speedup                        :44.64x


**Assignment 4: Parallel Reduction Operations**

In [None]:
import numpy as np
import time
from numba import njit,prange

@njit
def serial_sum_max(arr):
    """Return the sum and the maximum of a 1-D array in one serial pass.

    Args:
        arr: Non-empty 1-D numeric array (``arr[0]`` seeds the maximum).

    Returns:
        A ``(total, largest)`` tuple.
    """
    total=0.0
    largest=arr[0]
    for value in arr:
        total+=value
        if value>largest:
            largest=value
    return total,largest

@njit(parallel=True)
def parallel_sum(arr):
    """Sum the elements of ``arr`` with a ``prange`` loop.

    Args:
        arr: 1-D numeric array.

    Returns:
        The float sum of all elements (``0.0`` for an empty array).
    """
    n_items=arr.size
    total=0.0
    # `+=` on a scalar inside prange is the sum-reduction form.
    for idx in prange(n_items):
        total+=arr[idx]
    return total

@njit(parallel=True)
def parallel_max(arr):
    """Return the maximum element of ``arr`` using a parallel reduction.

    Args:
        arr: Non-empty 1-D numeric array (``arr[0]`` seeds the maximum).

    Returns:
        The largest element of ``arr``.
    """
    m=arr[0]
    for i in prange(arr.size):
        # A conditional assignment (`if arr[i] > m: m = arr[i]`) is not a
        # reduction form Numba recognises, so threads would race on `m` and
        # the result could be wrong. `m = max(m, ...)` is a supported
        # reduction.
        m=max(m,arr[i])
    return m

n=50000000
arr=np.random.random(n)

# Warm-up: compile all three kernels on a tiny array of the same dtype so the
# timings below measure execution only, not JIT compilation.
warmup=np.random.random(8)
serial_sum_max(warmup)
parallel_sum(warmup)
parallel_max(warmup)

# perf_counter is monotonic and high-resolution, unlike time.time().
start=time.perf_counter()
s_serial,m_serial=serial_sum_max(arr)
end=time.perf_counter()
serial_time=end-start

start=time.perf_counter()
s_parallel=parallel_sum(arr)
m_parallel=parallel_max(arr)
end=time.perf_counter()
parallel_time=end-start

print("Serial Sum:",s_serial)
print("Serial Max:",m_serial)
print("Serial Time:",serial_time)

print("Parallel Sum:",s_parallel)
print("Parallel Max:",m_parallel)
print("Parallel Time:",parallel_time)

# The sums are compared with a tolerance because the parallel reduction adds
# the elements in a different order than the serial loop.
print("Sum Correct:",np.isclose(s_serial,s_parallel))
print("Max Correct:",np.isclose(m_serial,m_parallel))
print("Speedup:",serial_time/parallel_time)


Serial Sum: 24999881.235744715
Serial Max: 0.9999999925562674
Serial Time: 0.1910078525543213
Parallel Sum: 24999881.235743288
Parallel Max: 0.5786189190888913
Parallel Time: 0.7601397037506104
Sum Correct: True
Max Correct: False
Speedup: 0.25127993132297677


**Assignment 5: Parallel Monte Carlo Simulation for π Estimation**

In [None]:
import numpy as np
import time
from numba import njit,prange

@njit
def monte_carlo_serial(n):
    """Estimate pi from ``n`` random points in the unit square, serially.

    Args:
        n: Number of random points to draw.

    Returns:
        ``4 * hits / n``, where ``hits`` counts points with
        ``x*x + y*y <= 1``.
    """
    hits=0
    for trial in range(n):
        x=np.random.random()
        y=np.random.random()
        dist_sq=x*x+y*y
        if dist_sq<=1.0:
            hits+=1
    return 4.0*hits/n

@njit(parallel=True)
def monte_carlo_parallel(n):
    """Estimate pi from ``n`` random points, with the loop run via ``prange``.

    Args:
        n: Number of random points to draw.

    Returns:
        ``4 * hits / n``, where ``hits`` counts points with
        ``x*x + y*y <= 1``.
    """
    hits=0
    for trial in prange(n):
        x=np.random.random()
        y=np.random.random()
        dist_sq=x*x+y*y
        if dist_sq<=1.0:
            # `+=` on a scalar inside prange is the sum-reduction form.
            hits+=1
    return 4.0*hits/n

samples=[50000000,100000000,200000000]

# Warm-up: compile both kernels on a small sample count so the first timed
# iteration measures execution only, not JIT compilation.
monte_carlo_serial(1000)
monte_carlo_parallel(1000)

for n in samples:
    # perf_counter is monotonic and high-resolution, unlike time.time().
    start=time.perf_counter()
    pi_serial=monte_carlo_serial(n)
    end=time.perf_counter()
    serial_time=end-start

    start=time.perf_counter()
    pi_parallel=monte_carlo_parallel(n)
    end=time.perf_counter()
    parallel_time=end-start

    speedup=serial_time/parallel_time

    print("Samples:",n)
    print("Serial π:",pi_serial)
    print("Serial Time:",serial_time)
    print("Parallel π:",pi_parallel)
    print("Parallel Time:",parallel_time)
    print("Speedup:",speedup)
    print("-"*40)


Samples: 50000000
Serial π: 3.1415316
Serial Time: 0.7511444091796875
Parallel π: 3.14152552
Parallel Time: 1.360095500946045
Speedup: 0.552273284234241
----------------------------------------
Samples: 100000000
Serial π: 3.14173524
Serial Time: 1.3145546913146973
Parallel π: 3.14175112
Parallel Time: 1.2304768562316895
Speedup: 1.0683294729658666
----------------------------------------
Samples: 200000000
Serial π: 3.14141426
Serial Time: 2.579317808151245
Parallel π: 3.14152944
Parallel Time: 3.2811944484710693
Speedup: 0.7860911167130384
----------------------------------------
