<a href="https://colab.research.google.com/github/2303a51060Nirnaya/High_performance_computing-Hcp-/blob/main/Assignment_7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Vector Addition (Scalar vs SIMD-like)

In [1]:
import numpy as np
import time

# Number of float64 elements in each operand vector.
N = 10_000_000

A = np.arange(N, dtype=np.float64)
B = np.arange(N, dtype=np.float64)

# Scalar loop: one interpreter-level indexed add per element, written into
# a preallocated output array.
C = np.zeros(N)
# perf_counter() is a monotonic, high-resolution clock intended for
# measuring short intervals, which makes it the right timer for benchmarks.
start = time.perf_counter()
for i in range(N):
    C[i] = A[i] + B[i]
end = time.perf_counter()
print("Normal loop time:", end - start)

# Keep a handle on the loop result; C is rebound by the vectorized version.
C_loop = C

# Vectorized (SIMD-like): a single whole-array NumPy expression.
# Note that this timing includes allocating the new result array.
start = time.perf_counter()
C = A + B
end = time.perf_counter()
print("Vectorized time:", end - start)

# The speed comparison is only meaningful if both strategies agree.
assert np.array_equal(C_loop, C), "loop and vectorized results differ"


Normal loop time: 7.748841762542725
Vectorized time: 0.04339742660522461


Reduction (Sum)

In [2]:
import numpy as np
import time

# Number of float64 elements to reduce.
N = 10_000_000
A = np.ones(N, dtype=np.float64)

# Normal loop: accumulate one element at a time in the interpreter.
# perf_counter() is a monotonic, high-resolution clock intended for
# measuring short intervals, which makes it the right timer for benchmarks.
start = time.perf_counter()
s = 0.0
for i in range(N):
    s += A[i]
end = time.perf_counter()
print("Normal sum:", s, "Time:", end - start)

# Keep the loop result; s is rebound by the vectorized reduction below.
s_loop = s

# Vectorized reduction: a single NumPy call over the whole array.
start = time.perf_counter()
s = np.sum(A)
end = time.perf_counter()
print("Vectorized sum:", s, "Time:", end - start)

# Summation order may differ between the two approaches, so compare with a
# floating-point tolerance rather than exact equality.
assert np.isclose(s_loop, s), "loop and vectorized sums differ"


Normal sum: 10000000.0 Time: 2.739647388458252
Vectorized sum: 10000000.0 Time: 0.008611440658569336


Memory Alignment Effect

In [3]:
import numpy as np
import time

# Number of float64 elements summed in each case.
N = 10_000_000
# Timed repetitions per case; the fastest one is reported.
REPEATS = 5

# View that drops the first element, so its data starts one float64
# (8 bytes) past the start of the parent buffer.
# NOTE(review): whether this actually breaks SIMD-width alignment depends on
# how the allocator aligned the parent buffer -- confirm on the target system.
unaligned = np.arange(N + 1, dtype=np.float64)[1:]
# Freshly allocated array whose data starts at the beginning of its buffer.
aligned = np.arange(N, dtype=np.float64)


def best_time(func, arg, repeats=REPEATS):
    """Return the fastest wall time, in seconds, of `repeats` calls of func(arg).

    Taking the minimum over several runs suppresses one-off noise (scheduling,
    cache warm-up) that would otherwise swamp a millisecond-scale measurement.
    """
    best = float("inf")
    for _ in range(repeats):
        # perf_counter() is monotonic and high-resolution, suited to
        # short-interval timing.
        start = time.perf_counter()
        func(arg)
        best = min(best, time.perf_counter() - start)
    return best


print("Unaligned time:", best_time(np.sum, unaligned))
print("Aligned time:", best_time(np.sum, aligned))


Unaligned time: 0.007345676422119141
Aligned time: 0.007325649261474609


Parallel + SIMD (Implicit)

In [4]:
import numpy as np
import time

# Number of float64 elements to scale.
N = 10_000_000
A = np.arange(N, dtype=np.float64)

# Time a single whole-array multiply by a scalar.
# perf_counter() is a monotonic, high-resolution clock intended for
# measuring short intervals, which makes it the right timer for benchmarks.
# NOTE(review): NumPy elementwise operations normally execute on a single
# thread; multithreading typically comes only from BLAS-backed routines
# (e.g. matrix products). The "multithreaded" part of the label below may
# overstate what is measured here -- confirm before drawing conclusions.
start = time.perf_counter()
B = A * 2.0
print("Vectorized (SIMD + multithreaded) time:", time.perf_counter() - start)


Vectorized (SIMD + multithreaded) time: 0.026205778121948242


Branch Divergence

In [5]:
import numpy as np
import time

# Number of float64 elements to process.
N = 10_000_000
# Fixed seed so the random input (and the branch pattern) is reproducible.
SEED = 0

# Uniform random values in [0, 100); about half fall on each side of 50.
A = np.random.default_rng(SEED).random(N) * 100
B = np.zeros(N)

# Scalar loop with a data-dependent branch on every element.
# perf_counter() is a monotonic, high-resolution clock intended for
# measuring short intervals, which makes it the right timer for benchmarks.
start = time.perf_counter()
for i in range(N):
    if A[i] > 50:
        B[i] = A[i] * 2
    else:
        B[i] = A[i] / 2
print("Branch loop time:", time.perf_counter() - start)


Branch loop time: 6.7121851444244385


In [6]:
# Vectorized form of the branch: build a boolean mask and select per element.
# Both candidate arrays (A * 2 and A / 2) are computed in full before
# np.where picks between them, so the timing includes both computations.
# Relies on `A` already being defined by an earlier cell.
# perf_counter() is a monotonic, high-resolution clock intended for
# measuring short intervals, which makes it the right timer for benchmarks.
start = time.perf_counter()
B = np.where(A > 50, A * 2, A / 2)
print("Vectorized conditional time:", time.perf_counter() - start)


Vectorized conditional time: 0.1503133773803711
