### Python, NumPy, and Vectorization

In [3]:
import numpy as np
import time

#### Vector Creation

In [12]:
# Build three length-4 vectors and report each one's contents, shape and dtype.
# np.zeros takes the length either as a bare int or as a one-element shape
# tuple; random_sample fills the vector with random floats instead of zeros.
for a in (np.zeros(4), np.zeros((4,)), np.random.random_sample(4)):
    print(f"a = {a}, a shape = {a.shape}, a data type = {a.dtype}")

a = [0. 0. 0. 0.], a shape = (4,), a data type = float64
a = [0. 0. 0. 0.], a shape = (4,), a data type = float64
a = [0.51508946 0.09456134 0.42084001 0.46817327], a shape = (4,), a data type = float64


In [22]:
# More ways to create a vector: arange with a float stop value, arange with an
# int stop value, random values, and an explicit list of elements.
for a in (np.arange(4.), np.arange(4), np.random.rand(4), np.array([5, 4, 3, 2, 1])):
    print(f"a = {a}, a shape = {a.shape}, a data type = {a.dtype}")

a = [0. 1. 2. 3.], a shape = (4,), a data type = float64
a = [0 1 2 3], a shape = (4,), a data type = int32
a = [0.40697683 0.46006905 0.2933848  0.65745839], a shape = (4,), a data type = float64
a = [5 4 3 2 1], a shape = (5,), a data type = int32


### Slicing

In [43]:
# Slices are written start:stop:step; take every slice first, then report them.
a = np.arange(10)
c = a[2:7:1]   # five consecutive elements, indices 2..6
d = a[2:7:2]   # same range, every second element
e = a[3:]      # index 3 through the end
f = a[:3]      # everything before index 3
g = a[:]       # the whole vector

print(f"a = {a}")
print(f"c = {c}")
print(f"d = {d}")
print(f"e = {e}")
print(f"f = {f}")
print(f"g = {g}")

a = [0 1 2 3 4 5 6 7 8 9]
c = [2 3 4 5 6]
d = [2 4 6]
e = [3 4 5 6 7 8 9]
f = [0 1 2]
g = [0 1 2 3 4 5 6 7 8 9]


### Vector Operations

In [52]:
# Single-vector operations: compute everything first, then report the results.
a = np.array([1, 2, 3, 4, 5])
b = -a           # element-wise negation
c = np.sum(a)    # reduction to a single total
d = np.mean(a)   # reduction to the average
e = a ** 2       # element-wise square

print(f"b = {b}")
print(f"sum = {c}")
print(f"mean = {d}")
print(f"square:{e}")

b = [-1 -2 -3 -4 -5]
sum = 15
mean = 3.0
square:[ 1  4  9 16 25]


In [56]:
# Try a mismatched vector operation: element-wise addition needs compatible
# shapes, and shapes (4,) and (2,) cannot be broadcast together.
a = np.array([1, 2, 3, 4])
b = np.array([1, 2])
try:
    c = a + b
except ValueError as e:
    # NumPy signals incompatible shapes with ValueError. Catching only that
    # keeps an unrelated failure (e.g. a NameError) from being reported as if
    # it were the broadcasting error this cell is meant to demonstrate.
    print(f"the error message you will see is = {e}")

the error message you will see is = operands could not be broadcast together with shapes (4,) (2,) 


### Vector-vector dot product
Implement a function that returns the dot product of two vectors. Given inputs $a$ and $b$, each with $n$ elements, the function returns:
$$ x = \sum_{i=0}^{n-1} a_i b_i $$

In [60]:
def dotProduct(a,b):
    """
    Compute the dot product of two vectors with an explicit Python loop.

    Args:
      a: vector of n numbers (ndarray, list or tuple)
      b: vector of n numbers (ndarray, list or tuple), same length as a

    Returns:
      x: the sum of a[i] * b[i] over i = 0..n-1; 0 when both inputs are empty

    Raises:
      ValueError: if a and b do not have the same length
    """
    n = len(a)
    # Without this check a longer b would be silently truncated and a shorter
    # b would fail part-way through the loop with an unrelated IndexError.
    if len(b) != n:
        raise ValueError(
            f"dotProduct needs vectors of equal length, got {n} and {len(b)}"
        )
    x = 0
    for i in range(n):
        x = x + a[i]*b[i]
    return x

In [63]:
# Check the loop-based dot product on an example small enough to verify by
# hand: 1*(-1) + 2*4 + 3*3 + 4*2 = 24
a = np.array([1, 2, 3, 4])
b = np.array([-1, 4, 3, 2])
print(f"ans = {dotProduct(a,b)}")

ans = 24


### The need for speed: vector vs for loop
This code compares the efficiency of a vectorized dot product, computed with NumPy's `np.dot()` function, against a manual loop-based implementation (`my_dot()`, defined in the cell below).
### Seed
The np.random.seed(seed_value) function in NumPy initializes the random number generator (RNG) to produce consistent (reproducible) random numbers.

### Why Use a Seed?
Normally, np.random generates different random numbers every time your program runs. However, by setting a seed, you ensure that the same sequence of random numbers is generated every time, which is useful for:

- Reproducibility (e.g., debugging, scientific experiments, ML models).
- Testing (getting consistent results for validation).

In [67]:
import numpy as np
import time

# Define a manual dot product function
def my_dot(a, b):
    """
    Loop-based dot product, used as the slow baseline against np.dot.

    Args:
      a: vector of n numbers (ndarray, list or tuple)
      b: vector of n numbers (ndarray, list or tuple), same length as a

    Returns:
      The sum of a[i] * b[i] over i = 0..n-1; 0 when both inputs are empty.

    Raises:
      ValueError: if a and b do not have the same length
    """
    n = len(a)
    # Without this check a longer b would be silently truncated and a shorter
    # b would fail part-way through the loop with an unrelated IndexError.
    if len(b) != n:
        raise ValueError(
            f"my_dot needs vectors of equal length, got {n} and {len(b)}"
        )
    result = 0
    for i in range(n):
        result += a[i] * b[i]
    return result

# Set random seed for reproducibility
np.random.seed(1)

# Generate large random arrays
a = np.random.rand(10_000_000)
b = np.random.rand(10_000_000)

# Timing uses time.perf_counter() rather than time.time(): perf_counter is the
# clock intended for measuring short intervals, while time.time() reports
# wall-clock time, which may have coarser resolution and can shift if the
# system clock is adjusted mid-measurement.

# Measure NumPy's dot product performance
tic = time.perf_counter()
c = np.dot(a, b)
toc = time.perf_counter()
print(f"np.dot(a, b) =  {c:.4f}")
print(f"Vectorized version duration: {1000*(toc-tic):.4f} ms ")

# Measure loop-based dot product performance
tic = time.perf_counter()
c = my_dot(a, b)
toc = time.perf_counter()
print(f"my_dot(a, b) =  {c:.4f}")
print(f"Loop version duration: {1000*(toc-tic):.4f} ms ")

# Free up memory: the two 10-million-element arrays are not needed afterwards
del a, b

np.dot(a, b) =  2501072.5817
Vectorized version duration: 138.0010 ms 
my_dot(a, b) =  2501072.5817
Loop version duration: 8689.9245 ms 


### Matrix Creation

In [77]:
# A 2-D array is created by passing a (rows, columns) tuple as the shape.

# One row, five columns
a = np.zeros(shape=(1, 5))
print(f"a shape = {a.shape}, a = {a}")

# Two rows, one column
a = np.zeros(shape=(2, 1))
print(f"a = {a}, a shape = {a.shape}")

# A single random element, still stored as a 2-D array
a = np.random.random_sample(size=(1, 1))
print(f"a shape = {a.shape}, a = {a}")

a shape = (1, 5), a = [[0. 0. 0. 0. 0.]]
a = [[0.]
 [0.]], a shape = (2, 1)
a shape = (1, 1), a = [[0.04997798]]


In [79]:
# Vector indexing operations on matrices.
# reshape(-1, 2) fixes the column count at 2 and lets NumPy infer the row
# count, so the 6 elements from arange(6) become a (3, 2) matrix.
a = np.arange(6).reshape(-1, 2) 
print(f"a.shape: {a.shape}, \na= {a}")

# Access an element: a [row, column] index pair yields a NumPy scalar whose
# shape is the empty tuple ().
print(f"\na[2,0].shape:   {a[2, 0].shape}, a[2,0] = {a[2, 0]},     type(a[2,0]) = {type(a[2, 0])} Accessing an element returns a scalar\n")

# Access a row: a single index selects the whole row as a 1-D ndarray.
print(f"a[2].shape:   {a[2].shape}, a[2]   = {a[2]}, type(a[2])   = {type(a[2])}")

a.shape: (3, 2), 
a= [[0 1]
 [2 3]
 [4 5]]

a[2,0].shape:   (), a[2,0] = 4,     type(a[2,0]) = <class 'numpy.int32'> Accessing an element returns a scalar

a[2].shape:   (2,), a[2]   = [4 5], type(a[2])   = <class 'numpy.ndarray'>


In [81]:
# 2-D slicing: one index or start:stop:step slice per axis, as a[rows, columns].
a = np.arange(20).reshape(-1, 10)
print(f"a = \n{a}")

# Take each slice once up front so every print below reports a value together
# with its shape.
row0_window = a[0, 2:7:1]    # 5 consecutive elements from row 0
all_rows_window = a[:, 2:7:1]    # the same 5 columns from every row
whole_matrix = a[:, :]    # all elements
row1_explicit = a[1, :]    # all elements in one row
row1_shorthand = a[1]    # same row, column slice omitted

print("a[0, 2:7:1] = ", row0_window, ",  a[0, 2:7:1].shape =", row0_window.shape, "a 1-D array")
print("a[:, 2:7:1] = \n", all_rows_window, ",  a[:, 2:7:1].shape =", all_rows_window.shape, "a 2-D array")
print("a[:,:] = \n", whole_matrix, ",  a[:,:].shape =", whole_matrix.shape)
print("a[1,:] = ", row1_explicit, ",  a[1,:].shape =", row1_explicit.shape, "a 1-D array")
print("a[1]   = ", row1_shorthand, ",  a[1].shape   =", row1_shorthand.shape, "a 1-D array")


a = 
[[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]]
a[0, 2:7:1] =  [2 3 4 5 6] ,  a[0, 2:7:1].shape = (5,) a 1-D array
a[:, 2:7:1] = 
 [[ 2  3  4  5  6]
 [12 13 14 15 16]] ,  a[:, 2:7:1].shape = (2, 5) a 2-D array
a[:,:] = 
 [[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]] ,  a[:,:].shape = (2, 10)
a[1,:] =  [10 11 12 13 14 15 16 17 18 19] ,  a[1,:].shape = (10,) a 1-D array
a[1]   =  [10 11 12 13 14 15 16 17 18 19] ,  a[1].shape   = (10,) a 1-D array
