# NumPy: a scientific computing package

In [5]:
import numpy as np
import time
import math

## Vectors 
- Vectors are ordered arrays of numbers
- The elements of a vector are all the same type
- A vector does not, for example, contain both characters and numbers
- The number of elements in the array is often referred to as the dimension, though it may also be referred to as the rank
- A vector with n elements is n-dimensional
- In python, the indexing will run from 0 to n-1

In [None]:
# Allocation routines take the shape as their argument: either a single
# value for a 1-D array or a tuple (n, m, ...) for higher dimensions
a = np.zeros(5)
print(f"np.zeros(5) : a = {a}, a shape = {a.shape}, a data type = {a.dtype}")

# a one-element tuple describes the same 1-D shape as the bare int above
b = np.zeros((5,))
print(f"np.zeros((5,)) : b = {b}, b shape = {b.shape}, b data type = {b.dtype}")

# random_sample fills the array with random floats from [0, 1)
c = np.random.random_sample(5)
print(f"np.random.random_sample(5) : c = {c}, c shape = {c.shape}, c data type = {c.dtype}")


np.zeros(5) : a = [0. 0. 0. 0. 0.], a shape = (5,), a data type = float64
np.zeros(4,) : b = [0. 0. 0. 0. 0.], b shape = (5,), b data type = float64
np.random.random_sample(5) : c = [0.76790316 0.83367361 0.43256577 0.99124258 0.14135867], c shape = (5,), c data type = float64


In [19]:
# Routines that create an array already filled with values; these do not
# take a shape tuple as their input argument
# the float argument 4. produces the float values 0., 1., 2., 3.
d = np.arange(4.)
print(f"np.arange(4.) : d = {d}, d shape = {d.shape}, d data type = {d.dtype}")

# four random floats
e = np.random.rand(4)
print(f"np.random.rand(4) : e = {e}, e shape = {e.shape}, e data type = {e.dtype}")

np.arange(4.) : d = [0. 1. 2. 3.], d shape = (4,), d data type = float64
np.random.rand(4) : e = [0.25606118 0.74977864 0.26591196 0.58632868], e shape = (4,), e data type = float64


In [21]:
# Create arrays from explicit Python lists
# all-integer input gives an integer array
f = np.array([8,7,6,5,4,3,2])
print(f"np.array([8,7,6,5,4,3,2]) : f ={f}, f shape = {f.shape}, f data type = {f.dtype}")

# a single float in the list (6.) makes every element a float
g = np.array([6.,5,4,3])
print(f"np.array([6.,5,4,3]) : g = {g}, g shape = {g.shape}, g data type = {g.dtype}")

np.array([8,7,6,5,4,3,2]) : f =[8 7 6 5 4 3 2], f shape = (7,), f data type = int64
np.array([5.,4,3,2]) : g = [6. 5. 4. 3.], g shape = (4,), g data type = float64


## Operations on Vectors 
- Indexing means referring to an element of an array by its position within the array. NumPy starts indexing at 0, so for the vector a = [5,4,3], a[2] = 3 is the 3rd element
- Slicing means getting a subset of elements from an array based on their indices. For example, a[0:2] = [5,4] is the first two elements of the same vector

In [28]:
# Indexing
# vector indexing operations on 1-D arrays
h = np.arange(10)
print(h)

# a single element is a scalar, so its shape is the empty tuple ()
print(f"h[2].shape = {h[2].shape}")

# accessing an element returns a scalar
print(f"h[2] = {h[2]}")

# access the last element; negative indexes count from the end
print(f"h[-1] = {h[-1]}")

# indexes must be within the range of the vector or they will produce an error
try:
    res = h[10]
except IndexError as err:
    # Bound to `err`, not `e`: Python unbinds the `as` target when the handler
    # exits, which would delete a notebook variable that shares the name.
    print(f"The error message: {err}")

[0 1 2 3 4 5 6 7 8 9]
h[2].shape = ()
h[2] = 2
h[-1] = 9
The error message: index 10 is out of bounds for axis 0 with size 10


In [40]:
# Slicing selects a subset of elements using a set of 3 values (start:stop:step);
# start is included, stop is excluded
i = np.arange(10)
print(f"i = {i}")

# access 5 consecutive elements (indexes 1 through 5)
print(f"5 consecutive elements: i[1:6:1] = {i[1:6:1]}")

# access 3 elements, taking every second index from 1 up to (not including) 6
print(f"3 elements separtated by two: i[1:6:2] = {i[1:6:2]}")

# access all elements at index 3 and above (stop omitted)
print(f"all elements index 3 and above: i[3:] = {i[3:]}")

# access all elements below index 3 (start omitted)
print(f"all elements below index 3: i[:3] = {i[:3]}")

# access all elements (start and stop omitted)
print(f"all elements: i[:] = {i[:]}")

i = [0 1 2 3 4 5 6 7 8 9]
5 consecutive elements: i[1:6:1] = [1 2 3 4 5]
3 elements separtated by two: i[1:6:2] = [1 3 5]
all elements index 3 and above: i[3:] = [3 4 5 6 7 8 9]
all elements below index 3: i[:3] = [0 1 2]
all elements: i[:] = [0 1 2 3 4 5 6 7 8 9]


In [None]:
# Single vector operations
m = np.array([1,2,3,4])

# negate every element of m
print(f"negate element of m: -m = {-m}")

# sum all elements of m; returns a scalar
print(f"sum all elements of m: np.sum(m) = {np.sum(m)}")

# average of all elements of m
print(f"average all elements of m: np.mean(m) = {np.mean(m)}")

# square every element of m
print(f"m**2 = {m**2}")

negate element of m: -m = [-1 -2 -3 -4]
sum all elements of m: np.sum(m) = 10
average all elements of m: np.mean(m) = 2.5
m**2 = [ 1  4  9 16]


In [None]:
# Vector-vector element-wise operations: the operator is applied to each
# pair of elements at the same position (element-by-element)
a = np.array([1,2,3,4])
b = np.array([-1,-2,3,4])
print(f"binary operators work element wise: a + b = {a+b}")

binary operators work element wise: a + b = [0 0 6 8]


In [48]:
# Mismatched vector operation: element-wise addition of a 4-element vector
# (`a`, from the cell above) and a 2-element vector fails
c = np.array([1,2])
try:
    d = a + c
except ValueError as err:
    # Bound to `err`, not `e`: Python unbinds the `as` target when the handler
    # exits, which would delete a notebook variable that shares the name.
    print(f"The error message is {err}")

The error message is operands could not be broadcast together with shapes (4,) (2,) 


In [50]:
# Scalar-vector operations
# Vectors can be scaled by scalar values: the scalar multiplies every element of the vector
a = np.array([1,2,3,4])

print(f"multiply a vector a by 5: 5*a = {5*a}")

multiply a vector a by 5: 5*a = [ 5 10 15 20]


## Vector dot product
- The dot product multiplies the values in two vectors element-by-element and then sums the results
- It requires the dimensions of the two vectors to be the same


The function below computes the dot product of two vectors. Given inputs $a$ and $b$:
$$ x = \sum_{i=0}^{n-1} a_i b_i $$
Assume both `a` and `b` are the same shape

In [51]:
def my_dot(a,b):
    """
    Compute the dot product of two vectors with an explicit Python loop

    Args:
        a (ndarray (n,)): input vector
        b (ndarray (n,)): input vector with same dimension as a

    Returns:
        x (scalar): sum of a[i] * b[i] over all i; 0 when the vectors are empty

    Raises:
        ValueError: if a and b do not have the same shape
    """
    # Without this check a longer b would be silently truncated and a shorter
    # b would fail midway with an unrelated IndexError.
    if a.shape != b.shape:
        raise ValueError(
            f"shapes {a.shape} and {b.shape} not aligned: "
            "my_dot requires vectors of the same shape"
        )

    x = 0
    # Accumulate in index order, one element pair at a time
    for a_i, b_i in zip(a, b):
        x = x + a_i * b_i
    return x

In [52]:
# Test my_dot on 1-D arrays: 1*(-1) + 2*4 + 3*3 + 4*2 = 24
a = np.array([1,2,3,4])
b = np.array([-1,4,3,2])
print(f"The dot product: my_dot(a,b) = {my_dot(a,b)}")

The dot product: my_dot(a,b) = 24


In [54]:
# Use NumPy to compute the same dot product; `a` and `b` come from the cell above
# The result is a scalar, so its shape is ()
c =np.dot(a,b)
print(f"numpy 1-D array dot product: np.dot(a,b) = {c}, np.dot(a,b).shape = {c.shape}")
# swapping the arguments gives the same value
d =np.dot(b,a)
print(f"numpy 1-D array dot product: np.dot(b,a) = {d}, np.dot(b,a).shape = {d.shape}")

numpy 1-D array dot product: np.dot(a,b) = 24, np.dot(a,b).shape = ()
numpy 1-D array dot product: np.dot(b,a) = 24, np.dot(b,a).shape = ()


In [58]:
# Compare the speed of the vectorized np.dot with the explicit loop in my_dot
np.random.seed(1)
# create very large arrays
a = np.random.rand(10000000)
b = np.random.rand(10000000)

# Time np.dot(a,b).
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is wall-clock time and can jump if the system
# clock is adjusted.
start_time = time.perf_counter()
# compute the dot product of vectors a and b using np.dot()
c = np.dot(a,b)
end_time = time.perf_counter()

print(f"np.dot(a,b) = {c:.4f}, vectorized duration: {1000*(end_time-start_time):.4f} ms")

# Time my_dot(a,b) on the same inputs
start_time = time.perf_counter()
# compute the dot product of vectors a and b using my_dot()
d = my_dot(a,b)
end_time = time.perf_counter()

print(f"my_dot(a,b) = {d:.4f}, loop version duriation: {1000*(end_time-start_time):.4f} ms")

np.dot(a,b) = 2501072.5817, vectorized duration: 2.8269 ms
my_dot(a,b) = 2501072.5817, loop version duriation: 1285.2499 ms
