## NumPy, Python and Vectorization for Multiple Linear Regression

In [3]:
import numpy as np
import time

Numpy or Numerical Python: https://numpy.org/doc/stable/user/absolute_beginners.html
- Matrix,
- Matrix functions/operations
- Vectors: an ordered sequence of elements of the same data type (a vector can't contain both characters and numbers), indexed from 0 to n-1 in code and from 1 to n in math
- Dimension / rank n: the number of elements in the vector, e.g. the column vector $(x_1, x_2, x_3, \dots, x_n)$

### Broadcasting in NumPy
How NumPy stretches arrays of different shapes to a common shape so that arithmetic operations can be carried out element-wise

#### Vector multiplication:

In [18]:
# Broadcasting: a (3, 1) column times a (2,) row is stretched to a (3, 2) result
a = np.array([1.0, 2.0, 3.0])
b = np.array([2, 4])
c = a.reshape(3, 1) * b  # c[i, j] == a[i] * b[j]
print(c)


[[ 2.  4.]
 [ 4.  8.]
 [ 6. 12.]]


### NumPy Arrays:
Indexable, n-dimensional array containing elements of the same data type
- 1-D array, shape (n,): n elements indexed [0] through [n-1]



In [43]:
# NumPy routines that allocate an array and fill it with values; each takes the shape as its argument:
# zeros(4), zeros((4,)), random.random_sample(4)

# For a 1-D array the shape can be given as a bare int or as a 1-tuple -- both forms are equivalent
a = np.zeros(4) # shape given as the int 4
b = np.zeros((4,)) # shape given as the tuple (4,); same result as zeros(4), as the printed shape/dtype show
print(f"b = {b}\nb.shape = {b.shape}\nb.dtype = {b.dtype}")
print(f"a = {a}\na.shape = {a.shape}\na.dtype = {a.dtype}")
print(len(b)) # len() of a 1-D array is its number of elements
a = np.random.random_sample(4) # rebinds a to 4 random floats from [0, 1); unseeded, so values differ per run
print(f"a = {a}\na.shape = {a.shape}\na.dtype = {a.dtype}")

b = [0. 0. 0. 0.]
b.shape = (4,)
b.dtype = float64
a = [0. 0. 0. 0.]
a.shape = (4,)
a.dtype = float64
4
a = [0.93965912 0.6892852  0.29526915 0.3215618 ]
a.shape = (4,)
a.dtype = float64


In [45]:
# NumPy routines that allocate an array but do not take a shape tuple as their argument:
# arange(4.) takes start/stop/step values, random.rand(4) takes each dimension as a separate argument
# Both still return a 1-D array of 4 elements here
a = np.arange(4.) # a single argument is the stop value: counts 0., 1., 2., 3. (stop is excluded); a float argument gives a float array
print(f"a = {a}\na.shape = {a.shape}\na.dtype = {a.dtype}")
b = np.random.rand(4) # 4 random floats from [0, 1); unseeded, so values differ per run
print(f"b = {b}\nb.shape = {b.shape}\nb.dtype = {b.dtype}")


# The trailing comma inside the call does not create a tuple: arange(4,) is the same call as arange(4).
# 4 is still the stop value, and because it is an int the result has an integer dtype (int32 in the recorded output).
c = np.arange(4,)
print(f"c = {c}\nc.shape = {c.shape}\nc.dtype = {c.dtype}")

len(b)

a = [0. 1. 2. 3.]
a.shape = (4,)
a.dtype = float64
b = [0.08184469 0.60174855 0.06828144 0.91246989]
b.shape = (4,)
b.dtype = float64
c = [0 1 2 3]
c.shape = (4,)
c.dtype = int32


4

In [50]:
# NumPy routine that allocates an array and fills it with user-specified values: np.array(list)
# The dtype is inferred from the values given
b = np.array([4,5,6,7]) # all ints -> integer dtype (int32 in the recorded output)
print(f"b = {b}\nb.shape = {b.shape}\nb.dtype = {b.dtype}")
a = np.array([4.,5,78,43]) # one float (4.) is enough to make every element float64
print(f"a = {a}\na.shape = {a.shape}\na.dtype = {a.dtype}") # float data type


b = [4 5 6 7]
b.shape = (4,)
b.dtype = int32
a = [ 4.  5. 78. 43.]
a.shape = (4,)
a.dtype = float64


##### **a.shape = (4,) -> a 1-D array (vector) with 4 elements; its dimension as a vector is 4, while `a.ndim` is 1**

### Slicing and Indexing
- Slicing - extracting a subset of the elements of an array
- Indexing - obtaining the value in the array at a particular index


In [57]:
# Indexing: a[i] returns the single element at position i
a = np.arange(10)
print(a)

print(f"a[2] = {a[2]}")
print(f"a[-1] = {a[-1]}") # negative indices count back from the end, so -1 is the last element
# Valid indices for 10 elements are -10..9, so index 10 is out of range.
# Catch only IndexError: any other, unexpected failure should surface
# instead of being presented as the demonstrated error.
try:
    c = a[10]
except IndexError as e:
    print("The error message you will see is:")
    print(e)

[0 1 2 3 4 5 6 7 8 9]
a[2] = 2
a[-1] = 9
The error message you will see is:
index 10 is out of bounds for axis 0 with size 10


In [68]:
# Slicing a[start:stop:step] -- start is included, stop is excluded; omitted parts take their defaults
a = np.arange(10)
print(f"a[10:2:-1] = {a[10:2:-1]}") # negative step walks backwards; start 10 is past the end, so it begins at the last element and stops before index 2
print(f"a[1:6:] = {a[1:6:]}") # step omitted -> defaults to 1
print(f"a[1:6:2] = {a[1:6:2]}") # every second element from index 1 up to (not including) index 6
print(f"a[3:] = {a[3:]}") # from index 3 to the end
print(f"a[:3] = {a[:3]}") # from the beginning up to (not including) index 3
print(f"a[:]={a[:]}") # every element

a[10:2:-1] = [9 8 7 6 5 4 3]
a[1:6:] = [1 2 3 4 5]
a[1:6:2] = [1 3 5]
a[3:] = [3 4 5 6 7 8 9]
a[:3] = [0 1 2]
a[:]=[0 1 2 3 4 5 6 7 8 9]


### Single Vector Operation
- Mean
- Sum 
- Multiplication with a scalar
- Negative


In [79]:
# Operations on a single vector; each result is printed right after it is computed
a = np.array([1, 2, 3, 4])
print(a)

b = -a  # negation, element by element
print(b)

c = a * 2  # multiplication with a scalar
print(c)

d = a * a  # element-wise square (same values as a**2)
print(d)

e = a.sum()  # sum of all elements
print(e)

d = a.mean()  # note: d is rebound from the squares to the mean
print(d)

[1 2 3 4]
[-1 -2 -3 -4]
[2 4 6 8]
[ 1  4  9 16]
10
2.5


### Vector vector element wise operations
$$c_i = a_i +b_i$$
Vectors must be of the same size for element-wise addition to take place

In [82]:
# Element-wise addition of two vectors of the same length
a = np.array([1, 2, 3, 4])
b = np.array([-1, -2, -3, -4])
print(np.add(a, b))  # np.add(a, b) is the function form of a + b

[0 0 0 0]


In [85]:
# Element-wise addition needs compatible shapes: (2,) and (3,) cannot be broadcast together
a = np.array([1,2])
b = np.arange(3) # [0 1 2]
print(b)
# NumPy raises ValueError for shapes that cannot be broadcast.
# Catch only that, so an unrelated failure is not presented as the demonstrated error.
try:
    c = a+b
except ValueError as e:
    print("The error is: ")
    print(e)

[0 1 2]
The error is: 
operands could not be broadcast together with shapes (2,) (3,) 


### Scalar Vector operations

In [88]:
# Scalar-vector multiplication: the scalar is applied to every element
A = np.array([1, 2, 3, 4])

b = A * 5
print(b)

[ 5 10 15 20]


### Vector Vector Dot Product

$$a \cdot b = \sum \limits_{i=0}^{n-1} a_i b_i$$
$$a \cdot b = (a_0,a_1,a_2,a_3) \cdot (b_0,b_1,b_2,b_3) = a_0b_0 + a_1b_1 + a_2b_2 + a_3b_3$$

In [94]:
# Dot product of two 3-element vectors; the result is a single scalar
a = np.array([1, 2, 3])
b = np.array([3, 4, 5])
c = np.dot(a, b)  # 1*3 + 2*4 + 3*5
print(f"shape  of a = {a.shape}")
print(f"shape  of b = {b.shape}")
print(c)

shape  of a = (3,)
shape  of b = (3,)
26


### Using a for loop

In [95]:
def my_dot(a,b):
    """
    Compute the dot product of two equal-length vectors with an explicit loop.

    This is the deliberately un-vectorized counterpart of np.dot, used to
    compare loop speed against vectorized speed.

    Args:
      a : sequence or 1-D ndarray of n numbers
      b : sequence or 1-D ndarray of n numbers, same length as a

    Returns:
      x : sum of a[i]*b[i] over all i; 0 when both inputs are empty

    Raises:
      ValueError: if a and b do not have the same length
    """
    # Without this check a longer b would be silently truncated to len(a)
    # and a wrong result returned; np.dot also rejects mismatched lengths.
    if len(a) != len(b):
        raise ValueError(
            f"a and b must have the same length, got {len(a)} and {len(b)}"
        )
    x=0
    for i in range(len(a)):
        x = x + a[i]*b[i]
    return x

In [97]:
# a and b stay defined for the next cell, which computes the same product with np.dot
a = np.array([1,2,3,4])
b = np.array([3,4,5,6])
my_dot(a,b) # explicit for loop, no vectorization: 1*3 + 2*4 + 3*5 + 4*6 = 50

50

In [98]:
print(np.dot(a,b)) # vectorized equivalent of my_dot(a,b); a and b come from the previous cell

50


### Consider a very large array


In [128]:
# Time the vectorized dot product against the explicit Python loop on 10 million elements
np.random.seed(1) # fixed seed so the arrays (and the dot product) are reproducible
a = np.random.rand(10000000)
b = np.random.rand(10000000)
print(a)
print(b)

# perf_counter is a monotonic, high-resolution clock meant for measuring intervals
tic = time.perf_counter() # start time
c = np.dot(a,b)
toc = time.perf_counter() # end time

print(f"Dot Product of a and b = {c}, Vectorised version duration: {1000*(toc-tic):.4f} ms")

tic = time.perf_counter()
d = my_dot(a,b) # same product with the Python for loop (takes several seconds)
toc = time.perf_counter()

# c and d can differ in the last few digits because the two methods add the terms in a different order
print(f"Dot Product of a and b = {d}, Loop version duration: {1000*(toc-tic):.4f} ms")
del a, b # remove big arrays from memory

[4.17022005e-01 7.20324493e-01 1.14374817e-04 ... 1.62642575e-01
 8.55441337e-01 6.51160047e-01]
[0.49884527 0.5194437  0.9480511  ... 0.99416697 0.61599986 0.41453835]
Dot Product of a and b = 2501072.5816813176, Vectorised version duration: 8.0104
Dot Product of a and b = 2501072.5816813707, Vectorised version duration: 18482.4843


In [132]:
# A 2-D column vector: 4 rows, 1 column
x = np.array([[1],[2],[3],[4]])
print(x.shape)
print(x[1].shape) # a single index selects one row, which is itself a 1-D array with one element


(4, 1)
(1,)


### Matrix

In [136]:
# Creation 
x = np.zeros((2,4))
print(x)
print(x.shape)
print(x[1].shape)


[[0. 0. 0. 0.]
 [0. 0. 0. 0.]]
(2, 4)
(4,)


In [141]:
# Two matrices built from consecutive integers laid out with reshape
a = np.arange(1, 5).reshape(4, 1)  # 4x1 column: 1..4
b = np.arange(1, 10).reshape(3, 3)  # 3x3 matrix: 1..9, filled row by row
print(a)
print(a.shape)
print(b)

[[1]
 [2]
 [3]
 [4]]
(4, 1)
[[1 2 3]
 [4 5 6]
 [7 8 9]]


### Indexing 


In [163]:
a = np.arange(6).reshape(3, 2)  # 0..5 laid out as 3 rows and 2 columns
print(a)
print(a[2].shape)  # one index selects a whole row
print(a[2, -1])  # row 2, last column
print(a[2, 0].shape)  # a single element is a scalar, so its shape is ()
print(type(a[2, 1]))



[[0 1]
 [2 3]
 [4 5]]
(2,)
5
()
<class 'numpy.int32'>


### Slicing

In [177]:
# 0..19 laid out as 2 rows and 10 columns; indexing is a[row, column], each part may be a slice
a = np.arange(20).reshape(2,10)
print(a)

# access consecutive elements of one row (start:stop:step): row 0, columns 2 to 6
print("a[0, 2:7:1] = ", a[0, 2:7:1])

# access the same consecutive columns in every row (both rows here)
print("a[:, 2:7:1] = ", a[:, 2:7:1])

# access all elements
print("a[:,:]= ",a[:,:])

# access all elements in one row (row 0)
print("a[0, :]= ",a[0,:])




[[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]]
a[0, 2:7:1] =  [2 3 4 5 6]
a[:, 2:7:1] =  [[ 2  3  4  5  6]
 [12 13 14 15 16]]
a[:,:]=  [[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]]
a[0, :]=  [0 1 2 3 4 5 6 7 8 9]
