# Machine Learning Zoomcamp


## 1.7 Introduction to NumPy


Plan:

* Creating arrays
* Multi-dimensional arrays
* Randomly generated arrays
* Element-wise operations
    * Comparison operations
    * Logical operations
* Summarizing operations

In [2]:
import pandas as pd 
import numpy as np

### Creating arrays

In [3]:
# Creates a 1-dimensional array of zeros with 5 elements
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [4]:
# Creates a 1-dimensional array of ones with 10 elements
np.ones(10)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [33]:
# Creates a 1-dimensional array of 10 elements, each initialized to the value 2.5
np.full(10, 2.5)

array([2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5])

In [45]:
#python list -> can be appended/dynamic
[1,2,3,5,7,12] 

# turning the list into an array -> an array has a fixed size (values can change, but it cannot be appended to in place)
a = np.array([1,2,3,5,7,12])

In [None]:
# accessing a specific value of an array using the index -> reminder index begins at zero
a[2]

3

In [47]:
#modifying an array at a specific index
a[2]=10
a

array([ 1,  2, 10,  5,  7, 12])

In [None]:
# Can take up to three arguments
    #1. start value (inclusive) [1]
    #2. stop value (exclusive) [54]
    #3. step -> the interval by which each value increases [10]
# Creates an array starting at the first value and ending before the second value
np.arange(1,54,10)


array([ 1, 11, 21, 31, 41, 51])

In [None]:
# Takes up to three arguments
    #1. start (inclusive)
    #2. stop (inclusive) 
    #3. number of evenly spaced values to generate
np.linspace(0,1,11)

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])

### Multi-dimensional arrays

In [None]:
# Creates a 2-dimensional array of zeros with five rows and two columns
    # the shape is passed as one tuple argument: (rows, columns)
    # here 5 is the number of rows and 2 is the number of columns
np.zeros((5,2))

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]])

In [81]:
# defining our own 2D array
n = np.array([
    [1,2,3],
    [4,5,6],
    [7,8,9]
])

In [6]:
# accessing specific value in a 2D array
    # the first index specifies the row
    # the second index specifies the column
n[1,1]

5

In [7]:
# accessing specific row in a 2D array
n[1]

array([4, 5, 6])

In [82]:
# modifying a 2D array at a specific index -> modifying a specific value
n[0,1]=20
n

array([[ 1, 20,  3],
       [ 4,  5,  6],
       [ 7,  8,  9]])

In [83]:
# modify an entire row in a 2D array
    # in this example the row at index 2 is being modified
n[2] = [1,1,1]
n

array([[ 1, 20,  3],
       [ 4,  5,  6],
       [ 1,  1,  1]])

In [None]:
# accessing a column based on index
n[:,1]

array([20,  5,  1])

In [84]:
# modify an entire column in a 2D array
    # in this example the column at index 2 is being modified
n[:,2] = [0,1,2]
n

array([[ 1, 20,  0],
       [ 4,  5,  1],
       [ 1,  1,  2]])

In [239]:
n[0:2, 1]  # gets rows 0 and 1 from column 1


array([20,  5])

In [36]:
n[:,:]

array([[ 1, 20,  0],
       [ 4,  5,  1],
       [ 1,  1,  2]])

### Randomly generated arrays

In [None]:
# creates a random 2D array
    # in this example the first argument is the number of rows
    # the second argument is the number of columns
np.random.rand(5,2)

array([[0.59029492, 0.86915622],
       [0.99381853, 0.6839331 ],
       [0.4198518 , 0.44325647],
       [0.82210166, 0.60800069],
       [0.89399929, 0.28160075]])

In [None]:
# reproducible random array
    # uniform distribution (between 0 and 1)
    # Every number has an equal chance of being anywhere from 0 to 1
np.random.seed(2)
np.random.rand(5,2)

array([[0.4359949 , 0.02592623],
       [0.54966248, 0.43532239],
       [0.4203678 , 0.33033482],
       [0.20464863, 0.61927097],
       [0.29965467, 0.26682728]])

In [19]:
# array for standard normal distribution
np.random.seed(2)
np.random.randn(5,2)

array([[-0.41675785, -0.05626683],
       [-2.1361961 ,  1.64027081],
       [-1.79343559, -0.84174737],
       [ 0.50288142, -1.24528809],
       [-1.05795222, -0.90900761]])

In [28]:
np.random.seed(2)
100 *np.random.rand(5,2)

array([[43.59949021,  2.59262318],
       [54.96624779, 43.53223926],
       [42.03678021, 33.0334821 ],
       [20.4648634 , 61.92709664],
       [29.96546737, 26.68272751]])

In [26]:
np.random.seed(2)
np.random.randint(low=0,high=100, size=(5,2))

array([[40, 15],
       [72, 22],
       [43, 82],
       [75,  7],
       [34, 49]])

### Element-wise operations

In [58]:
a = np.arange(5)
a

array([0, 1, 2, 3, 4])

In [None]:
# adding one to each value of the array
a + 1


array([1, 2, 3, 4, 5])

In [None]:
# multiplying the array by 2
a*2

array([0, 2, 4, 6, 8])

In [37]:
# dividing each value in the array by 100
a/100

array([0.  , 0.01, 0.02, 0.03, 0.04])

In [59]:
# Chaining Operations

b=(10+(a*2)) **2/100
b

array([1.  , 1.44, 1.96, 2.56, 3.24])

In [61]:
# Combining arrays
a+b

array([1.  , 2.44, 3.96, 5.56, 7.24])

### Comparison Operations

In [64]:
a

array([0, 1, 2, 3, 4])

In [65]:
b

array([1.  , 1.44, 1.96, 2.56, 3.24])

In [63]:
a >= 2

array([False, False,  True,  True,  True])

In [68]:
# comparing arrays to each other
a>b

array([False, False,  True,  True,  True])

In [69]:
a[a>b]

array([2, 3, 4])

### Summary operations

In [71]:
a

array([0, 1, 2, 3, 4])

In [73]:
a.min()

0

In [74]:
a.max()

4

In [76]:
a.mean()

2.0

In [78]:
a.sum()

10

In [86]:
a.std()

1.4142135623730951

In [87]:
n

array([[ 1, 20,  0],
       [ 4,  5,  1],
       [ 1,  1,  2]])

In [88]:
n.sum()

35

In [89]:
n.std()

5.89622184952071

In [90]:
n.mean()

3.888888888888889

In [96]:
n+n

array([[ 2, 40,  0],
       [ 8, 10,  2],
       [ 2,  2,  4]])

## 1.8 Linear Algebra

### Multiplication

#### Vector Vector Multiplication

In [None]:
# defining 1-dimensional array u
u = np.array([2, 4, 5, 6])
u

array([2, 4, 5, 6])

In [76]:
# defining 1-dimensional array v
v = np.array([1, 0, 0, 2])
v

array([1, 0, 0, 2])

In [None]:
# Finding Dot product using Numpy built-in function
u.dot(v)

14

In [78]:
# Finding the dot product with a manual function
def vector_vector_multiplication(u, v):
    """Return the dot product of two vectors, computed with an explicit loop.

    Both arguments must expose ``shape`` and support integer indexing
    (for example 1-dimensional NumPy arrays). Element ``i`` of ``u`` is
    multiplied by element ``i`` of ``v`` and the products are added up,
    starting from 0.

    Raises AssertionError if the first dimensions of ``u`` and ``v`` differ.
    """
    u_len, v_len = u.shape[0], v.shape[0]
    # both vectors need the same number of elements (an if statement would also work)
    assert u_len == v_len
    total = 0
    # v_len equals u_len at this point, so either length can drive the loop
    for position in range(v_len):  # e.g. 0, 1, 2, 3 for vectors of length 4
        product = u[position] * v[position]
        total = total + product
    return total

vector_vector_multiplication(u, v)

14

In [37]:
v.shape[0]

4

In [None]:
# seeing how the range will loop for the vectors
list(range(0, 4))

[0, 1, 2, 3]

In [16]:
list(range(u.shape[0]))

[0, 1, 2, 3]

#### Matrix Vector Multiplication

In [90]:
U = np.array([
    [2, 4, 5, 6],
    [1, 2, 1, 2],
    [3, 1, 2, 1],
])
U

array([[2, 4, 5, 6],
       [1, 2, 1, 2],
       [3, 1, 2, 1]])

In [49]:
s = np.array([1,0.5,2,1])

In [91]:
U.shape[1]

4

In [67]:
v.shape[0]

4

In [101]:

def matrix_vector_multiplication(U,v):
    """Multiply matrix ``U`` by vector ``v`` one row at a time.

    Entry ``i`` of the result is vector_vector_multiplication(U[i], v).
    The result is allocated with ``np.zeros``, so it has one entry per
    row of ``U``.

    Raises AssertionError if the number of columns of ``U`` differs from
    the number of elements of ``v``.
    """
    # the number of columns in U must match the number of elements in v
    assert U.shape[1] == v.shape[0]
    product = np.zeros(U.shape[0])
    for row_index, row in enumerate(U):
        product[row_index] = vector_vector_multiplication(row, v)
    return product
matrix_vector_multiplication(U,v)
    

array([14.,  5.,  5.])

In [82]:
num_rows = U.shape[0]
num_rows

3

In [94]:
list(range(U.shape[0]))

[0, 1, 2]

In [87]:
np.zeros(num_rows)

array([0., 0., 0.])

In [None]:
# Dot product using numpy
U.dot(v)

array([14,  5,  5])

In [70]:
U.shape[0]

3

#### Matrix Matrix Multiplication