In [1]:
import numpy as np


## List vs numpy array

In [3]:
# The same three values stored two ways: a built-in list and a NumPy array.
l = [1,2,3]
arr = np.array([1,2,3])

In [4]:
# `+` between two lists concatenates them.
l + l

[1, 2, 3, 1, 2, 3]

In [5]:
# `+` between two arrays adds them element by element.
arr + arr

array([2, 4, 6])

In [6]:
# Multiplying a list by an integer repeats its contents.
l * 2

[1, 2, 3, 1, 2, 3]

In [7]:
# Multiplying an array by a scalar scales every element.
arr * 2

array([2, 4, 6])

In [8]:
# Lists do not implement `**`, so this line raises the TypeError shown below.
l ** 2

TypeError: unsupported operand type(s) for ** or pow(): 'list' and 'int'

In [10]:
# Square each element with a list comprehension. The result gets its own name
# so `l` keeps its original values and re-running this cell gives the same output.
l_squared = [num ** 2 for num in l]
l_squared

[1, 4, 9]

In [9]:
# With an array, `**` squares every element; no explicit loop is needed.
arr ** 2

array([1, 4, 9])

The examples above show some differences between a Python list and a NumPy array: to apply an operation to each element of a list we need a for loop (or a comprehension), while a NumPy array applies the operation directly, which is more convenient and more efficient.

What we should keep in mind about NumPy is that most functions operate **element-wise**, so we should avoid explicit Python for loops for the sake of efficiency.

Let's see more examples below:

In [11]:
# Element-wise square root; the integer input produces a float array.
np.sqrt(arr)

array([1.        , 1.41421356, 1.73205081])

In [12]:
# Element-wise natural logarithm.
np.log(arr)

array([0.        , 0.69314718, 1.09861229])

In [13]:
# Element-wise exponential: e raised to each element.
np.exp(arr)

array([ 2.71828183,  7.3890561 , 20.08553692])

## Dot product

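By convention we think of a 1-D array as a column vector, although NumPy itself stores no row/column orientation for 1-D arrays.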

In [17]:
# Two 1-D arrays of equal length, used for the dot-product examples.
a = np.array([1,2])
b = np.array([2,1])

a * b # element-wise product, not the dot product

array([2, 2])

In [18]:
# Summing the element-wise products gives the dot product.
np.sum(a * b)
# equivalent method form: (a * b).sum()

4

In [19]:
# np.dot on two 1-D arrays returns their inner product directly.
np.dot(a , b)
# equivalent method forms: a.dot(b) or b.dot(a)

4

In [21]:
a_norm = np.linalg.norm(a) # Euclidean length of a; same as np.sqrt(a.dot(a))
a_norm

2.23606797749979

In [24]:
# Cosine of the angle between a and b: their dot product divided by the
# product of their Euclidean norms.
norm_product = np.linalg.norm(a) * np.linalg.norm(b)
cosine = a.dot(b) / norm_product
cosine

0.7999999999999998

### Speed comparison between for loop and np.dot

Run the following code. As the printed ratio shows, `np.dot` was about **45 times faster** than the for loop in this run (the exact ratio varies between machines and runs)!

In [28]:
from time import perf_counter

a = np.random.randn(100)
b = np.random.randn(100)
T = 10000  # number of repetitions timed for each implementation

def slow_dot(a, b):
    """Return the dot product of `a` and `b` computed with a plain Python loop.

    Elements are paired with zip(), so iteration stops at the shorter input,
    and 0 is returned when either input is empty.
    """
    result = 0
    for e, f in zip(a, b):
        result += e*f
    return result

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time() follows the wall clock, which can be coarse or be
# adjusted while the benchmark is running.
t0 = perf_counter()
for t in range(T):
    slow_dot(a, b)
dt1 = perf_counter() - t0  # total seconds for T pure-Python dot products


t0 = perf_counter()
for t in range(T):
    a.dot(b)
dt2 = perf_counter() - t0  # total seconds for T NumPy dot products

print("dt1 / dt2:", dt1 / dt2)

dt1 / dt2: 45.87883932096329


## Vector and Matrix

In [29]:
# A 2-D array is built from a list of rows.
matrix = np.array([[1,2], [3,4]])
matrix

array([[1, 2],
       [3, 4]])

In [30]:
# The same values as a nested Python list (a list of row lists); `l` is rebound here.
l = [[1,2], [3,4]]
l

[[1, 2], [3, 4]]

In [31]:
# Indexing a nested list once returns the first inner list (the first row).
l[0]

[1, 2]

In [32]:
# A second index picks the element inside that row.
l[0][0]

1

In [33]:
# A 2-D array accepts both indices in one bracket: (row, column).
matrix[0, 0]
# chained indexing also works: matrix[0][0]

1

**Note: the official documentation recommends using `np.array()` instead of `np.matrix()`.**

In [34]:
# np.matrix builds the dedicated matrix type; shown here only for comparison.
matrix2 = np.matrix([[1,2], [3,4]])
matrix2

matrix([[1, 2],
        [3, 4]])

In [36]:
# Convert the np.matrix to a plain ndarray; the name matrix2 is rebound to the new array.
matrix2 = np.array(matrix2)
matrix2

array([[1, 2],
       [3, 4]])

In [37]:
# .T gives the transpose (rows and columns swapped).
matrix2.T

array([[1, 3],
       [2, 4]])

## Generating matrix

In [38]:
# A single integer gives a 1-D array of that many zeros.
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [39]:
# A tuple gives the shape: here 5 rows by 5 columns of zeros.
np.zeros((5, 5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [40]:
# Same shape convention as np.zeros, filled with ones.
np.ones((5, 5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [41]:
# Uniform random samples in [0, 1); the shape is passed as a tuple.
np.random.random((5, 5))

array([[0.21695243, 0.78386619, 0.48893651, 0.92066325, 0.47104191],
       [0.81380145, 0.0550958 , 0.42626864, 0.49729779, 0.00587362],
       [0.86575135, 0.56825061, 0.65180913, 0.7424681 , 0.33432839],
       [0.18126493, 0.44896505, 0.55685382, 0.01273527, 0.08352198],
       [0.5301564 , 0.23760157, 0.13173621, 0.54388527, 0.81684255]])

The function above generates a (5, 5) matrix of random numbers drawn from a uniform distribution on [0, 1). What if we want numbers from a Gaussian distribution?

Simply call `np.random.randn()`

In [42]:
# randn does not accept a shape tuple, so this line raises the TypeError shown below.
np.random.randn((5, 5))

TypeError: 'tuple' object cannot be interpreted as an integer

Oops, that's wrong. 

In fact, `randn()` expects each dimension as a separate integer argument, as below, and draws its samples from the standard normal distribution.

In [44]:
# randn takes each dimension as its own argument and samples the standard normal.
g_matrix = np.random.randn(5, 5)
g_matrix

array([[-1.3277477 ,  1.54827018, -0.76579292, -0.12413678, -0.93389291],
       [-0.33064332,  0.88905255, -1.10763003,  2.25360156,  0.33999932],
       [ 0.19396113,  0.14342911, -1.17977915, -1.30889526,  0.85044897],
       [-3.38673393,  0.26019555,  0.80073438,  1.34081015, -1.72967271],
       [ 1.07487767,  0.49993933, -2.22862547, -1.4237975 ,  0.96417804]])

In [45]:
# Mean over all 25 entries; only roughly 0 because the sample is small.
g_matrix.mean()

-0.18751398955322574

In [46]:
# Variance over all 25 entries (population variance, ddof=0, by default).
g_matrix.var()

1.690390479223531

## Matrix products

`np.dot()` gives us **matrix multiplication in the mathematical sense**.

Previously, when we used the asterisk (`*`) on two vectors (1D arrays), it performed element-wise multiplication. It works the same way for two matrices (2D arrays). But do keep in mind: the two operands must have the same shape, or shapes that NumPy can broadcast together!

In [49]:
m1 = np.array([[1,2], [3,4]])
m2 = np.ones((2,2))

# `*` multiplies matching entries; with m2 all ones the result equals m1 (as floats).
element_multi = m1 * m2
element_multi

array([[1., 2.],
       [3., 4.]])

In [50]:
# True matrix product: entry (i, j) is row i of m1 dotted with column j of m2.
matrix_multi = m1.dot(m2)
matrix_multi

array([[3., 3.],
       [7., 7.]])

### Multiplying a matrix by a vector

In [58]:
m1 = np.array([[1,2], [3,4]])
v2 = np.ones(2)  # 1-D vector of two ones

In [59]:
# v2 has shape (2,) and is broadcast across each row of the (2, 2) matrix m1.
element_multi = m1 * v2
element_multi

array([[1., 2.],
       [3., 4.]])

In [60]:
# Matrix-vector product: a (2, 2) matrix times a length-2 vector gives a length-2 vector.
matrix_multi = m1.dot(v2)
matrix_multi

array([3., 7.])

## More Matrix Operations

In [61]:
m1 = np.array([[1,2], [3,4]])

m1_inv = np.linalg.inv(m1)  # matrix inverse of m1
m1_inv

array([[-2. ,  1. ],
       [ 1.5, -0.5]])

In [62]:
# This should give the identity matrix, up to floating-point rounding
# (note the 1.1e-16 entry in the output instead of an exact 0).
m1.dot(m1_inv)

array([[1.00000000e+00, 1.11022302e-16],
       [0.00000000e+00, 1.00000000e+00]])

In [63]:
# Determinant of m1: 1*4 - 2*3 = -2, up to floating-point rounding.
np.linalg.det(m1)

-2.0000000000000004

In [64]:
# Given a 2D array, np.diag returns its diagonal elements as a 1D array.
np.diag(m1)

array([1, 4])

In [65]:
# Given a 1D sequence, np.diag builds the diagonal matrix with those values on the diagonal.
np.diag([1,2])

array([[1, 0],
       [0, 2]])

### outer product vs inner product

You might need outer product when calculating the covariance of some sample vectors

In [66]:
# Two length-2 vectors for the outer/inner product examples.
a = np.array([1,2])
b = np.array([3,4])

In [67]:
# Outer product: entry (i, j) is a[i] * b[j].
np.outer(a, b)

array([[3, 4],
       [6, 8]])

In [68]:
# Inner product: sum of a[i] * b[i] (1*3 + 2*4 = 11).
np.inner(a,b)
# for 1-D arrays this is the same as np.dot(a,b)

11

In [70]:
# The trace is the sum of the diagonal entries, i.e. np.diag(m1).sum()
np.trace(m1)

5

### Eigenvalues and eigenvectors

In [71]:
# Randomly generated data: 100 samples (rows) with 3 features (columns).
X = np.random.randn(100, 3)

In [72]:
# np.cov treats each row as a variable by default, so the 100 sample rows
# are mistaken for 100 variables.
cov = np.cov(X)
cov.shape
# (100, 100) is the wrong shape; we expect (3, 3), one row/column per feature

(100, 100)

In [74]:
# Transpose X first so that each row holds one feature; np.cov then returns the
# (3, 3) feature covariance. np.cov(X, rowvar=False) is an equivalent spelling.
cov = np.cov(X.T)
cov

array([[0.94384695, 0.01478218, 0.05864014],
       [0.01478218, 0.84222313, 0.01750041],
       [0.05864014, 0.01750041, 1.05057945]])

In [75]:
# eigenvalues, eigenvectors = np.linalg.eig(matrix) works for a general square matrix.
# For a symmetric matrix (A == A.T), such as a covariance matrix, np.linalg.eigh
# can be used instead; it returns the eigenvalues in ascending order, and
# column i of the second array is the eigenvector for eigenvalue i.

np.linalg.eigh(cov)

(array([0.83965222, 0.91844881, 1.07854849]),
 array([[ 0.11197149, -0.90700176, -0.40596822],
        [-0.9923911 , -0.08101235, -0.09271948],
        [ 0.05120829,  0.41326119, -0.90917155]]))

In [76]:
# The general solver finds the same eigenpairs; as the output shows, the
# ordering and the signs of the eigenvectors can differ from eigh.
np.linalg.eig(cov)

(array([1.07854849, 0.91844881, 0.83965222]),
 array([[ 0.40596822,  0.90700176,  0.11197149],
        [ 0.09271948,  0.08101235, -0.9923911 ],
        [ 0.90917155, -0.41326119,  0.05120829]]))

## Solve a linear system

Given Ax = b, find x

In [77]:
A = np.array([[1,2], [3,4]])
b = np.array([1,2])

# Solve by forming the inverse explicitly; the tiny 2.2e-16 in the output
# below is rounding error from this approach (the exact answer is [0, 0.5]).
x = np.linalg.inv(A).dot(b)
x

array([2.22044605e-16, 5.00000000e-01])

In [78]:
# Equivalent in exact arithmetic, but solves the system directly without forming the inverse.
x = np.linalg.solve(A,b)
x

array([0. , 0.5])

**Never use the inverse to solve, use the `solve()` function!**