## Introduction to Python, Lecture 2

### Linear Algebra

In [None]:
from numpy import linalg as LA

In [None]:
import numpy as np

In [None]:
# first we have to check the rank
A = np.array([[1,2,3],[4,5,6],[7,8,9]])
LA.matrix_rank(A)

In [None]:
A = np.array([[2,-1,1],[-1,2,-1],[1,-1,2]])
LA.matrix_rank(A)

In [None]:
b = np.array([1,1,1])
x = LA.solve(A,b)
print(x)

In [None]:
Ainv = LA.inv(A)
print(Ainv)

In [None]:
LA.det(A)

In [None]:
w, v = LA.eig(A)
print("eigenvalues:\n", w)
print("eigenvectors:\n", v)

### Randomness

In [3]:
from numpy import random

#### Random integers

In [4]:
# random.randint()
# simulate a die rolling sequence
N = 20
X = np.zeros(N)
for i in range(N):
    X[i] = random.randint(1, 7)   # from 1 (inclusive) to 7 (exclusive)
print(X)

NameError: name 'np' is not defined

#### Uniform distribution
The easiest distribution is the uniform distribution on $(0,1)$, in which all numbers in a given interval are equally likely. We can use the function `random.random()` that will produce a uniformly distributed random number in $(0,1)$. Furthermore, we can turn this uniform random number from $(0,1)$ into random numbers from $a$ to $b$.

In [5]:
random.seed(42)
# the seed will initialize the random number generator
# fixing the seed will fix the "random" number generated
for i in range(5):
    r = random.random()
    print(r)

0.3745401188473625
0.9507143064099162
0.7319939418114051
0.5986584841970366
0.15601864044243652


In [6]:
def rnum(a,b):
    return a + (b-a)*random.random()

for i in range(5):
    print(rnum(-3,6))

-1.596049316974176
-2.4772474904862047
4.795585311974417
2.4100351056888796
3.372653200164409


In [7]:
import matplotlib.pyplot as plt

In [8]:
N = 300
x = np.random.uniform(0,1,N) # this syntax is okay as well
y = np.random.uniform(low=0,high=1,size=N)
plt.scatter(x,y)
plt.show()

NameError: name 'np' is not defined

#### Adding scattered noise to a linear function

In [None]:
X = np.linspace(0,1,100)
Y = 3 * X + 1
plt.plot(X,Y)
plt.show()

In [None]:
# let's add some noise
Z = 3 * X + 1 + np.random.normal(loc=0,scale=1, size= X.shape[0])
# np.random.normal(0,1, X.shape[0]) same output 
# shape returns the dimension of the array
# loc is mean
# scale is standard dev
# size is the number of samples we draw in this distribution
# we'll see much more about randomness later
plt.scatter(X,Z)  # we use a scatter plot
plt.plot(X,Y, color = "red", linewidth= 2.0)
plt.grid(True, linestyle = 'dashed')
plt.show()

#### Normal distribution

In [None]:
N = 50 # no. of samples
mu = 0.0
sigma = 1.0
X = np.random.normal(loc=mu, scale=sigma, size=N)
plt.hist(X, bins=10, edgecolor='k') # draw the histogram of X in 10 bins
plt.show()

In [None]:
N = 500000 # no of samples
mu = 0.0
sigma = 1.0
X = np.random.normal(loc=mu, scale=sigma, size=N)
plt.axis([-6, 6, 0, 0.45]) # fix our axes view
plt.hist(X,  bins=20,  density=True, edgecolor= 'k')
# plt.hist()
# bin size = (total sample)/(no. of bins)
plt.grid(True, linestyle = 'dashed')
plt.show()

In [None]:
N = 500000
mu = 0.0
sigma = 2.0  # highers standard dev
X = np.random.normal(loc=mu, scale=sigma, size=N)
plt.axis([-6, 6, 0, 0.45])
plt.hist(X, bins=20, density=True, edgecolor ='k')
plt.grid(True, linestyle = 'dashed')
plt.show()

# looks the same but look at the numbers above and below

#### Gradient descent: an example

Suppose $f(x,y)=x^2+4y^2+4x+y+6$. Use gradient descent method to find its minimizer.

Algorithm:

> Choose initial point $x_0$

> For k=0,1,..., M

    >> $(x_{k+1}, y_{k+1})=(x_k,y_k) - \eta \nabla f(x_k,y_k)$
    
Here for simplicity we only consider a fixed number of steps.

In [None]:
f = lambda x, y: x*x + 2*y*y + 4*x + y + 6
partialfx = lambda x, y: 2*x + 4  # partial derivative of our function f with respect to x
partialfy = lambda x, y: 4*y + 1

# starting at (10,10) for no reason
x, y = 10, 10

# this is the rate at which we'll move to the opposite of the gradient
# called "learning rate" in machine learning
eta = 0.1

# total number of steps we will perform this descent
num_steps = 200

# we'll store all the intermediate values during the descent:
x_vals = np.zeros(num_steps)
y_vals = np.zeros(num_steps)
f_vals = np.zeros(num_steps)

for i in range(num_steps):
    # update x and y
    dx = partialfx(x, y)
    dy = partialfy(x, y)
    x = x - eta*dx
    y = y - eta*dy
    
    # let's store the x, y and f(x,y) values for later use
    x_vals[i] = x
    y_vals[i] = y
    f_vals[i] = f(x, y)


In [None]:
x_vals.shape

In [None]:
print("Local minimum of f(x,y): ", f(x,y), "at point", (x,y))
# let's see what the f(x,y) values were    
plt.plot(range(num_steps), f_vals) # the changes of f(x,y) over these 200 iters
plt.show()

Let's also visualize what happens on a contour graph.

This is the contour graph of $f$. The curves are the solutions to $f(x,y)=c$ where $c$ is the labeled number on each curve.

In [None]:
X = np.linspace(-10,10,300)
Y = np.linspace(-10,10,300)
X, Y = np.meshgrid(X,Y)
Z = f(X, Y)

plt.figure(figsize=(8, 6))
CS = plt.contour(X, Y, Z, [1, 4, 9, 16, 25, 36, 49, 64, 81, 
                           100, 121, 144, 169, 196],cmap='jet')
# the contour plot is when f(x,y) = the values above
plt.axis([-8,8,-8,8])
plt.clabel(CS, inline=True, fontsize=10)
plt.show()

Let's include the arrows for how gradient descent moves us.

In [None]:
plt.figure(figsize=(8, 6))
CS = plt.contour(X, Y, Z, [1, 4, 9, 16, 25, 36, 49, 64, 81, 
                           100, 121, 144, 169, 196], cmap='jet')
plt.axis([-8,8,-8,8])
plt.clabel(CS, inline=True, fontsize=10)
# let's plot every few times to avoid congestion of arrows in the picture
delta_n = 3
for i in range(0,99,delta_n):
#     plt.scatter(x_vals[i], y_vals[i])
    plt.arrow(x_vals[i], y_vals[i], (x_vals[i+delta_n] - x_vals[i]), 
              (y_vals[i+delta_n] - y_vals[i]), 
              head_width=0.3, head_length=0.2, linewidth = 1.5, color='red')

# plt.plot(x_vals, y_vals)
plt.show()