# Multivariate Calculus
## WK3
#### PQ: Training Neural Networks

In [3]:
import numpy as np
# 1. Cost function for single neural link

# Network state: a tanh activation with a single weight and bias.
σ = np.tanh
w1, b1 = 1.3, -0.1


def a1(a0):
    """Return the activation of the output neuron for the input a0.

    The weight w1, the bias b1 and the activation function σ are read
    from module level each time the function is called.
    """
    return σ(w1 * a0 + b1)

# Cost for the single input x: squared difference between the network
# output a1(x) and the value 1 it is compared against.
x = 0
C0 = (a1(x) - 1) ** 2

In [None]:
# 4 Derivative of individual cost functions

# Activation function of the single-neuron network.
sigma = np.tanh


def a1(w1, b1, a0):
    """Feed-forward equation: return sigma(w1 * a0 + b1).

    w1 is the weight, b1 the bias and a0 the input activation.
    """
    weighted_sum = w1 * a0 + b1
    return sigma(weighted_sum)

def C(w1, b1, x, y):
    """Return the individual cost for one training pair (x, y).

    The cost is the square of the difference between the network output
    a1(w1, b1, x) and the training data output y.
    """
    residual = a1(w1, b1, x) - y
    return residual ** 2

def dCdw(w1, b1, x, y):
    """Return the derivative of the cost with respect to the weight w1.

    Chain rule: dC/dw = dC/da * da/dz * dz/dw, where z = w1 * x + b1 is
    the weighted sum and a = a1(w1, b1, x) is the activation.
    """
    weighted_sum = w1 * x + b1
    # Derivative of the squared-difference cost with respect to the activation.
    cost_wrt_activation = 2 * (a1(w1, b1, x) - y)
    # Derivative of the activation with respect to z; 1 / cosh(z)^2 is the
    # derivative of tanh, so this assumes a1 applies tanh.
    activation_wrt_sum = 1 / np.cosh(weighted_sum) ** 2
    # Derivative of the weighted sum with respect to the weight.
    sum_wrt_weight = x
    return cost_wrt_activation * activation_wrt_sum * sum_wrt_weight

def dCdb(w1, b1, x, y):
    """Return the derivative of the cost with respect to the bias b1.

    Chain rule: dC/db = dC/da * da/dz * dz/db, where z = w1 * x + b1.
    Only the last factor differs from the weight derivative dCdw.
    """
    weighted_sum = w1 * x + b1
    cost_wrt_activation = 2 * (a1(w1, b1, x) - y)
    # 1 / cosh(z)^2 is the derivative of tanh; assumes a1 applies tanh.
    activation_wrt_sum = 1 / np.cosh(weighted_sum) ** 2
    # The bias enters the weighted sum additively, so dz/db is 1.
    sum_wrt_bias = 1
    return cost_wrt_activation * activation_wrt_sum * sum_wrt_bias

# Evaluate both derivatives for an unfit weight and bias
# on a single data point pair (x, y).
w1, b1 = 2.3, -1.2
x, y = 0, 1
# Print how the cost changes in proportion to a small change in the
# weight, and then in proportion to a small change in the bias.
print(dCdw(w1, b1, x, y))
print(dCdb(w1, b1, x, y))

In [None]:
# 4 Derivative of cost function in vector form using modulus squared

# Activation function, applied to every entry of the weighted sum.
sigma = np.tanh

# A random initial weight matrix (2 x 3) and bias vector (length 2),
# written out as literals.
W = np.array([
    [-0.94529712, -0.2667356, -0.91219181],
    [2.05529992, 1.21797092, 0.22914497],
])
b = np.array([0.61273249, 1.6422662])


def a1(a0):
    """Feed-forward function: return sigma(W @ a0 + b).

    This mirrors the scalar case, except that the weights multiply the
    input through the matrix product '@' instead of the scalar '*'.
    """
    return sigma(W @ a0 + b)

# One training example: input x and expected output y.
x = np.array([0.7, 0.6, 0.2])
y = np.array([0.9, 0.6])

# d is the vector difference between the observed activation a1(x) and
# the expected activation y.
d = a1(x) - y
# The cost is the modulus squared of that difference, i.e. d dotted with
# itself.  NOTE: this rebinds the name C to a number.
C = (d @ d)

#### LAB: Backpropagation

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# PACKAGE
# First load the worksheet dependencies.
# Here is the activation function and its derivative.
def sigma(z):
    """Logistic activation: return 1 / (1 + exp(-z)).

    Works element-wise when z is a NumPy array.
    """
    return 1 / (1 + np.exp(-z))


def d_sigma(z):
    """Derivative of the logistic activation: return cosh(z / 2)**-2 / 4.

    This is mathematically equal to sigma(z) * (1 - sigma(z)).
    Works element-wise when z is a NumPy array.
    """
    return np.cosh(z / 2) ** (-2) / 4

def reset_network(n1=6, n2=7, random=np.random):
    """Initialise the network with its structure, discarding any training.

    The module-level weights W1, W2, W3 and biases b1, b2, b3 are replaced
    by fresh arrays whose entries are random.randn samples divided by 2.
    The shapes give one input neuron, hidden layers of n1 and n2 neurons,
    and two output neurons.
    """
    global W1, W2, W3, b1, b2, b3
    # Weights first, then biases: the draw order is kept so that a seeded
    # generator reproduces the same parameters.
    shapes = [(n1, 1), (n2, n1), (2, n2), (n1, 1), (n2, 1), (2, 1)]
    W1, W2, W3, b1, b2, b3 = [
        random.randn(rows, cols) / 2 for rows, cols in shapes
    ]

def network_function(a0):
    """Feed a0 forward through the three layers of the network.

    Each layer forms the weighted sum z = W @ a + b from the previous
    activation and applies sigma to it.  Returns the tuple
    (a0, z1, a1, z2, a2, z3, a3) of all weighted sums and activations.
    """
    trace = [a0]
    activation = a0
    for weights, bias in ((W1, b1), (W2, b2), (W3, b3)):
        weighted_sum = weights @ activation + bias
        activation = sigma(weighted_sum)
        trace += [weighted_sum, activation]
    return tuple(trace)

def cost(x, y):
    """Return the cost of the network with respect to a training set.

    The cost is the squared norm of the difference between the network
    output for x and the expected output y, divided by x.size
    (presumably the number of training examples - confirm against the
    shape of x used by callers).
    """
    output = network_function(x)[-1]
    squared_error = np.linalg.norm(output - y) ** 2
    return squared_error / x.size

#######################################################################################################################
# Jacobian for the third layer weights.
def J_W3(x, y):
    """Return the Jacobian of the cost with respect to W3.

    Chain rule: dC/dW3 = dC/da3 * da3/dz3 * dz3/dW3, averaged over the
    training examples by dividing by x.size.
    """
    # Only a2, z3 and a3 are needed from the forward pass.
    _a0, _z1, _a1, _z2, a2, z3, a3 = network_function(x)
    # dC/da3, multiplied by the derivative of sigma evaluated at z3.
    delta3 = 2 * (a3 - y) * d_sigma(z3)
    # dz3/dW3 = a2: the matrix product sums along the axis that holds the
    # training examples, and x.size turns the sum into an average.
    return delta3 @ a2.T / x.size

# Jacobian for the third layer biases.
# Only the last partial derivative differs from J_W3; the first two
# partial derivatives are the same.
def J_b3(x, y):
    """Return the Jacobian of the cost with respect to b3.

    Chain rule: dC/db3 = dC/da3 * da3/dz3 * dz3/db3, with dz3/db3 = 1.
    The result keeps one column (keepdims=True) and is averaged over the
    training examples by dividing by x.size.
    """
    # As in J_W3, first set up the activations and weighted sums.
    a0, z1, a1, z2, a2, z3, a3 = network_function(x)
    # First two partial derivatives: dC/da3, then da3/dz3 = sigma'(z3).
    J = 2 * (a3 - y)
    J = J * d_sigma(z3)
    # dz3/db3 is 1, so there is nothing further to multiply by.
    # We still sum over all training examples (axis 1) and average.
    J = np.sum(J, axis=1, keepdims=True) / x.size
    return J

#######################################################################################################################
# Jacobian for the second layer weights; compare with J_W3.
def J_W2(x, y):
    """Return the Jacobian of the cost with respect to W2.

    The error is propagated from the output layer back through W3 to the
    second layer, then averaged over the training examples via x.size.
    """
    _a0, _z1, a1, z2, _a2, z3, a3 = network_function(x)
    # Same start as J_W3: dC/da3 multiplied by sigma'(z3).
    delta3 = 2 * (a3 - y) * d_sigma(z3)
    # da3/da2 contributes W3 (sigma'(z3) is already applied); then the
    # layer-2 derivative sigma'(z2) is applied.
    delta2 = (delta3.T @ W3).T * d_sigma(z2)
    # dz2/dW2 = a1, the same final step as J_W3 with the layer bumped down.
    return delta2 @ a1.T / x.size

# Jacobian for the second layer biases.
def J_b2(x, y):
    """Return the Jacobian of the cost with respect to b2.

    Follows the same chain of partial derivatives as J_W2, except that
    the last factor dz2/db2 is 1, so the result is summed over the
    training examples (axis 1) and divided by x.size instead of being
    multiplied by a1.
    """
    a0, z1, a1, z2, a2, z3, a3 = network_function(x)
    # dC/da3
    J = 2 * (a3 - y)
    # da3/da2: first sigma'(z3), then W3.
    J = J * d_sigma(z3)
    J = (J.T @ W3).T
    # da2/dz2 = sigma'(z2)
    J = J * d_sigma(z2)
    # dz2/db2 = 1: only the average over the training examples remains.
    J = np.sum(J, axis=1, keepdims=True) / x.size
    return J

#######################################################################################################################
# Jacobian for the first layer weights.
def J_W1(x, y):
    """Return the Jacobian of the cost with respect to W1.

    The error is propagated from the output layer back through W3 and W2
    to the first layer, whose weighted sum has dz1/dW1 = a0.  The result
    is averaged over the training examples by dividing by x.size.
    """
    a0, z1, a1, z2, a2, z3, a3 = network_function(x)
    # dC/da3
    J = 2 * (a3 - y)
    # da3/da2: first sigma'(z3), then W3.
    J = J * d_sigma(z3)
    J = (J.T @ W3).T
    # da2/da1: first sigma'(z2), then W2.
    J = J * d_sigma(z2)
    J = (J.T @ W2).T
    # da1/dz1 = sigma'(z1)
    J = J * d_sigma(z1)
    # dz1/dW1 = a0; the product sums over the training examples.
    J = J @ a0.T / x.size
    return J

# Jacobian for the first layer biases.
def J_b1(x, y):
    """Return the Jacobian of the cost with respect to b1.

    Follows the same chain of partial derivatives as J_W1, except that
    the last factor dz1/db1 is 1, so the result is summed over the
    training examples (axis 1) and divided by x.size instead of being
    multiplied by a0.
    """
    a0, z1, a1, z2, a2, z3, a3 = network_function(x)
    # dC/da3
    J = 2 * (a3 - y)
    # da3/da2: first sigma'(z3), then W3.
    J = J * d_sigma(z3)
    J = (J.T @ W3).T
    # da2/da1: first sigma'(z2), then W2.
    J = J * d_sigma(z2)
    J = (J.T @ W2).T
    # da1/dz1 = sigma'(z1)
    J = J * d_sigma(z1)
    # dz1/db1 = 1: only the average over the training examples remains.
    J = np.sum(J, axis=1, keepdims=True) / x.size
    return J




<hr style="border:1px solid gray">

## WK4
#### LAB: Gram-Schmidt Process

<hr style="border:1px solid gray">

## WK5
#### PQ: eigen-things

In [26]:
# 2
# Eigenvalues and eigenvectors (as columns) of a diagonal matrix.
ev2 = np.linalg.eig(
    np.array([
        [1, 0],
        [0, 2],
    ])
)
ev2

(array([1., 2.]),
 array([[1., 0.],
        [0., 1.]]))

In [31]:
# 4
# Eigenvalues and eigenvectors (as columns) of an upper-triangular matrix.
ev4 = np.linalg.eig(
    np.array([
        [3, 4],
        [0, 5],
    ])
)
ev4

(array([3., 5.]),
 array([[1.        , 0.89442719],
        [0.        , 0.4472136 ]]))

In [32]:
# 6
# Eigenvalues and eigenvectors (as columns) of a lower-triangular matrix.
ev6 = np.linalg.eig(
    np.array([
        [1, 0],
        [-1, 4],
    ])
)
ev6

(array([4., 1.]),
 array([[0.        , 0.9486833 ],
        [1.        , 0.31622777]]))

In [33]:
# 8
# Eigenvalues and eigenvectors (as columns) of [[-3, 8], [2, 3]].
ev8 = np.linalg.eig(
    np.array([
        [-3, 8],
        [2, 3],
    ])
)
ev8

(array([-5.,  5.]),
 array([[-0.9701425 , -0.70710678],
        [ 0.24253563, -0.70710678]]))

In [35]:
# 9
# The characteristic polynomial of this matrix is (lambda - 1)^2, so the
# eigenvalue 1 is repeated; np.linalg.eig may report it as a complex
# pair with tiny imaginary parts caused by rounding.
ev9 = np.linalg.eig(
    np.array([
        [5, 4],
        [-4, -3],
    ])
)
ev9

(array([1.+2.98023224e-08j, 1.-2.98023224e-08j]),
 array([[ 0.70710678+0.00000000e+00j,  0.70710678-0.00000000e+00j],
        [-0.70710678+5.26835606e-09j, -0.70710678-5.26835606e-09j]]))

In [36]:
# 10
# This matrix has a complex-conjugate pair of eigenvalues, so the
# eigenvectors returned by np.linalg.eig are complex as well.
ev10 = np.linalg.eig(
    np.array([
        [-2, -3],
        [1, 1],
    ])
)
ev10

(array([-0.5+0.8660254j, -0.5-0.8660254j]),
 array([[ 0.8660254+0.j  ,  0.8660254-0.j  ],
        [-0.4330127-0.25j, -0.4330127+0.25j]]))

<hr style="border:1px solid gray">

#### PQ: Diagonalization and Applications

In [52]:
# 1
# Change to the eigenbasis of T: the columns of C are the eigenvectors
# returned by np.linalg.eig, and D = C^-1 T C is T expressed in that
# basis (diagonal up to rounding, with the eigenvalues on the diagonal).
T = np.array([[6, -1], [2, 3]])
C = np.linalg.eig(T)[1]
invC = np.linalg.inv(C)
D = (invC @ T) @ C
T, C, invC, D

(array([[ 6, -1],
        [ 2,  3]]),
 array([[0.70710678, 0.4472136 ],
        [0.70710678, 0.89442719]]),
 array([[ 2.82842712, -1.41421356],
        [-2.23606798,  2.23606798]]),
 array([[5.0000000e+00, 8.8817842e-16],
        [0.0000000e+00, 4.0000000e+00]]))

In [53]:
# 2
# Change to the eigenbasis of T: the columns of C are the eigenvectors
# returned by np.linalg.eig, and D = C^-1 T C is T expressed in that
# basis (diagonal up to rounding, with the eigenvalues on the diagonal).
T = np.array([[2, 7], [0, -1]])
C = np.linalg.eig(T)[1]
invC = np.linalg.inv(C)
D = (invC @ T) @ C
T, C, invC, D

(array([[ 2,  7],
        [ 0, -1]]),
 array([[ 1.        , -0.91914503],
        [ 0.        ,  0.3939193 ]]),
 array([[1.        , 2.33333333],
        [0.        , 2.53859104]]),
 array([[ 2.0000000e+00, -4.4408921e-16],
        [ 0.0000000e+00, -1.0000000e+00]]))

In [55]:
# 3
# Change to the eigenbasis of T: the columns of C are the eigenvectors
# returned by np.linalg.eig, and D = C^-1 T C is T expressed in that
# basis (diagonal up to rounding, with the eigenvalues on the diagonal).
T = np.array([[1, 0], [2, -1]])
C = np.linalg.eig(T)[1]
invC = np.linalg.inv(C)
D = (invC @ T) @ C
T, C, invC, D

(array([[ 1,  0],
        [ 2, -1]]),
 array([[0.        , 0.70710678],
        [1.        , 0.70710678]]),
 array([[-1.        ,  1.        ],
        [ 1.41421356,  0.        ]]),
 array([[-1.,  0.],
        [ 0.,  1.]]))

In [57]:
# 4
# Rebuild T = C D C^-1 from a change-of-basis matrix C and D.
# Here D is the identity, so T is the identity as well.
C = np.array([[1, 2], [0, 1]])
invC = np.linalg.inv(C)
D = np.identity(2)
T = (C @ D) @ invC
T, C, invC, D

(array([[1., 0.],
        [0., 1.]]),
 array([[1, 2],
        [0, 1]]),
 array([[ 1., -2.],
        [ 0.,  1.]]),
 array([[1., 0.],
        [0., 1.]]))

In [59]:
# 5
# Rebuild T = C D C^-1 from a change-of-basis matrix C and the
# diagonal matrix D with entries 5 and 4.
C = np.array([[1, 1], [1, 2]])
invC = np.linalg.inv(C)
D = np.diag([5, 4])
T = (C @ D) @ invC
T, C, invC, D

(array([[ 6., -1.],
        [ 2.,  3.]]),
 array([[1, 1],
        [1, 2]]),
 array([[ 2., -1.],
        [-1.,  1.]]),
 array([[5, 0],
        [0, 4]]))

In [64]:
# Three ways of "cubing" T.  The ** operator is element-wise: cubing the
# entries of a diagonal D (as set up in the preceding cell) equals its
# matrix cube, so C @ (D ** 3) @ invC agrees with T @ T @ T, whereas
# T ** 3 only cubes each entry of T.
T3 = C @ (D ** 3) @ invC
T3, T ** 3, T @ T @ T

(array([[186., -61.],
        [122.,   3.]]),
 array([[216.,  -1.],
        [  8.,  27.]]),
 array([[186., -61.],
        [122.,   3.]]))

In [65]:
# 6
# Rebuild T = C D C^-1 from a change-of-basis matrix C and the
# diagonal matrix D with entries -1 and 2, then also display T cubed.
C = np.array([[7, 1], [-3, 0]])
invC = np.linalg.inv(C)
D = np.diag([-1, 2])
T = (C @ D) @ invC
T, C, invC, D, T @ T @ T

(array([[ 2.,  7.],
        [ 0., -1.]]),
 array([[ 7,  1],
        [-3,  0]]),
 array([[ 0.        , -0.33333333],
        [ 1.        ,  2.33333333]]),
 array([[-1,  0],
        [ 0,  2]]),
 array([[ 8., 21.],
        [ 0., -1.]]))

In [66]:
# 7
# Rebuild T = C D C^-1 from a change-of-basis matrix C and the
# diagonal matrix D with entries 1 and -1, then also display the
# fifth power of T.
C = np.array([[1, 0], [1, 1]])
invC = np.linalg.inv(C)
D = np.diag([1, -1])
T = (C @ D) @ invC
T, C, invC, D, T @ T @ T @ T @ T

(array([[ 1.,  0.],
        [ 2., -1.]]),
 array([[1, 0],
        [1, 1]]),
 array([[ 1.,  0.],
        [-1.,  1.]]),
 array([[ 1,  0],
        [ 0, -1]]),
 array([[ 1.,  0.],
        [ 2., -1.]]))

#### LAB:  PageRank algorithm

In [None]:
# NOTE(review): this cell only displays the global `b`; no PageRank lab
# code appears in this part of the export - confirm whether it is a stray cell.
b

#### QUIZ:  Eigen-things