<a href="https://colab.research.google.com/github/H4D32/AndrewNg-ML-Python-Notebooks/blob/main/Ex2/AndrewEx3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Andrew Ng Assignment 3 (Week 4)

## Multi-Class Classification

In [61]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
from scipy.io import loadmat
import scipy.optimize as op

In [62]:
# Load the training set. Each row of X is one example; the shape shown below
# is (5000, 400), i.e. 5000 examples with 400 features each.
mat = loadmat("ex3data1.mat")
X, y = mat["X"], mat["y"]

X.shape

(5000, 400)

In [102]:
# Show a 5x5 grid of randomly chosen training examples.
# Samples come from the global NumPy RNG, so the grid differs between runs
# unless np.random is seeded beforehand.
fig_sub = make_subplots(rows=5, cols=5, start_cell="bottom-left")

for i in range(1,6):
  for j in range(1,6):
    # Pick any row of X (not a hard-coded 5000) and rebuild the 20x20 image;
    # order='F' fills the image column by column, and flipud turns it over
    # vertically before it is handed to the heatmap.
    digit_img = np.flipud(X[np.random.randint(0, X.shape[0])].reshape(20, 20, order='F'))
    # showscale=False: otherwise each of the 25 traces draws its own colourbar.
    fig_sub.add_trace(go.Heatmap(z=digit_img, showscale=False),
              row=i, col=j)

fig_sub.show()

In [64]:
# Display one randomly selected example as a 20x20 grayscale image.
# The upper bound follows the data (X.shape[0]) instead of a hard-coded 5000.
digit_rng = X[np.random.randint(0, X.shape[0]), :].reshape(20, 20, order='F')
fig = px.imshow(digit_rng, color_continuous_scale='gray')
fig.show()

In [65]:
def sigmoid(z):
  """
  Apply the logistic function 1 / (1 + e^(-z)) element-wise.

  z may be a scalar or a NumPy array; the result has the same shape as z.
  """
  exp_neg_z = np.exp(-z)
  return 1 / (1 + exp_neg_z)

In [66]:
def lrCostFunction(theta, X, y, Lambda):
    """
    Compute the regularized logistic-regression cost.

    Parameters
    ----------
    theta : array-like with n elements (any shape that reshapes to (n, 1))
        Model parameters; theta[0] is excluded from the regularization term.
    X : ndarray of shape (m, n)
        Design matrix, one example per row.
    y : array-like with m elements
        0/1 labels, given as shape (m,) or (m, 1) and any numeric dtype.
    Lambda : float
        Regularization strength.

    Returns
    -------
    ndarray of shape (1,)
        The regularized cost (a one-element array, as before).
    """
    m, n = X.shape
    theta = np.reshape(theta, (n, 1))
    # Work on a float column vector. Unary minus on an unsigned-integer label
    # array (e.g. uint8) wraps 1 around to 255 and silently corrupts the cost.
    y = np.reshape(y, (m, 1)).astype(float)
    h_x = sigmoid(X @ theta)
    cost = (-y.T @ np.log(h_x) - (1 - y).T @ np.log(1 - h_x)) / m
    # Penalize every parameter except the first one.
    regcost = cost + (Lambda / (2 * m)) * (theta[1:].T @ theta[1:])
    return regcost[0]

In [67]:
def lrGradient(theta, X, y, Lambda):
    """
    Compute the gradient of the regularized logistic-regression cost.

    Parameters
    ----------
    theta : array-like with n elements (any shape that reshapes to (n, 1))
        Model parameters; theta[0] is not regularized.
    X : ndarray of shape (m, n)
        Design matrix, one example per row.
    y : array-like with m elements
        0/1 labels, given as shape (m,) or (m, 1).
    Lambda : float
        Regularization strength.

    Returns
    -------
    ndarray of shape (n,)
        Flattened gradient, one entry per parameter.
    """
    m, n = X.shape
    theta = np.reshape(theta, (n, 1))
    # Force a column vector: a 1-D y would broadcast (m, 1) - (m,) into an
    # (m, m) matrix and yield a gradient of the wrong size.
    y = np.reshape(y, (m, 1))
    h_x = sigmoid(X @ theta)
    # Copy of theta with the first entry zeroed so it receives no penalty.
    newtheta = np.copy(theta)
    newtheta[0] = 0
    grad = 1/m * (X.T @ (h_x - y))
    grad = grad + (Lambda/m)*newtheta
    return grad.flatten()

In [68]:
# Sanity-check the cost and gradient functions on a small hand-made problem.
# (expected values borrowed from Github/Benlau93)
theta_t = np.array([-2,-1,1,2]).reshape(4,1)
X_t = np.linspace(0.1,1.5,15).reshape(3,5).T
X_t = np.hstack((np.ones((5,1)), X_t))
y_t = np.array([1,0,1,0,1]).reshape(5,1)
J = lrCostFunction(theta_t, X_t, y_t, 3)
grad = lrGradient(theta_t, X_t, y_t, 3)
print("Cost:",J,"Expected cost: 2.534819")
print("Gradients:\n",grad,"\nExpected gradients:\n 0.146561\n -0.548558\n 0.724722\n 1.398003")

# Fail loudly instead of relying on a visual comparison of the printed values.
np.testing.assert_allclose(J, 2.534819, atol=1e-5)
np.testing.assert_allclose(grad, [0.146561, -0.548558, 0.724722, 1.398003], atol=1e-5)

Cost: [2.5348194] Expected cost: 2.534819
Gradients:
 [ 0.14656137 -0.54855841  0.72472227  1.39800296] 
Expected gradients:
 0.146561
 -0.548558
 0.724722
 1.398003


#### One vs All

In [69]:
def optimizer(X,y,initial_theta,Lambda):
    """
    Minimize the regularized logistic cost with SciPy's conjugate-gradient method.

    Parameters
    ----------
    X : ndarray of shape (m, n)
        Design matrix passed through to lrCostFunction / lrGradient.
    y : array-like with m elements
        0/1 labels passed through to lrCostFunction / lrGradient.
    initial_theta : array-like with n elements
        Starting point; any shape is accepted and flattened.
    Lambda : float
        Regularization strength.

    Returns
    -------
    ndarray of shape (n,)
        The parameter vector reported by the optimizer. The convergence
        status of the run is not inspected.
    """
    # scipy.optimize.minimize requires a one-dimensional x0; a column vector
    # such as np.zeros((n, 1)) is rejected by recent SciPy releases.
    x0 = np.ravel(initial_theta)
    result = op.minimize(fun=lrCostFunction,
                         x0=x0,
                         args=(X, y, Lambda),
                         method='CG',
                         jac=lrGradient)
    return result.x

In [70]:
def oneVsAll(X, y, num_labels, Lambda):
    """
    Train one regularized logistic-regression classifier per label (one-vs-all).

    A column of ones is prepended to X, then for each label i in
    1..num_labels a binary problem "y == i" is optimized.

    Parameters
    ----------
    X : ndarray of shape (m, n)
        Training examples without a bias column.
    y : ndarray
        Labels in 1..num_labels, compared element-wise against each label.
    num_labels : int
        Number of classes.
    Lambda : float
        Regularization strength.

    Returns
    -------
    ndarray of shape (num_labels, n + 1)
        Row i - 1 holds the parameters of the classifier for label i.
    """
    m, n = X.shape
    X_bias = np.hstack((np.ones((m, 1)), X))
    # 1-D starting point: scipy.optimize.minimize only accepts a 1-D x0.
    initial_theta = np.zeros(n + 1)
    # Preallocate the result instead of growing a flat Python list of scalars.
    all_theta = np.zeros((num_labels, n + 1))
    for label in range(1, num_labels + 1):
        all_theta[label - 1] = optimizer(X_bias, np.where(y == label, 1, 0), initial_theta, Lambda)
    return all_theta

In [71]:
def OVA_noBias(X, y, num_labels, Lambda):
    """
    One-vs-all training without adding a bias column to X.

    For each label i in 1..num_labels a binary problem "y == i" is optimized
    directly on X. Note that lrCostFunction / lrGradient leave the first
    parameter unregularized, so here that is the weight of X's first column.

    Parameters
    ----------
    X : ndarray of shape (m, n)
        Training examples, used as-is.
    y : ndarray
        Labels in 1..num_labels, compared element-wise against each label.
    num_labels : int
        Number of classes.
    Lambda : float
        Regularization strength.

    Returns
    -------
    ndarray of shape (num_labels, n)
        Row i - 1 holds the parameters of the classifier for label i.
    """
    m, n = X.shape
    # 1-D starting point: scipy.optimize.minimize only accepts a 1-D x0.
    initial_theta = np.zeros(n)
    # Preallocate the result instead of growing a flat Python list of scalars.
    all_theta = np.zeros((num_labels, n))
    for label in range(1, num_labels + 1):
        all_theta[label - 1] = optimizer(X, np.where(y == label, 1, 0), initial_theta, Lambda)
    return all_theta

In [72]:
# Train the ten one-vs-all classifiers (bias column added inside oneVsAll).
all_theta = oneVsAll(X, y, num_labels=10, Lambda=0.1)
# The global X itself is unchanged by training.
X.shape

(5000, 400)

In [73]:
# Same training run, but without a bias column, for comparison.
all_theta_noBias = OVA_noBias(X, y, num_labels=10, Lambda=0.1)
# The global X itself is unchanged by training.
X.shape

(5000, 400)

In [74]:
def predictOneVsAll(all_theta, X):
    """
    Predict a label for every row of X with the one-vs-all classifiers.

    A column of ones is prepended to X and each row is scored against every
    row of all_theta. The raw linear scores are compared directly; applying
    the sigmoid would not change which classifier scores highest.

    Returns a 1-D array holding, per example, the 1-based index of the
    highest-scoring classifier.
    """
    n_rows = X.shape[0]
    bias_column = np.ones((n_rows, 1))
    X_bias = np.concatenate((bias_column, X), axis=1)

    scores = X_bias @ all_theta.T
    best_class = np.argmax(scores, axis=1)
    return best_class + 1

In [75]:
def predictNoBias(all_theta, X):
    """
    Predict a label for every row of X with classifiers trained without a bias term.

    Each row of X is scored against every row of all_theta using the raw
    linear score (no sigmoid is applied; it would not change the ranking).

    Parameters
    ----------
    all_theta : array-like of shape (num_labels, n)
        One parameter row per classifier.
    X : array-like of shape (m, n)
        Examples to classify, used as-is (no bias column is added).

    Returns
    -------
    ndarray of shape (m,)
        1-based index of the highest-scoring classifier for each example.
    """
    # np.asarray lets plain nested lists through; the previous unused
    # `m = X.shape[0]` made such input fail for no reason.
    scores = np.asarray(X) @ np.asarray(all_theta).T
    return np.argmax(scores, axis=1) + 1

In [76]:
pred = predictOneVsAll(all_theta, X)
# Fraction of matching labels, computed vectorized and without a hard-coded
# example count; y is flattened so it lines up with the 1-D predictions.
print("Training Set Accuracy:",np.mean(pred == y.ravel())*100,"%")

Training Set Accuracy: 96.46000000000001 %


In [77]:
prednoBias = predictNoBias(all_theta_noBias, X)
# Fraction of matching labels, computed vectorized and without a hard-coded
# example count; y is flattened so it lines up with the 1-D predictions.
print("Training Set Accuracy:",np.mean(prednoBias == y.ravel())*100,"%")

Training Set Accuracy: 95.88 %


## Neural Networks

In [80]:
# Load the pre-trained network weights used by the prediction step below.
mat2 = loadmat("ex3weights.mat")
Theta1 = mat2["Theta1"]  # Theta1 has size 25 x 401
Theta2 = mat2["Theta2"]  # Theta2 has size 10 x 26

# Display the shape so the recorded cell output is actually produced by the cell.
Theta2.shape

(10, 26)

In [87]:
def predict(Theta1, Theta2, X):
    """
    Forward-propagate X through a trained two-layer network and return labels.

    A column of ones is prepended to the input and to the hidden activations
    before each weight matrix is applied; both layers use the sigmoid.

    Returns a 1-D array with, per example, the 1-based index of the output
    unit with the largest activation.
    """
    n_rows = X.shape[0]
    bias_column = np.ones((n_rows, 1))

    # Input layer -> hidden layer
    hidden = sigmoid(np.hstack((bias_column, X)) @ Theta1.T)

    # Hidden layer -> output layer
    output = sigmoid(np.hstack((bias_column, hidden)) @ Theta2.T)

    return np.argmax(output, axis=1) + 1

In [88]:
pred2 = predict(Theta1, Theta2, X)
# Fraction of matching labels, computed vectorized and without a hard-coded
# example count; y is flattened so it lines up with the 1-D predictions.
print("Training Set Accuracy:",np.mean(pred2 == y.ravel())*100,"%")

Training Set Accuracy: 97.52 %
