In [3]:
import numpy as np
import scipy
from scipy.special import expit
from sklearn import metrics
from sklearn.datasets import load_breast_cancer

In [None]:
# In this example we will not scale the data or split it into train/test sets,
# since the focus is purely on the algorithm.

In [2]:
# Forward pass 
# backward propagation
# gradient descent

In [None]:
######################################################################################################################################
########## Single iteration of a forward pass 
######################################################################################################################################

In [10]:
# Load the breast cancer dataset that ships with scikit-learn.
# load_breast_cancer is imported in the import cell at the top of the notebook
# so that every dependency is visible in one place.
ds = load_breast_cancer()

# feature matrix X: m x n
X = ds.data
print("X:",X.shape)


# target y: reshaped from a flat vector of length m into an m x 1 column
y = ds.target.reshape(-1, 1)
print("y:",y.shape)

# number of samples (m) and number of features (n), both read off X
m, n = X.shape
print("m:",m)

# human-readable names of the n feature columns
features = ds.feature_names
print("n:",n)
print("features",features)

# Sanity check before any matrix math: exactly one target row per sample.
assert y.shape == (m, 1), "y must be a column vector with one row per sample"

X: (569, 30)
y: (569, 1)
m: 569
n: 30
features ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']


In [11]:
# Initialize hyperparameter and model parameters

# Model parameter: anything that we estimate as part of training.
# this would be anything inside of the loop of forward pass, backward propagation, gradient descent (think w's and b)


# Hyperparameters: set before we enter the loop, e.g. alpha (the learning rate) and the number of iterations (nepochs in the code below)

In [15]:
# Hyperparameters, fixed before any training step runs:
#   alpha   - learning rate that scales each gradient-descent update
#   nepochs - number of epochs
alpha, nepochs = 2.5e-6, 5000

# Model parameters w and b, both starting at zero:
#   w - n x 1 column of weights (one row per feature)
#   b - bias, starting as the plain scalar 0
w = np.zeros(shape=(n, 1))
b = 0

In [12]:
# Let's first see how we can do a single iteration of the forward pass
# (FP) followed by backward propagation (BP) followed by gradient descent (GD)

In [27]:
# Single iteration of the "forward pass": compute the current predictions
# and the current loss. z and yhat have the same shape as y (m x 1), i.e.
# one value per sample.

# affine transformation z: m x 1 (b is broadcast across all m rows)
z = X@w+b
print("z:   "+str(z.shape))

# non-linear (sigmoid) activation yhat: m x 1
# Hand-written equivalent: yhat = 1./(1+np.exp(-z))
# expit is used instead to avoid the overflow error the hand-written form can
# hit. It is imported explicitly (from scipy.special import expit) in the
# import cell: a bare `import scipy` does not guarantee that the `special`
# submodule has been loaded.
yhat = expit(z)
print("yhat:"+str(yhat.shape))

# average log-loss J: scalar
# Hand-written equivalent:
# J = (-1./m) * (y.T@np.log(yhat)+ (1-y.T)@np.log(1-yhat))
# sklearn.metrics.log_loss is used instead for numerical robustness: the
# hand-written form takes log(0) whenever yhat saturates at exactly 0 or 1.
J = metrics.log_loss(y,yhat)
print("J:    "+str(J))

z:   (569, 1)
yhat:(569, 1)
J:    0.6931471805599453


In [21]:
######################################################################################################################################
############## Calculate current gradient - Backward propagation 
######################################################################################################################################

In [28]:
# Single iteration of "backward propagation": compute the current gradient
# of the loss with respect to w and b, fully vectorized (no Python loops).

# prediction error, one row per sample: m x 1
residual = yhat - y

# dj_dw: n x 1 (same shape as w)
dj_dw = ((1/m) * X.T) @ residual
print(dj_dw.shape)

# dj_db: 1 x 1 -- keepdims=True keeps the summed axis, so this is a
# (1, 1) array rather than a bare scalar; it still broadcasts like one.
dj_db = (1/m) * residual.sum(axis=0, keepdims=True)
print(dj_db.shape)

(30, 1)
(1, 1)


In [29]:
######################################################################################################################################
############## Update parameters (gradient descent)
######################################################################################################################################

In [31]:
# Single iteration of "gradient descent": move each parameter one step of
# size alpha against its gradient. New objects are bound to w and b (no
# in-place mutation of the previous arrays).
w, b = w - alpha * dj_dw, b - alpha * dj_db

# w keeps its n x 1 shape
print(w.shape)

# b takes on the shape of dj_db after the update
print(b.shape)



(30, 1)
(1, 1)
