In [96]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

In [133]:
data = load_breast_cancer()
X_org = np.array(data["data"]).T
Y_org = np.array(np.matrix(list(data["target"])))


In [134]:
#Normalize the data
scaler = preprocessing.StandardScaler().fit(X_org)
X_std = scaler.transform(X_org)


In [76]:
def initialize_parameters(layer_dims):
    parameters = {}
    for i in range(1,len(layer_dims)):
        #using Xavier initialization efficient using tanh activation.
        np.random.seed(10)
        parameters["W"+ str(i)] = np.random.randn(layer_dims[i],layer_dims[i-1])*np.sqrt(2/(layer_dims[i-1]))
        parameters["b"+ str(i)] = np.zeros((layer_dims[i],1))
    return parameters            

In [6]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

In [9]:
def tanh(x):
    return (np.exp(x) - np.exp(-x))/(np.exp(x) + np.exp(-x))

In [10]:
def relu(x):
    dG = (x>0).astype(int)
    x = np.array(dG)*np.array(x)
    return x    

In [125]:
def get_layer_values(A_prev,W,b, activation):
    A_prev = A_prev
    W = W
    Z = np.dot(W,A_prev)+b
    if activation ==1 or activation== "sigmoid":
        A = sigmoid(Z)
    elif activation == 2 or activation == "tanh":
        A = tanh(Z)
    elif activation == 3 or activation == "relu":
        A = relu(Z)
    linear_cache = (A_prev,W,b)
    activation_cache = Z
    cache = (linear_cache, activation_cache)
    return A, cache

In [155]:
def get_cost(Y,AL, caches, lambd):
    m = Y.shape[1]
    cost = -(np.dot(np.log(AL),Y.T)+np.dot(np.log(1-AL),(1-Y).T))*(1/m)
    L = len(caches)
    reg_cost = 0
    for i in range(1,L+1):
        reg_cost = np.sum(np.square(caches[i-1][0][1])) + reg_cost
    return cost+ lambd*(reg_cost/(2*m))

In [160]:
def forward_propagation(X, parameters, activation, lambd):
    caches = []
    A_prev = X
    L = int(len(parameters)//2)
    for i in range(1,L+1):
        A,cache_temp = get_layer_values(A_prev= A_prev, W= parameters["W"+ str(i)], b = parameters["b"+ str(i)], activation = activation[i])
        caches.append(cache_temp)
        A_prev = A
    cost = get_cost(Y =Y, AL = A, caches = caches, lambd = lambd)
    cost = np.squeeze(cost)
    AL = A
    return AL, caches, cost

In [14]:
def get_dZ(dA,cache,activation):
    if activation == 1:
        n = 1/(1+np.exp(cache[1]))
        dG = n*(1-n)
        dZ = dA*dG
    elif activation == 2:
        dG = 1-np.square(cache[1])
        dZ = dA*dG
    else:
        dG = (cache[1]>0).astype(int)
        dZ = dA*dG
    return dZ     

In [165]:
def backward_propagation(AL,Y, caches, activation,lambd):
    grads = {}
    dAL = -(np.divide(Y,AL)- np.divide(1-Y, 1-AL))
    L = len(caches)
    m = AL.shape[1]
    #Y = Y.reshape(AL.shape)
    for i in reversed(range(L)):
        grads["dW"+ str(i+1)] = np.dot(get_dZ(dA= dAL,cache = caches[i], activation = activation[i+1]),caches[i][0][0].T)/m + ((lambd/m)*(caches[i][0][1]))
        grads["db"+ str(i+1)] = np.mean(get_dZ(dA = dAL,cache = caches[i], activation = activation[i+1]), axis = 1, keepdims = True)
        grads["dA"+ str(i)] = np.dot(caches[i][0][1].T,get_dZ(dA = dAL,cache = caches[i], activation = activation[i+1]))
        dAL = grads["dA"+ str(i)]
    return grads    

In [16]:
def update_parameters(grads, parameters, learning_rate):
    updated_params = parameters.copy()
    L = len(updated_params)
    for i in range(1,((L//2)+1)):
        updated_params["W"+ str(i)] = parameters["W"+ str(i)] - learning_rate*grads["dW"+ str(i)]
        updated_params["b"+ str(i)] = parameters["b"+ str(i)] - learning_rate*grads["db"+ str(i)]
    return updated_params    

In [166]:
def L_layer_model(X,Y,layers_dims, learning_rate ,num_iter , activation,lambd, print_cost = False):
    costs = []
    parameters = initialize_parameters(layers_dims)
    for i in range(num_iter):
        AL,caches, cost =forward_propagation(X,parameters, activation,lambd)
        grads = backward_propagation(AL, Y, caches, activation,lambd)
        parameters = update_parameters(grads, parameters, learning_rate)
        # Print the cost every 100 iterations
        if print_cost and i % 1000 == 0 or i == num_iter - 1:
            print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))
        if i % 100 == 0 or i == num_iter:
            costs.append(cost)
    return parameters, costs        
        


In [180]:
final_params, costs = L_layer_model(X_std,Y_org, layers_dims = [30,20,3,1], learning_rate = 0.09, num_iter = 18000, activation = [0,3,3,1],lambd= 0.3 ,print_cost = True)

Cost after iteration 0: 0.6871995821953196
Cost after iteration 1000: 0.2368781417656064
Cost after iteration 2000: 0.22716197076426478
Cost after iteration 3000: 0.2205814674522127
Cost after iteration 4000: 0.21631813703850755
Cost after iteration 5000: 0.2132037856223899
Cost after iteration 6000: 0.21076217533428004
Cost after iteration 7000: 0.2087225114313231
Cost after iteration 8000: 0.20632148015843943
Cost after iteration 9000: 0.20325435744775136
Cost after iteration 10000: 0.19570197082971846
Cost after iteration 11000: 0.19423323660320732
Cost after iteration 12000: 0.1885767400968159
Cost after iteration 13000: 0.19250192200411115
Cost after iteration 14000: 0.19176270257997485
Cost after iteration 15000: 0.18668097156529415
Cost after iteration 16000: 0.19092452750393157
Cost after iteration 17000: 0.1917354974120675
Cost after iteration 17999: 0.19753124003513847


In [None]:
a = np.array(np.matrix(X[:,0])).T
a.shape

In [19]:
def predict(X, final_params, activation):
    y = np.ones((1,X.shape[1]))
    y = X
    L = len(final_params)//2
    for i in range(1,L+1):
        A_prev = np.dot(final_params["W"+str(i)],y) + final_params["b"+ str(i)]
        if activation[i] == 1:
            A_prev = sigmoid(A_prev)
        elif activation[i] == 2:
            A_prev = tanh(A_prev)
        elif activation[i] == 3:
            A_prev = relu(A_prev)
        y = A_prev
        output = (y>0.5).astype(int)
    return output

In [181]:
k = predict(X, final_params, activation = [0,3,3,1])

  return 1/(1+np.exp(-x))


In [182]:
count = 0
for i in range((X.shape[1])):
    if k[:,i]==Y[:,i]:
        count+=1
accuracy = count/X.shape[1]
accuracy

0.9191564147627417