In [None]:
import numpy as np
import matplotlib.pyplot as plt
import copy
%matplotlib inline

In [2]:

def initialize_parameters_deep(layer_dims, seed=3, scale=0.01):
    """
    Create the initial weights and biases for every layer of the network.

    Weights are drawn from a standard normal distribution and multiplied by
    `scale`; biases start at zero. A private random generator is used, so the
    global NumPy random state is left untouched. With the default arguments the
    returned values are the same as those produced by `np.random.seed(3)`
    followed by successive `np.random.randn` calls.

    Arguments:
    layer_dims -- python array (list) containing the dimensions of each layer in our network
    seed -- seed of the private random generator (default 3)
    scale -- factor applied to the standard-normal weights (default 0.01)

    Returns:
    parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
                    Wl -- weight matrix of shape (layer_dims[l], layer_dims[l-1])
                    bl -- bias vector of shape (layer_dims[l], 1)
    """
    # A local generator keeps the draw reproducible without reseeding the
    # process-wide np.random state as a side effect.
    rng = np.random.RandomState(seed)
    parameters = {}

    # NOTE(review): a fixed small scale may slow learning in deeper ReLU
    # networks; fan-in based scaling is worth evaluating -- TODO confirm.
    for l in range(1, len(layer_dims)):
        n_curr, n_prev = layer_dims[l], layer_dims[l - 1]
        parameters['W' + str(l)] = rng.randn(n_curr, n_prev) * scale
        parameters['b' + str(l)] = np.zeros((n_curr, 1), dtype=float)

    return parameters

In [3]:

def linear_forward(A, W, b):
    """
    Compute the affine (pre-activation) output of one layer: Z = W . A + b.

    Arguments:
    A -- activations feeding this layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)

    Returns:
    Z -- pre-activation values that are handed to the activation function
    cache -- the tuple (A, W, b), kept so the backward pass can reuse the inputs
    """
    weighted_input = np.dot(W, A)
    forward_inputs = (A, W, b)
    return weighted_input + b, forward_inputs

In [4]:
def sigmoid(Z):
    """
    Element-wise logistic sigmoid, evaluated without overflowing for large |Z|.

    Arguments:
    Z -- pre-activation values (numpy array or scalar)

    Returns:
    A -- sigmoid(Z), element-wise
    Z -- the input, returned unchanged so it can serve as the activation cache
    """
    # 1 / (1 + exp(-Z)) overflows inside exp() for very negative Z. The form
    # exp(min(Z, 0)) / (1 + exp(-|Z|)) is algebraically the same but only ever
    # exponentiates non-positive numbers.
    A = np.exp(np.minimum(Z, 0)) / (1 + np.exp(-np.abs(Z)))
    return A, Z

def sigmoid_backward(dA, activation_cache):
    """
    Backward step through a sigmoid activation.

    Arguments:
    dA -- gradient of the cost with respect to the activation output
    activation_cache -- the pre-activation values Z stored during the forward pass

    Returns:
    dZ -- gradient of the cost with respect to Z, i.e. dA * s * (1 - s) with s = sigmoid(Z)
    """
    Z = activation_cache
    # Overflow-free sigmoid: only non-positive values are ever exponentiated,
    # so very negative Z no longer triggers an overflow in exp().
    s = np.exp(np.minimum(Z, 0)) / (1 + np.exp(-np.abs(Z)))
    dZ = dA * s * (1 - s)
    return dZ

def relu(Z):
    """
    Rectified linear unit applied element-wise.

    Arguments:
    Z -- pre-activation values

    Returns:
    A -- element-wise maximum of 0 and Z
    Z -- the input, returned unchanged for use as the activation cache
    """
    rectified = np.maximum(0, Z)
    return rectified, Z

def relu_backward(dA, activation_cache):
    """
    Backward step through a ReLU activation.

    Arguments:
    dA -- gradient of the cost with respect to the activation output
    activation_cache -- the pre-activation values Z stored during the forward pass

    Returns:
    dZ -- a copy of dA with entries set to 0 wherever Z <= 0
    """
    inactive = activation_cache <= 0
    # Work on a copy so the caller's dA is never modified.
    dZ = np.copy(dA)
    dZ[inactive] = 0
    return dZ

In [5]:
def linear_activation_forward(A_prev, W, b, activation):
    """
    Implement the forward propagation for the LINEAR->ACTIVATION layer

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    A -- the output of the activation function, also called the post-activation value 
    cache -- a python tuple containing "linear_cache" and "activation_cache";
             stored for computing the backward pass efficiently

    Raises:
    ValueError -- if activation is neither "sigmoid" nor "relu"
    """
    # Reject unsupported names up front; otherwise neither branch would run and
    # the caches below would be undefined.
    if activation not in ("sigmoid", "relu"):
        raise ValueError(
            "activation must be 'sigmoid' or 'relu', got {!r}".format(activation)
        )

    # The linear step is identical for both activations.
    Z, linear_cache = linear_forward(A_prev, W, b)

    if activation == "sigmoid":
        A, activation_cache = sigmoid(Z)
    else:
        A, activation_cache = relu(Z)

    cache = (linear_cache, activation_cache)

    return A, cache

In [6]:

def L_model_forward(X, parameters):
    """
    Run the full forward pass: [LINEAR->RELU]*(L-1) followed by LINEAR->SIGMOID.

    Arguments:
    X -- data, numpy array of shape (input size, number of examples)
    parameters -- output of initialize_parameters_deep()

    Returns:
    AL -- activation value from the output (last) layer
    caches -- list with one cache from linear_activation_forward() per layer,
              in layer order (L entries, indexed from 0 to L-1)
    """
    # Every layer contributes one "W" and one "b" entry.
    num_layers = len(parameters) // 2
    caches = []
    activation = X

    # Hidden layers 1 .. L-1 use ReLU.
    for idx in range(1, num_layers):
        activation, layer_cache = linear_activation_forward(
            activation, parameters[f"W{idx}"], parameters[f"b{idx}"], "relu"
        )
        caches.append(layer_cache)

    # The output layer L uses a sigmoid.
    AL, layer_cache = linear_activation_forward(
        activation, parameters[f"W{num_layers}"], parameters[f"b{num_layers}"], "sigmoid"
    )
    caches.append(layer_cache)

    return AL, caches

In [7]:
def compute_cost(AL, Y):
    """
    Cross-entropy cost averaged over the examples.

    Arguments:
    AL -- probability vector corresponding to your label predictions, shape (1, number of examples)
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat), shape (1, number of examples)

    Returns:
    cost -- cross-entropy cost
    """
    num_examples = Y.shape[1]

    # Keep probabilities strictly inside (0, 1) so neither log() sees a zero.
    tiny = 1e-8
    probs = np.clip(AL, tiny, 1 - tiny)

    log_likelihood = Y * np.log(probs) + (1 - Y) * np.log(1 - probs)
    cost = -np.sum(log_likelihood) / num_examples

    return np.squeeze(cost)


In [8]:

def linear_backward(dZ, cache):
    """
    Backward step through the linear part of a single layer (layer l).

    Arguments:
    dZ -- Gradient of the cost with respect to the linear output (of current layer l)
    cache -- tuple of values (A_prev, W, b) coming from the forward propagation in the current layer

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    # The bias itself is not needed to compute any of the gradients.
    A_prev, W, _ = cache
    num_examples = A_prev.shape[1]

    # Parameter gradients are averaged over the examples (columns).
    grad_W = np.dot(dZ, A_prev.T) / num_examples
    grad_b = np.sum(dZ, axis=1, keepdims=True) / num_examples
    grad_A_prev = np.dot(W.T, dZ)

    return grad_A_prev, grad_W, grad_b

In [9]:
def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.
    
    Arguments:
    dA -- post-activation gradient for current layer l 
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"
    
    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b

    Raises:
    ValueError -- if activation is neither "sigmoid" nor "relu"
    """
    # Reject unsupported names up front; otherwise neither branch would run and
    # the gradients returned below would be undefined.
    if activation not in ("sigmoid", "relu"):
        raise ValueError(
            "activation must be 'sigmoid' or 'relu', got {!r}".format(activation)
        )

    linear_cache, activation_cache = cache

    # Undo the activation first, then the linear step (shared by both cases).
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    else:
        dZ = sigmoid_backward(dA, activation_cache)

    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db

In [10]:
# GRADED FUNCTION: L_model_backward

def L_model_backward(AL, Y, caches):
    """
    Implement the backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group
    
    Arguments:
    AL -- probability vector, output of the forward propagation (L_model_forward())
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
    caches -- list of caches containing:
                every cache of linear_activation_forward() with "relu" (it's caches[l], for l in range(L-1) i.e l = 0...L-2)
                the cache of linear_activation_forward() with "sigmoid" (it's caches[L-1])
    
    Returns:
    grads -- A dictionary with the gradients
             grads["dA" + str(l)] -- gradient w.r.t. the activations of layer l, for l = 0 ... L-1
             grads["dW" + str(l)] -- gradient w.r.t. W of layer l, for l = 1 ... L
             grads["db" + str(l)] -- gradient w.r.t. b of layer l, for l = 1 ... L
    """
    grads = {}
    L = len(caches) # the number of layers
    Y = Y.reshape(AL.shape) # after this line, Y is the same shape as AL

    # Keep the probabilities strictly inside (0, 1), using the same epsilon as
    # compute_cost(), so a saturated output cannot cause a division by zero
    # (and the resulting inf/nan gradients) in the derivative below.
    epsilon = 1e-8
    AL = np.clip(AL, epsilon, 1 - epsilon)

    # Derivative of the cross-entropy cost with respect to AL.
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Output layer: SIGMOID -> LINEAR.
    current_cache = caches[-1]
    dA_prev_temp, dW_temp, db_temp = linear_activation_backward(dAL, current_cache, "sigmoid")
    grads["dA" + str(L - 1)] = dA_prev_temp
    grads["dW" + str(L)] = dW_temp
    grads["db" + str(L)] = db_temp

    # Hidden layers, walked from l = L-2 down to l = 0: RELU -> LINEAR.
    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 1)], current_cache, "relu")
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads

In [11]:
def update_parameters(params, grads, learning_rate):
    """
    Apply one gradient-descent step to every layer's weights and biases.

    Arguments:
    params -- python dictionary containing your parameters 
    grads -- python dictionary containing your gradients, output of L_model_backward
    learning_rate -- step size multiplying each gradient

    Returns:
    parameters -- python dictionary containing your updated parameters 
                  parameters["W" + str(l)] = ... 
                  parameters["b" + str(l)] = ...
    """
    # Work on a deep copy so the dictionary passed in is left unmodified.
    parameters = copy.deepcopy(params)
    num_layers = len(parameters) // 2

    for layer in range(1, num_layers + 1):
        suffix = str(layer)
        w_key, b_key = "W" + suffix, "b" + suffix
        parameters[w_key] = parameters[w_key] - learning_rate * grads["dW" + suffix]
        parameters[b_key] = parameters[b_key] - learning_rate * grads["db" + suffix]

    return parameters

In [12]:
def L_layer_model(X, Y, layers_dims, learning_rate = 0.0075, num_iterations = 3000, print_cost=False):
    """
    Implements a L-layer neural network: [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID.
    
    Arguments:
    X -- input data, of shape (n_x, number of examples)
    Y -- true "label" vector (containing 1 if cat, 0 if non-cat), of shape (1, number of examples)
    layers_dims -- list containing the input size and each layer size, of length (number of layers + 1).
    learning_rate -- learning rate of the gradient descent update rule
    num_iterations -- number of iterations of the optimization loop
    print_cost -- if True, it prints the cost every 100 steps and at the last step
    
    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    costs -- list of costs recorded every 100 iterations and at the last iteration
    """

    np.random.seed(1)
    costs = []                         # keep track of cost

    parameters = initialize_parameters_deep(layers_dims)

    # The labels do not change between iterations, so convert them once.
    Y = np.array(Y)

    for i in range(0, num_iterations):

        # Forward pass and cost for the current parameters.
        AL, caches = L_model_forward(X, parameters)
        AL = np.asarray(AL)

        cost = compute_cost(AL, Y)

        # Backward pass followed by one gradient-descent step.
        grads = L_model_backward(AL, Y, caches)

        parameters = update_parameters(parameters, grads, learning_rate)

        # Checkpoint every 100 iterations and on the final one. Printing is
        # gated on print_cost for both cases; the cost is always recorded.
        is_checkpoint = i % 100 == 0 or i == num_iterations - 1
        if print_cost and is_checkpoint:
            print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))
        if is_checkpoint:
            costs.append(cost)

    return parameters, costs

In [13]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

# Fetch the breast cancer dataset and keep the labels as a (1, n_examples) row
data = load_breast_cancer()
X = data.data
Y = data.target.reshape(1, -1)

# Hold out 20% of the examples for testing. The splitter works on rows, so the
# labels go in as a column and are turned back into rows afterwards.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y.T, test_size=0.2, random_state=42
)
Y_train, Y_test = Y_train.T, Y_test.T

# Fit the scaler on the training features only, apply it to both splits, and
# transpose so that each column is one example.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train).T
X_test = scaler.transform(X_test).T

# Network layout: input features -> 10 units -> 5 units -> 1 output unit
layers_dims = [X_train.shape[0], 10, 5, 1]

# Fit the network on the training split.
# NOTE(review): the recorded output of this cell shows the cost levelling off
# near 0.66 -- worth checking initialisation / learning rate; TODO confirm.
parameters, costs = L_layer_model(
    X_train,
    Y_train,
    layers_dims,
    learning_rate=0.0075,
    num_iterations=3000,
    print_cost=True,
)

# Predict function
def predict(X, parameters):
    """
    Turn the network's output into hard 0/1 predictions.

    Arguments:
    X -- input data passed to L_model_forward()
    parameters -- parameter dictionary passed to L_model_forward()

    Returns:
    predictions -- integer array, 1 where the output activation exceeds 0.5 and 0 elsewhere
    """
    probabilities, _caches = L_model_forward(X, parameters)
    above_threshold = probabilities > 0.5
    return above_threshold.astype(int)

# Score the held-out examples: percentage of predictions that match the labels
Y_pred_test = predict(X_test, parameters)
matches = Y_pred_test == Y_test
accuracy = np.mean(matches) * 100

print(f"Test Set Accuracy: {accuracy:.2f}%")


Cost after iteration 0: 0.6931476713159066
Cost after iteration 100: 0.682800359068772
Cost after iteration 200: 0.6756858019404656
Cost after iteration 300: 0.6707860071061794
Cost after iteration 400: 0.6674043731186442
Cost after iteration 500: 0.6650651841012698
Cost after iteration 600: 0.6634434710133167
Cost after iteration 700: 0.6623167925385105
Cost after iteration 800: 0.661532444466154
Cost after iteration 900: 0.6609852943395198
Cost after iteration 1000: 0.6606030800473499
Cost after iteration 1100: 0.6603358617124779
Cost after iteration 1200: 0.6601487810165857
Cost after iteration 1300: 0.6600176581619674
Cost after iteration 1400: 0.6599256605596245
Cost after iteration 1500: 0.6598610370840541
Cost after iteration 1600: 0.6598155943458623
Cost after iteration 1700: 0.6597836006712045
Cost after iteration 1800: 0.6597610395299953
Cost after iteration 1900: 0.6597450961759975
Cost after iteration 2000: 0.6597338006903151
Cost after iteration 2100: 0.6597257658532344
Co