In [1]:
import numpy as np
import pandas as pd
import plotly.express as px

In [2]:
df = pd.read_csv('breast_cancer.csv')

In [3]:
from sklearn.preprocessing import LabelEncoder
lr = LabelEncoder()
df['diagnosis'] = lr.fit_transform(df['diagnosis'])

In [4]:
corr = df.corr()

In [5]:
cor_target = abs(corr["diagnosis"])
columns = cor_target[cor_target > 0.2]
names = list(columns.index)

if 'diagnosis' in names:
    names.remove('diagnosis')
print(names)


['radius_mean', 'texture_mean', 'perimeter_mean', 'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean', 'concave points_mean', 'symmetry_mean', 'radius_se', 'perimeter_se', 'area_se', 'compactness_se', 'concavity_se', 'concave points_se', 'radius_worst', 'texture_worst', 'perimeter_worst', 'area_worst', 'smoothness_worst', 'compactness_worst', 'concavity_worst', 'concave points_worst', 'symmetry_worst', 'fractal_dimension_worst']


In [6]:
X = df[names].values
y = df['diagnosis'].values

In [7]:
def scale(X):
    mean = np.mean(X, axis=0)
    std = np.std(X, axis=0)
    X = (X - mean) / std
    return X

In [8]:
from sklearn.model_selection import train_test_split

In [9]:
X = scale(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=42)

#### ReLU Function
**\begin{equation}
f(Z) = max(0, Z)
\end{equation}**

In [10]:
def relu(Z):
    A = np.maximum(0,Z)
    cache = Z 
    return A, cache

#### Derivative of ReLU function
f'(x) = {0, if z <=0 and 1 if z > 0

In [11]:
def relu_backward(dA, cache):
    Z = cache
    dZ = np.array(dA, copy=True) 
    dZ[Z <= 0] = 0
    
    return dZ

#### Sigmoid Function
**\begin{equation}
f(Z) = \frac{1}{1+e^{-Z}}
\end{equation}**

In [12]:
def sigmoid(Z):
    A = 1/(1+np.exp(-Z))
    cache = Z
    
    return A, cache

#### Derivative of Sigmoid function
\begin{equation}
f'(Z) = f(Z)(1-f(Z))
\end{equation}

In [13]:
def sigmoid_backward(dA, cache):
    Z = cache
    s = 1/(1+np.exp(-Z))
    dZ = dA * s * (1-s)
    
    return dZ

In [14]:
_,shape = X.shape

In [15]:
print(shape)

25


# How the Neural Network Algorithm Works

---

## **1. Initialize Parameters**

For each layer \( l \) in the neural network:

- Initialize weights:

**\begin{equation}
W^{[l]} \in \mathbb{R}^{n^{[l]} \times n^{[l-1]}}
\end{equation}**

- Initialize biases:

**\begin{equation}
b^{[l]} \in \mathbb{R}^{n^{[l]} \times 1}
\end{equation}**

Where:    
- \( n^{[l]} \): number of neurons in layer \( l \)  
- \( n^{[l-1]} \): number of neurons in the previous layer



---

## **2. Forward Propagation**

We calculate the output of each layer from the input layer to the final layer.

For each layer \( l \):

**\begin{equation}
Z^{[l]} = W^{[l]} A^{[l-1]} + b^{[l]}
\end{equation}**

**\begin{equation}
A^{[l]} = g(Z^{[l]})
\end{equation}**

Where:
- \( A^{[0]} = X \) is the input data  
- \( g(\cdot) \) is the activation function (e.g., ReLU, sigmoid)

---

## **3. Compute the Cost**

To measure how well the model is performing, we use a cost function.  
For binary classification, the **binary cross-entropy loss** is:

**\begin{equation}
J = -\frac{1}{m} \sum_{i=1}^{m} \left[ y^{(i)} \log(a^{[L](i)}) + (1 - y^{(i)}) \log(1 - a^{[L](i)}) \right]
\end{equation}**

Where:
- \( a^{[L](i)} \) is the predicted output  
- \( y^{(i)} \) is the true label  
- \( m \) is the number of training examples

---

## **4. Backpropagation**

We use the chain rule to compute the gradient of the loss with respect to each parameter.

For each layer \( l \):

**\begin{equation}
dZ^{[l]} = dA^{[l]} \circ g'^{[l]}(Z^{[l]})
\end{equation}**

**\begin{equation}
dW^{[l]} = \frac{1}{m} dZ^{[l]} (A^{[l-1]})^T
\end{equation}**

**\begin{equation}
db^{[l]} = \frac{1}{m} \sum dZ^{[l]}
\end{equation}**

**\begin{equation}
dA^{[l-1]} = (W^{[l]})^T dZ^{[l]}
\end{equation}**

---

## **5. Update Parameters**

Using **gradient descent**, we update the weights and biases:

**\begin{equation}
W^{[l]} = W^{[l]} - \alpha \text{ } dW^{[l]}
\end{equation}**

**\begin{equation}
b^{[l]} = b^{[l]} - \alpha \text{ } db^{[l]}
\end{equation}**

Where:
- \( \alpha \): learning rate

---

## ✅ Summary

1. **Initialize** weights and biases  
2. **Forward Propagate** the input  
3. **Compute** the cost  
4. **Backpropagate** to compute gradients  
5. **Update** the parameters using gradient descent


In [16]:
class NeuralNetwork:
    def __init__(self, layer_dimensions=[shape,16,16,1],learning_rate=0.00001,activation='sigmoid'):
        self.layer_dimensions = layer_dimensions
        self.learning_rate = learning_rate
        self.activation = activation
        
    def initialize_parameters(self):
        np.random.seed(3)
        self.n_layers =  len(self.layer_dimensions)
        for l in range(1, self.n_layers):
            vars(self)[f'W{l}'] = np.random.randn(self.layer_dimensions[l], self.layer_dimensions[l-1]) * 0.01
            vars(self)[f'b{l}'] = np.zeros((self.layer_dimensions[l], 1))
 
    def _linear_forward(self, A, W, b):
        Z = np.dot(W,A) + b
        cache = (A, W, b)
        return Z, cache
    
    def _forward_propagation(self,A_prev ,W ,b , activation):

        if activation == "sigmoid":
            Z, linear_cache = self._linear_forward(A_prev, W, b)
            A, activation_cache = sigmoid(Z) 
        elif activation == "relu":
            Z, linear_cache = self._linear_forward(A_prev, W, b) 
            A, activation_cache = relu(Z) 

        cache = (linear_cache, activation_cache)
        return A, cache
    
    
    def forward_propagation(self, X):
        caches = []

        A = X
        L =  self.n_layers -1
        for l in range(1, L):
            A_prev = A 
            A, cache = self._forward_propagation(A_prev, vars(self)['W' + str(l)], vars(self)['b' + str(l)], "relu")
            caches.append(cache)

        predictions, cache = self._forward_propagation(A, vars(self)['W' + str(L)], vars(self)['b' + str(L)], "sigmoid")
        caches.append(cache)

        return predictions, caches
    
    def compute_cost(self, predictions, y):
        m = y.shape[0]
        cost = (-1/m) * (np.dot(y, np.log(predictions+1e-9).T) + np.dot((1-y), np.log(1-predictions+1e-9).T))
        cost = np.squeeze(cost)
        return cost   
        
    def _linear_backward(self, dZ, cache):
        A_prev, W, b = cache
        m = A_prev.shape[1]
        dW = (1/m) * np.dot(dZ, A_prev.T)
        db = (1/m) * np.sum(dZ, axis=1, keepdims=True)
        dA_prev = np.dot(W.T,dZ)
        return dA_prev, dW, db
    
    def _back_propagation(self, dA, cache, activation):
        linear_cache, activation_cache = cache

        if activation == "relu":
            dZ = relu_backward(dA, activation_cache)
        elif activation == "sigmoid":
            dZ = sigmoid_backward(dA, activation_cache)
            
        dA_prev, dW, db = self._linear_backward(dZ, linear_cache)
        return dA_prev, dW, db

    def back_propagation(self, predictions, Y, caches):
        L = self.n_layers - 1
        m = predictions.shape[1]
        Y = Y.reshape(predictions.shape)
        
        dAL = - (np.divide(Y, predictions + 1e-9) - np.divide(1 - Y, 1 - predictions + 1e-9))

        # Output layer (sigmoid)
        current_cache = caches[L - 1]
        vars(self)[f'dA{L - 1}'], vars(self)[f'dW{L}'], vars(self)[f'db{L}'] = self._back_propagation(dAL, current_cache, "sigmoid")

        # Hidden layers (ReLU)
        for l in reversed(range(L - 1)):
            current_cache = caches[l]
            dA_prev, dW, db = self._back_propagation(vars(self)[f'dA{l + 1}'], current_cache, "relu")
            vars(self)[f'dA{l}'] = dA_prev
            vars(self)[f'dW{l + 1}'] = dW
            vars(self)[f'db{l + 1}'] = db


    def update_parameters(self):
            L = self.n_layers - 1
            for l in range(L):
                vars(self)[f'W{l+1}'] = vars(self)[f'W{l+1}'] - self.learning_rate * vars(self)[f'dW{l+1}']
                vars(self)[f'b{l+1}']  = vars(self)[f'b{l+1}'] - self.learning_rate * vars(self)[f'db{l+1}']
                

    def fit(self,X, Y, epochs=2000, print_cost=True):
            X = X.T
            np.random.seed(1)
            costs = [] 
            m = X.shape[1]                           
            self.initialize_parameters()
            for i in range(0, epochs):
                predictions, caches = self.forward_propagation(X)
                cost = self.compute_cost(predictions, Y)
                self.back_propagation(predictions, Y, caches)

                self.update_parameters()

                if print_cost and i % 5000 == 0:
                    print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))
                if print_cost and i % 5000 == 0:
                    costs.append(cost)
            if print_cost:         
                fig = px.line(y=np.squeeze(costs),title='Cost',template="plotly_dark")
                fig.update_layout(
                    title_font_color="#00F1FF", 
                    xaxis=dict(color="#00F1FF"), 
                    yaxis=dict(color="#00F1FF") 
                )
                fig.show()


    def predict(self,X,y):
        X = X.T
        predictions, _ = self.forward_propagation(X)
        predictions = (predictions > 0.5)
        predictions = np.squeeze(predictions.astype(int))
        return np.sum((predictions == y)/X.shape[1]), predictions.T

In [17]:
model = NeuralNetwork(learning_rate=0.0001,activation='relu')

model.fit(X_train, y_train,epochs=30000,print_cost=True)

Cost after iteration 0: 0.6931645938036044
Cost after iteration 5000: 0.6858348042007028
Cost after iteration 10000: 0.680130452262741
Cost after iteration 15000: 0.6756865532399361
Cost after iteration 20000: 0.6722206490091551
Cost after iteration 25000: 0.6695147148419733


In [18]:
accuracy,predictions = model.predict(X_test, y_test)

In [19]:
print(accuracy)

0.6228070175438597
