Building ANN Model from Scratch

Step-1: IMPORT LIBRARIES

In [6]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

Step-2: Load the Dataset

In [7]:
data = pd.read_csv("Breast Cancer data set.csv")
data

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,...,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890,
1,842517,M,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,...,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902,
2,84300903,M,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,...,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,...,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300,
4,84358402,M,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,...,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,926424,M,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,...,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115,
565,926682,M,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,...,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637,
566,926954,M,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,...,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820,
567,927241,M,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,...,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400,


Step-3: Splitting Dependent and Independent Variables

In [8]:
X = data.drop(['diagnosis'], axis = 1).values
#Note: we are taking all input features except diagnosis because it is our output that we require and we give axis = 1 to drop that entire diagnosis coloumn

y = data['diagnosis'].values.reshape(-1,1)
#Note: Here we use reshape to get diagnosis as coloumn-axis values instead of row-axis values because when we droped diagnosis and taking it seperately we will get row-axis values


In [9]:
#Note:Try to run it without reshape you will get an array with all values in a row
y

array([['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['B'],
       ['B'],
       ['B'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['B'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['M'],
       ['B'],
       ['M'],
       ['B'],
       ['B'],
       ['B'],
       ['B'],
       ['B'],
       ['M'],
       ['M'],
       ['B'],
       ['M'],
       ['M'],
       ['B'],
       ['B'],
       ['B'],
       ['B'],
       ['M'],
       ['B'],
       ['M'],
       ['M'],
       ['B'],
       ['B'],
       ['B'],
       ['B'],
       ['M'],
      

Step-4: Standardize the features

In [10]:
#Note: Here we are standardizing the dataset because to give all values equal importance and to streamline the data and remove inconsistencies
scaler = StandardScaler()
X_Scaled = scaler.fit_transform(X)

  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


Step-5: Split Data into Training data and Testing Data (either 70-30 or 80-20 ratio)

In [6]:
#Note: (X_train is input and y_train is output) for training data & (X_test is input x used for testing and y_test is required output)

X_train, X_test, y_train, y_test = train_test_split(X_Scaled, y, test_size=0.2, random_state=42) # we took test_size=0.2 because 20% = 20/100 = 0.2 dumbo and we give random_state=42 because we want reproducebility so that for every time you run this code it will always take the same samples everytime

X_train.shape, X_test.shape, y_train.shape, y_test.shape # we use shape to see how many samples are there in train and test (row, column)

((455, 32), (114, 32), (455, 1), (114, 1))

Step-6: Build the ANN Network

1. Initialize the Parameters

In [7]:
input_size = X_train.shape[1]
hidden_size = 10 # number of neurons in 1 hidden layer
output_size = 1

2. Define a function to initialize weights(w1, w2) using randand biases(b1, b2) using zeroes

In [8]:
#Note: w1 is adding the weights from input layer to hidden layer & w2 is adding the weights from hidden layer to output layer

def initialize_parameters(input_size, hidden_size, output_size):
    np.random.seed(42)
    w1 = np.random.randn(input_size, hidden_size)*0.01
    b1 = np.zeros(1, hidden_size)
    w2 = np.random.randn(hidden_size, output_size)*0.01
    b2 = np.zeros(1, output_size)
    
    return w1, b1, w2, b2

3. Define Activasion Function

In [9]:
#Note: The sigmoid function is an activation function that maps any real-valued number into a value between 0 and 1. Useful for binary classification problems, where the output can be interpreted as a probability.
def sigmoid(z):
    return 1/(1+np.exp(-z))

#Note: The derivative of the sigmoid function is essential for the backpropagation algorithm, which is used to update the weights of the network during training.
def sigmoid_derivative(z):
    return z*(1-z)

#define relu derivative
def relu_derivative(z):
    return np.where(z>0, 1, 0)

4. Implement Forward Propagation

In [10]:
def forward_propagation(X, w1, b1, w2, b2):
    Z1 = np.dot(X, w1)+b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(A1, w2)+b2
    A2 = sigmoid(Z2)
    
    return Z1, A1, Z2, A2

5. Define Loss Function for binary classification

In [11]:
def binary_cross_entry(y_train, A2):
    bce = -np.mean(-(y_train*np.log(A2)))
    return bce

6. Implementing backward propagation

In [12]:
def backword_propagation(X, y, Z1, A1, A2,W2):
    m = X.shape[0]
    dZ2 = A2-y.reshape(-1,1)
    dW2 = (1/m)*np.dot(A1.T, dZ2)
    db2 = (1/m)*np.sum(dZ2, axis=0)
    
    dA1 = np.dot(dZ2, W2.T)
    dZ1 = dA1*relu_derivative(Z1)
    dW1 = (1/m)*np.dot(X.T, dZ1)
    db1 = (1/m)*np.sum(dZ1, axis=0)
    
    return dW1, db1, dW2, db2

7. Update Parameters

In [13]:
def update_parameters(w1,b1,w2,b2, dw1,db1,dw2,db2, learning_rate):
    w1 = w1 - learning_rate * dw1
    b1 = b1 - learning_rate * db1
    w2 = w2 - learning_rate * dw2
    b2 = b2 - learning_rate * db2
    
    return w1, b1, w2, b2

8. Training Loop

In [14]:
def train_model(X,y, hidden_size, learning_rate, epochs):
    input_size = X.shape[1]
    output_size = 1
    
    w1,b1,w2,b2 = initialize_parameters(input_size, hidden_size,output_size)
    
    for i in range(epochs):
        Z1, A1, Z2, A2 = forward_propagation(X,w1,b1,w2,b2) 
        y_pred = A2>0 
        cost = binary_cross_entropy(y_train, A2) 
        dw1, db1, dw2, db2 = backward_propagation (X,y, Z1, A1, Z2, A2) 
        w1, b1, w2, b2 = update_parameters (w1, b1, w2, b2, dw1,db1, dw2, db2, learning_rate) 
        if i%100 == 0: 
            print(f"epoch{i},cost={cost}") 
            accuracy-np.mean(y_pred.flatten()==y) 
            print(f"Accuracy: {accuracy}") 
        return accuracy
    
train_model(X_train, y_train, hidden_size=10, learning_rate=0.01, epochs=1000)

TypeError: Cannot interpret '10' as a data type