# Backpropagation Algorithm

_Building an Artificial Neural Network (ANN) by implementing the Backpropagation algorithm
and testing the same using appropriate data sets._

This is based on standard program written by Deepak D, Assistant Professor, Department of Computer Science & Enginnering, Canara Engineering College, Mangaluru, Karnataka, India. Required changes were made to make this more relevant to the need.

In [1]:
# Imports required packages

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

## Loading Data Set

In [2]:
sales = pd.read_csv("./../../Data/Advertising.csv")

In [3]:
display(sales.head())

Unnamed: 0.1,Unnamed: 0,TV,Radio,Newspaper,Sales
0,1,230.1,37.8,69.2,22.1
1,2,44.5,39.3,45.1,10.4
2,3,17.2,45.9,69.3,9.3
3,4,151.5,41.3,58.5,18.5
4,5,180.8,10.8,58.4,12.9


In [4]:
# Checks the shape of the data
print(sales.shape)

(200, 5)


In [5]:
# Prints information about the data set
print(sales.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  200 non-null    int64  
 1   TV          200 non-null    float64
 2   Radio       200 non-null    float64
 3   Newspaper   200 non-null    float64
 4   Sales       200 non-null    float64
dtypes: float64(4), int64(1)
memory usage: 7.9 KB
None


## Data Preparation

In [6]:
# Seperates features and target

X = sales[["TV", "Radio", "Newspaper"]]
y = sales["Sales"]

In [7]:
# Standardizes features
X = X.apply(lambda x: (x - X.mean()) / X.std(), axis = 1)

In [8]:
# Checks how the standardized features look like
display(X.head())

Unnamed: 0,TV,Radio,Newspaper
0,0.967425,0.979066,1.774493
1,-1.194379,1.080097,0.667903
2,-1.51236,1.524637,1.779084
3,0.051919,1.214806,1.283185
4,0.393196,-0.839507,1.278593


In [9]:
# Scaling target variable

y_scaler = MinMaxScaler()
y_transformed = y_scaler.fit_transform(np.reshape(y, (-1,1)))

## Helper Functions

In [10]:
def sigmoid(x):
    """
    Returns sigmoid value for the input parameter
    """
    
    return 1/(1 + np.exp(-x))

In [11]:
def sigmoid_derivative(x):
    """
    Returns derivative of sigmoid function
    """
    
    return x * (1 - x)

## Modeling

### Initialization

In [12]:
# Count of units in the input layer; The count is equal to the number of features in data set
input_layer_units = X.shape[1]

# number of units at output layer; It is one for target being a continous variable
output_layer_units = 1


# Hyperparameters initialization

# Number of times training data will be used for model training
epoch = 5000

learning_rate = 0.1

# Count of units in the hidden layer; It was assumed that there will be just one hidden layer
hidden_layer_units = 3

In [13]:

# Weights from input layer nodes to hidden layer nodes
hidden_layer_weights = np.random.uniform(size=(input_layer_units, hidden_layer_units))

# Biases for hidden layer nodes
hidden_layer_biases = np.random.uniform(size=(1, hidden_layer_units))
                                         
# Weights from hidden layer nodes to output layer nodes
output_layer_weights = np.random.uniform(size=(hidden_layer_units,output_layer_units))

# Biases for output layer nodes
output_layer_biases=np.random.uniform(size=(1,output_layer_units))

In [14]:
# Prints hidden layer weights
hidden_layer_weights

array([[0.3114909 , 0.50928443, 0.56975408],
       [0.68989413, 0.74121514, 0.4530692 ],
       [0.09502407, 0.95220931, 0.53587756]])

In [15]:
# Prints the hidden layer biases
hidden_layer_biases

array([[0.58308369, 0.28702572, 0.01840128]])

In [16]:
# Prints output layer weights
output_layer_weights

array([[0.66548532],
       [0.34931631],
       [0.06783312]])

In [17]:
# Prints output layer biases
output_layer_biases

array([[0.38730299]])

### Traning Model

In [18]:
for i in range(epoch):

    #Forward Propogation
    hidden_layer_nets = np.dot(X, hidden_layer_weights)
    hidden_layer_nets = hidden_layer_nets + hidden_layer_biases
    hidden_layer_outputs = sigmoid(hidden_layer_nets)
    
    output_layer_nets = np.dot(hidden_layer_outputs, output_layer_weights)
    output_layer_nets = output_layer_nets + output_layer_biases
    output = sigmoid(output_layer_nets)

    #Backpropagation
    output_error = y_transformed - output
    output_gradients = sigmoid_derivative(output)
    output_delta = output_error * output_gradients
    hidden_layer_error = output_delta.dot(output_layer_weights.T)

    # Calculation of hidden layer weights' contribution to error
    hidden_layer_gradients = sigmoid_derivative(hidden_layer_outputs)
    hidden_layer_delta = hidden_layer_error * hidden_layer_gradients

    # Weights updates for both output and hidden layer units
    output_layer_weights += learning_rate * hidden_layer_outputs.T.dot(output_delta)
    hidden_layer_weights += learning_rate * X.T.dot(hidden_layer_delta)


In [19]:
# Transforms data back from scaled ones
predictions = y_scaler.inverse_transform(output)

In [20]:
# Shows the predicted sales against actual sale for all data points
pd.DataFrame({"Actual Sale": y, "Predicted Sale": predictions.flatten()})

Unnamed: 0,Actual Sale,Predicted Sale
0,22.1,19.884677
1,10.4,10.335285
2,9.3,9.107748
3,18.5,17.091700
4,12.9,12.445042
...,...,...
195,7.6,7.337357
196,9.7,9.457026
197,12.8,13.166518
198,25.5,21.296262
