# Practice Notebook: Basics of Deep Learning and Neural Networks

## Importing Pre-requisites

In [1]:
# Importing the required libraries
# ---
#
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Show every column, and the full content of every cell, when a dataframe is displayed
# ---
# None removes the limit. The canonical option names are used here:
# pandas matches option keys as patterns, so a misspelt key can still
# resolve by accident, but the exact name is unambiguous.
#
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

# Show visualisation in the notebook
# ---
# IPython line magic: matplotlib figures are rendered inline in the notebook.
#
%matplotlib inline

## Example: Classification

In [2]:
## Example 1
# ---
# Create a classification model using neural networks that will make
# a prediction on whether a person survived the titanic disaster.
# ---
# Train Dataset = https://bit.ly/31azYjb
# Test Dataset = https://bit.ly/2XmmAYe
# ---
# YOUR CODE GOES BELOW
#

### Step 1: Data Importation

In [3]:
# Reading the Titanic data from its remote CSV file
# and previewing three randomly chosen rows
# ---
#
df = pd.read_csv(filepath_or_buffer='https://bit.ly/3d1Te88')
df.sample(n=3)

Unnamed: 0,Survived,Class,Sex,Age,Fare
821,1,3,1,27.0,8.6625
241,1,3,0,29.699118,15.5
366,1,1,0,60.0,75.25


### Step 2: Data Exploration

In [4]:
# Listing the distinct values taken by the target variable
df['Survived'].unique()

array([0, 1], dtype=int64)

In [5]:
# Reporting the dataset's dimensions as (number of rows, number of columns)
print("Dataset shape:", (len(df.index), len(df.columns)))

Dataset shape: (891, 5)


In [6]:
# Checking the data type of every column in the dataset
# (all columns are expected to be numeric before they are fed to the models below)
df.dtypes

Survived      int64
Class         int64
Sex           int64
Age         float64
Fare        float64
dtype: object

### Step 3: Data Preparation

In [7]:
# Counting the missing entries in each column of the dataset
df.isnull().sum()

Survived    0
Class       0
Sex         0
Age         0
Fare        0
dtype: int64

In [9]:
# Separating the predictors from the target
# ---
# We take the full list of column names, drop the target's name
# from that list and use what is left to pick the feature columns.
# ---
#
properties = df.columns.tolist()
properties.remove('Survived')
X = df.loc[:, properties]

# The target variable is the 'Survived' column on its own
# ---
#
y = df.loc[:, 'Survived']

In [10]:
# Holding out 30% of the rows as a test set
# ---
# random_state is fixed so the same split is produced on every run.
#
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state = 0,
    test_size = 0.3,
)

### Step 4: Data Modeling

##### Creating Base Model

We will create a base model and compare its performance with our Artificial Neural Network.

In [11]:
# Baseline model: a Random Forest Classifier that the neural network is compared against
# ---
#
from sklearn.ensemble import RandomForestClassifier

# Creating the baseline and fitting it on the training split in one step
# ---
# fit() hands back the fitted estimator, which is what we keep.
#
random_forest_classifier = RandomForestClassifier(random_state=0).fit(X_train, y_train)

# Predicting the held-out test rows with the baseline
# ---
#
y_prediction = random_forest_classifier.predict(X_test)

In [12]:
from sklearn.metrics import accuracy_score

# Scoring the baseline on the test split.
# accuracy_score takes the true labels first and the predicted labels second.
print("Random Forest Classifier", accuracy_score(y_test, y_prediction))

Random Forest Classifier 0.8097014925373134


##### Creating our Artificial Neural Network

In [13]:
# Building the Artificial Neural Network with keras
# ---
# A keras network is described as an ordered stack of layers
# (input, hidden and output layers) that keras combines into a single model.
# Dropout regularization is placed after each hidden layer to limit overfitting.
# ---
#
import keras
from keras.models import Sequential     # Container that stacks the layers into one network
from keras.layers import Dense          # Fully connected layer (hidden and output layers)
from keras.layers import Dropout        # Regularization layer used against overfitting


# The whole stack is declared at once and handed to Sequential.
# The classifier is meant to answer with one of two classes, 0 or 1.
# ---
# 1. Dense(units = 100, input_dim = 4, activation = 'relu')
#    First hidden layer, which also declares the shape of the input.
#    ->  units = 100         : number of neurons in the layer. There is no fixed rule;
#                              different values are normally tried through trial and error.
#    ->  input_dim = 4       : number of input values per row, i.e. the number of
#                              features in our dataset.
#    ->  activation = 'relu' : relu gives satisfactory results most of the time in hidden
#                              layers; other activation functions can be experimented with.
# 2. Dropout(0.3, seed = 2)
#    Drops 30% of the values passed to it during training to prevent overfitting.
#    Rates between 20% and 50% are the usual choice.
#    The seed is set to 2 to make the dropped values reproducible;
#    without it every run would drop different values.
# 3. Dense(units = 100, activation = 'relu')
#    Second hidden layer. No input dimension is given: in a sequential model keras
#    infers it from the previous layer's output, i.e. 100 values.
# 4. Dropout(0.3, seed = 2)
#    Same regularization as above, applied to the second hidden layer.
# 5. Dense(units = 1, activation = "sigmoid")
#    Output layer. A single unit is enough for a binary answer; the sigmoid
#    function maps its value to the range between 0 and 1.
# ---
#
classifier = Sequential([
    Dense(units = 100, input_dim = 4, activation = 'relu'),
    Dropout(0.3, seed = 2),
    Dense(units = 100, activation = 'relu'),
    Dropout(0.3, seed = 2),
    Dense(units = 1, activation = "sigmoid"),
])

In [14]:
# Compiling our ANN
# ---
# Compiling configures the model for training: it fixes the loss to minimise,
# the optimizer that minimises it and the metrics reported along the way.
# ---
# loss = 'binary_crossentropy' : Measures how far the predictions are from the true labels,
#                                i.e. how well our ANN models the given data.
#                                Our output is binary, so we use 'binary_crossentropy'.
#                                For multi-class classification 'categorical_crossentropy'
#                                can be used instead.
# optimizer = 'adam'           : The optimizer controls the learning rate throughout training,
#                                i.e. how fast the optimal weights for the model are calculated.
#                                A smaller learning rate can lead to more accurate weights
#                                (up to a certain point) at the cost of longer training.
#                                'adam' is generally a good optimizer to use for many cases.
# metrics = ['accuracy']       : Accuracy is the evaluation metric we report.
# ---
#
classifier.compile(
    loss = 'binary_crossentropy',
    optimizer = 'adam',
    metrics = ['accuracy'],
)

In [15]:
# Fitting the ANN on the training split
# ---
# Learning is iterative: the model goes through the training data
# repeatedly and adjusts its weights a little on every step.
# ---
# - Epoch: one pass through all of the rows in the training dataset.
# - Batch: the group of samples the model looks at before its weights are updated.
#          The larger the batch size, the more memory is needed.
# Both values are usually chosen experimentally by trial and error.
# The aim is to train long enough for the model to learn a good (or good enough)
# mapping from the input rows to the output class. Some error always remains,
# but for a given configuration it levels out after a while; that point is
# called model convergence.
# ---
# NB: y_train is used as it is, a single column holding the labels 0 and 1,
#     which is the form a single sigmoid output with binary_crossentropy expects.
#     No one hot encoding is applied to it.
# ---
#
classifier.fit(
    X_train,
    y_train,
    batch_size = 32,
    epochs = 300,
)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

<keras.src.callbacks.History at 0x1cf0c9a5ea0>

In [16]:
# Evaluating the ANN on the test split
# ---
# evaluate() hands back the loss followed by the metric chosen at
# compile time (accuracy), both computed on data the model has not seen.
# ---
# Ways in which the model could be improved:
# 1. Optimizing the epochs.
# 2. Optimizing the number of layers.
# 3. Optimizing the number of nodes per layer.
# ---
#
loss, accuracy = classifier.evaluate(x = X_test, y = y_test)
print('ANN Accuracy:', accuracy)

ANN Accuracy: 0.8134328126907349


### Step 5: Making Predictions

In [17]:
# Question
# ---
# Say we wanted to determine whether a 40 year old woman in Class 4 who paid 30
# survived the titanic. We can make this prediction as follows.
# The values are given in the same order as the feature columns: Class, Sex, Age, Fare.
# NOTE(review): assumes Sex = 0 encodes a woman - TODO confirm against the dataset description.
# NOTE(review): the previewed rows only show Class values 1 and 3 - confirm that Class 4 is intended.
# ---
#
new_value = np.array([[4, 0, 40, 30]])

# Making our prediction
# ---
# The output layer is a single sigmoid unit, so predict() gives one value
# between 0 and 1 per input row: the predicted probability of class 1 (survived).
#
predicted_probabilities = classifier.predict(new_value)

# Turning the probability into a class label
# ---
# np.argmax() must not be used here: with only one output value its result is
# always index 0, whatever the probability is. Instead we compare the
# probability with the usual 0.5 threshold.
# ---
#
predicted_class = int(predicted_probabilities[0, 0] > 0.5)
print(predicted_class)

# An output of 0 means the woman is predicted not to have survived,
# an output of 1 means she is predicted to have survived.

0


## Example: Regression

In [18]:
## Example 1
# ---
# Create a regression model using artificial neural networks
# to predict the weight of fish given the following dataset.
# ---
# Dataset = http://bit.ly/MRFishDataset
# ---
# YOUR CODE GOES BELOW
#

### Step 1: Data Importation

In [19]:
# Reading the fish dataset from its remote CSV file
# and previewing three randomly chosen rows
# ---
#
fish_df = pd.read_csv(filepath_or_buffer='http://bit.ly/MRFishDataset')
fish_df.sample(n=3)

Unnamed: 0,Species,Weight,Length1,Length2,Length3,Height,Width
139,Pike,770.0,44.8,48.0,51.2,7.68,5.376
23,Bream,680.0,31.8,35.0,40.6,15.4686,6.1306
134,Pike,456.0,40.0,42.5,45.5,7.28,4.3225


### Step 2: Data Exploration

In [20]:
# Previewing the statistical summary of our dataset
# (count, mean, standard deviation, minimum, quartiles and maximum of each numeric column)
#
fish_df.describe()

Unnamed: 0,Weight,Length1,Length2,Length3,Height,Width
count,159.0,159.0,159.0,159.0,159.0,159.0
mean,398.326415,26.24717,28.415723,31.227044,8.970994,4.417486
std,357.978317,9.996441,10.716328,11.610246,4.286208,1.685804
min,0.0,7.5,8.4,8.8,1.7284,1.0476
25%,120.0,19.05,21.0,23.15,5.9448,3.38565
50%,273.0,25.2,27.3,29.4,7.786,4.2485
75%,650.0,32.7,35.5,39.65,12.3659,5.5845
max,1650.0,59.0,63.4,68.0,18.957,8.142


In [None]:
# Performing Exploratory Analysis
# ---
# This time we will compute a correlation matrix to determine the relationships between the different variables.
# The matrix gives us a sense of how strongly the variables are correlated, i.e. whether an
# increase or decrease in one variable goes together with an increase or decrease in another.
# Every value in the matrix lies between -1 and 1:
# - A value close to 1 (say above 0.5) means the two variables have a strong positive linear
#   relationship: as one increases, the other increases too.
# - A value close to -1 (say below -0.5) means the two variables have a strong negative linear
#   relationship: as one increases, the other decreases.
# - A value close to 0 (roughly between -0.5 and 0.5) means there is little or no linear
#   relationship between the two variables.
# ---
# This type of analysis can help us examine an assumption of linear regression:
# the relationship of the predictor variables with the response variable.
# ---
# Correlation is only defined for numbers, so the text column 'Species' is left out
# explicitly. Recent pandas versions raise an error when corr() meets a text column.
# ---
#
corrMatrix = fish_df.select_dtypes(include='number').corr()
corrMatrix

In [None]:
# Drawing the correlation matrix as a heatmap for better clarity
# ---
#
import seaborn as sns

# The figure is 10 x 10 inches; the heatmap is drawn on its axes and
# every cell is annotated with its correlation value.
#
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(corrMatrix, annot = True, ax = ax);

### Step 3: Data Preparation

In [None]:
# Picking the predictors (the five body measurements) and the response (the weight)
# ---
#
X = fish_df.loc[:, ['Length1', 'Length2', 'Length3', 'Height', 'Width']]
y = fish_df.loc[:, 'Weight']

In [None]:
# Holding out 20% of the rows as a test set
# ---
# random_state is fixed so the same split is produced on every run.
#
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state = 42,
    test_size = 0.2,
)

### Step 4: Data Modeling

#### Creating the Base Model

We will create a base model and compare its performance with our Artificial Neural Network.

In [None]:
# Baseline model: a Decision Tree Regressor that the neural network is compared against
# ---
#
from sklearn.tree import DecisionTreeRegressor

# Creating the regressor with a fixed random_state
# ---
#
decision_tree_regressor = DecisionTreeRegressor(random_state=0)

# Training it on the training split, then predicting the weights of the test rows
# ---
# fit() hands back the fitted regressor, so predict() is called directly on its result.
#
decision_tree_pred = decision_tree_regressor.fit(X_train, y_train).predict(X_test)

In [None]:
from sklearn import metrics

# Root mean squared error of the baseline on the test split:
# the mean squared error is computed first, then its square root is reported.
decision_tree_mse = metrics.mean_squared_error(y_test, decision_tree_pred)
print('Decision Tree: Root Mean Squared Error:', np.sqrt(decision_tree_mse))

#### Creating our Artificial Neural Network

In [None]:
# Libraries and layers needed for the network
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout

# Declaring the ANN regressor as one stack of layers
# ---
# 1. Dense(units = 10, input_dim = 5, activation = 'relu')
#    First hidden layer; input_dim = 5 matches our five feature columns.
# 2. Dropout(0.3, seed = 2)
#    Drops 30% of the values during training; the seed keeps this reproducible.
# 3. Dense(units = 10, activation = 'relu')
#    Second hidden layer.
# 4. Dropout(0.3, seed = 2)
#    Same regularization for the second hidden layer.
# 5. Dense(units = 1)
#    Output layer with a single unit and no activation function.
#    This is the usual set-up for regression, where we are trying
#    to predict a single continuous value.
# ---
regressor = Sequential([
    Dense(units = 10, input_dim = 5, activation = 'relu'),
    Dropout(0.3, seed = 2),
    Dense(units = 10, activation = 'relu'),
    Dropout(0.3, seed = 2),
    Dense(units = 1),
])

In [None]:
# Compiling our ANN
# ---
# We use adam as our optimization algorithm and the mean squared error (mse)
# as the loss function, which is a popular choice for regression.
# We also report the Mean Absolute Error (MAE) as a metric.
# ---
#
regressor.compile(
    loss = 'mse',
    optimizer = 'adam',
    metrics = ['mae'],
)

In [None]:
# Fitting the regressor on the training split
# ---
# The model makes 300 passes (epochs) over the training rows.
#
regressor.fit(x = X_train, y = y_train, epochs = 300)

In [None]:
# Evaluating the regressor on the test split
# ---
# evaluate() calculates the loss and the metrics chosen when the model
# was compiled, here the mse loss followed by the mae metric.
# ---
# - MAE (Mean Absolute Error) quantifies how close predictions are to the actual outcomes.
# - MSE (Mean Squared Error) measures the average of the squares of the errors.
#   The closer to 0, the better.
# - RMSE (Root Mean Squared Error) is the square root of the MSE, which brings the
#   error back to the unit of the response variable.
# ---
#
mse_value, mae_value = regressor.evaluate(x = X_test, y = y_test)

print('Mean squared error: ', mse_value)
print('Mean absolute error: ', mae_value)
print('Root Mean squared error: ', np.sqrt(mse_value))

From our MAE, the regressor on average predicted 240.39 above or below the actual values.

#### Explaining our Model

In a case where we need to explain which features our model relies on most to make its predictions, we can use the **SHAP** library. This allows us to create a summary of our features and their impact on the model output.

In [None]:
# Installing shap
# ---
#
!pip install shap

In [None]:
import shap
shap.initjs()

# KernelExplainer works with a prediction function: it takes a matrix of
# samples (rows) by features (columns) and gives back the model output for each row.
# ---
# We therefore wrap the network's predict() method instead of passing the keras
# model object itself. The (rows, 1) predictions are flattened to one value per
# row so that the SHAP values come back as a single (rows, features) matrix,
# which is the form summary_plot expects.
# verbose = 0 silences the progress bar of the many predict() calls SHAP makes.
# ---
#
def predict_weight(data):
    """Return the regressor's predicted weight for every row of `data` as a 1-D array."""
    return regressor.predict(data, verbose = 0).ravel()

# The training rows serve as background data; the test rows are the ones explained
# ---
#
explainer = shap.KernelExplainer(predict_weight, X_train.values)
shap_values = explainer.shap_values(X_test.values)

# Plot summary_plot as barplot
# ---
#
shap.summary_plot(shap_values, X_test, plot_type='bar')

The summary plot shows the most important features and the magnitude of their impact on the model. We can observe that Length2 contributed the most during prediction followed by Length3, Length1, Height and Width.

### Step 5: Making Predictions

In [None]:
# Making predictions
# ---
# We make predictions using our ANN by passing an array
# of feature values for our new prediction.
# ---
#

# Question:
# ---
# Say we wanted to determine the weight of fish with the following dimensions:
# 1. Length1: 30.9
# 2. Length2: 33.5
# 3. Length3: 38.6
# 4. Height:  15.6330
# 5. Width:   5.1338
# ---
# The values are passed in the same order as the feature columns the model was
# trained on: Length1, Length2, Length3, Height, Width.
# The height is 15.6330 as stated in the question above.
#
new_value = np.array([[30.9, 33.5, 38.6, 15.6330, 5.1338]])

# Making our prediction
# ---
# predict() gives back one predicted weight per input row.
#
print(regressor.predict(new_value))

## <font color="green">Challenges</font>

In [None]:
# Challenge 1
# ---
# Create an artificial neural networks classification model that
# predicts insurance costs given the following dataset.
# ---
# Dataset url = https://bit.ly/30GtDfO
# ---
# YOUR CODE GOES BELOW
#

In [None]:
# Challenge 2
# ---
# Build a neural network to predict insurance costs given the following dataset.
# ---
# Dataset url = https://bit.ly/InsuranceDS
# ---
# YOUR CODE GOES BELOW
#