# Session 4

Introduction to Neural Networks



---



Michael de la Maza

AI/ML

Hult International Business School

Adapted from "Data Mining for Business Analytics" by Shmueli

## Simple Dataset: MLPClassifier

In [None]:
# Toy credit-card dataset: six customers, two features, one binary label

import numpy as np
import pandas as pd

# 5 minute exercise - What pattern do you see in this dataset?
# Draw graph

credit_records = {
    'Years': [4, 18, 1, 3, 15, 6],        # years with credit
    'Salary': [43, 65, 53, 95, 88, 112],  # salary
    'Used_credit': [0, 1, 0, 0, 1, 1],    # 1=unpaid credit card balance at end of month
}
creditcard_df = pd.DataFrame(credit_records)

# The label is Used_credit; every other column is an input feature
output_df = creditcard_df['Used_credit']
input_df = creditcard_df.drop('Used_credit', axis=1)

# Convert to plain NumPy arrays for the estimator
X, y = np.array(input_df), np.array(output_df)

# This example deliberately skips the train/test split

print(X, y, sep='\n')




In [None]:
# Train neural net with single hidden layer

from sklearn.neural_network import MLPClassifier # MLP = multilayer perceptron

# Single hidden layer with 3 neurons.
# hidden_layer_sizes takes one entry per hidden layer, so a single layer is
# written with a trailing comma: (3,). Plain (3) is just the integer 3.
# Example: hidden_layer_sizes = (3, 4). Two hidden layers with 3 and 4 neurons.
clf = MLPClassifier(hidden_layer_sizes=(3,), activation='logistic', solver='lbfgs', random_state=42)
clf.fit(X, y) # input and output layer automatically determined by data

# 5 minute exercise - (1) Draw this neural net. (2) How many weights and biases does it have?

# Network structure
# coefs_ and intercepts_ are parallel lists: entry 0 feeds the hidden layer,
# entry 1 feeds the output layer (there is exactly one hidden layer here).
for i, (weights, intercepts) in enumerate(zip(clf.coefs_, clf.intercepts_)):
    print('Hidden layer' if i == 0 else 'Output layer')
    print(' Intercepts:\n ', intercepts)
    print(' Weights:')
    for weight in weights:
        print(' ', weight)
    print()

print(clf.out_activation_) # Note: Output node is logistic.

In [None]:
# Show every customer next to the predicted probability of each class

class_probabilities = pd.DataFrame(clf.predict_proba(X), columns=[0, 1])
print(pd.concat([creditcard_df, class_probabilities], axis=1))

# How many does it get right? How many does it get wrong? (assume cutoff of 0.5)

In [None]:
# Summary of the fitted network, one attribute per line

network_summary = [
    ('Output nodes:', clf.n_outputs_),
    ('Number of layers:', clf.n_layers_),
    ('Number of features:', clf.n_features_in_),
    ('Classes', clf.classes_),
]
for label, value in network_summary:
    print(label, value)


In [None]:
# Inputs to neural networks should (almost) always be scaled
# But we didn't do that!

from sklearn.preprocessing import StandardScaler

# StandardScaler converts each column to z scores: mean = 0, std = 1.

feature_columns = creditcard_df[['Years', 'Salary']]
scaler = StandardScaler()
scaler.fit(feature_columns)
scaled_features = scaler.transform(feature_columns)

# Print the scaled features
print(scaled_features)


In [None]:
# Replace the raw inputs with the standardized ones; y is left as it was

X = scaled_features

print(X, y, sep='\n')

In [None]:
# Retrain neural network

# Same architecture as before: one hidden layer with 3 neurons.
# (3,) is a one-element tuple; plain (3) would just be the integer 3.
clf = MLPClassifier(hidden_layer_sizes=(3,), activation='logistic', solver='lbfgs', random_state=42)
clf.fit(X, y)

# Network structure
# coefs_ and intercepts_ are parallel lists: entry 0 feeds the hidden layer,
# entry 1 feeds the output layer.
for i, (weights, intercepts) in enumerate(zip(clf.coefs_, clf.intercepts_)):
    print('Hidden layer' if i == 0 else 'Output layer')
    print(' Intercepts:\n ', intercepts)
    print(' Weights:')
    for weight in weights:
        print(' ', weight)
    print()

In [None]:
# Print predictions
# Notice that predictions have improved for class 0

predicted = pd.DataFrame(clf.predict_proba(X), columns=[0, 1])
side_by_side = pd.concat([creditcard_df, predicted], axis=1)
print(side_by_side)

# How many does it get right? How many does it get wrong?

In [None]:
# 5 minute exercise

# Train a neural network with two hidden layers. The first has two neurons and the second has 3 neurons.
# How does it perform?

## Car dataset: MLPRegressor

In [None]:
!pip install dmba

import dmba



In [None]:
# Load the Toyota Corolla data through dmba
car_df = dmba.load_data('ToyotaCorolla.csv')

print(car_df.head())

# Keep the target (Price) plus a subset of the predictors
selected_var = [
    'Price', 'Age_08_04', 'KM', 'Fuel_Type', 'HP', 'Automatic', 'Doors', 'Quarterly_Tax',
    'Mfr_Guarantee', 'Guarantee_Period', 'Airco', 'Automatic_airco', 'CD_Player',
    'Powered_Windows', 'Sport_Model', 'Tow_Bar',
]
car_df = car_df.loc[:, selected_var]


In [None]:
# Summary statistics of the selected columns
summary_stats = car_df.describe()
print(summary_stats)

In [None]:
# Neural networks don't work with categorical variables like Fuel_Type
# They must be converted
# pd.get_dummies converts categorical variables to 0/1 variables, one for each category
# Also called one hot encoding
# drop_first=True omits the first category's column.
# The guard makes this cell safe to re-run: after the first run the Fuel_Type
# column no longer exists, and encoding it again would raise a KeyError.
if 'Fuel_Type' in car_df.columns:
    car_df = pd.get_dummies(car_df, columns=['Fuel_Type'], drop_first=True)

# Define input and output variables
X_df = car_df.drop(columns=['Price'])
Y_df = car_df[['Price']] # Predicting Price; double brackets keep a one-column DataFrame



In [None]:
# Scale the data between 0 and 1 (similar to bike sales dataset)

from sklearn.preprocessing import MinMaxScaler

print(X_df.head())

# Inputs and output get their own scaler so that predictions can be mapped
# back to prices with scaleOutput.inverse_transform.
# Note: both scalers are fit on the full dataset, before any train/test split.
scaleInput, scaleOutput = MinMaxScaler(), MinMaxScaler()
X = scaleInput.fit(X_df).transform(X_df)
y = scaleOutput.fit(Y_df).transform(Y_df)

print(X[:2])

In [None]:
# Hold out 40% of the rows for testing; the seed is fixed so the split is repeatable
from sklearn.model_selection import train_test_split

splits = train_test_split(X, y, test_size=0.4, random_state=42)
X_train, X_test, y_train, y_test = splits
tuple(part.shape for part in splits)


Neural network with two neurons in one hidden layer

In [None]:
# This is a regression problem. Predicting Price.

from sklearn.neural_network import MLPRegressor

# One hidden layer with 2 neurons.
# (2,) is a one-element tuple; plain (2) would just be the integer 2.
car_nnet = MLPRegressor(hidden_layer_sizes=(2,), activation='logistic', solver='lbfgs', random_state=42)
car_nnet.fit(X_train, y_train.ravel()) # ravel flattens array

In [None]:
# Calculate RMSE and R2 on the training set
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

# Need to reverse scaling so that output is interpretable.
# predict() returns a 1-D array, while scaleOutput was fit on a single column,
# so reshape the predictions to (n_samples, 1) before inverting the scaling.
y_actual = scaleOutput.inverse_transform(y_train).ravel()
y_pred = scaleOutput.inverse_transform(car_nnet.predict(X_train).reshape(-1, 1)).ravel()

# RMSE is the square root of the mean squared error. Take the root exactly once:
# combining np.sqrt with squared=False would apply it twice.
rmse_train = np.sqrt(mean_squared_error(y_actual, y_pred))

print("RMSE on training set: ", rmse_train)
print("R2 on training set", r2_score(y_actual, y_pred))

In [None]:
# Calculate RMSE and R2 on the test set

# Reverse scaling - important.
# predict() returns a 1-D array, while scaleOutput was fit on a single column,
# so reshape the predictions to (n_samples, 1) before inverting the scaling.
y_actual = scaleOutput.inverse_transform(y_test).ravel()
y_pred = scaleOutput.inverse_transform(car_nnet.predict(X_test).reshape(-1, 1)).ravel()

# RMSE is the square root of the mean squared error. Take the root exactly once:
# combining np.sqrt with squared=False would apply it twice.
rmse_test = np.sqrt(mean_squared_error(y_actual, y_pred))

print("RMSE on test set: ", rmse_test)
print("R2 on test set", r2_score(y_actual, y_pred))

In [None]:
# Plot predicted vs actual values
# matplotlib.pyplot is the supported plotting interface; matplotlib.pylab is discouraged.
import matplotlib.pyplot as plt

# Uses whichever y_actual / y_pred were computed most recently.
ax = pd.DataFrame({'actual': y_actual, 'predicted': y_pred}).plot.scatter(x='actual', y='predicted', alpha=0.5)
ax.set_title('Predicted vs. actual price')

plt.show()

In [None]:
# 5 minute exercise

# Look at the list of metrics.
# Select one that you think would be relevant.
# Add it to the code above.

In [None]:
# We have now trained a simple regressor on this dataset
# Now let's change the neural network hyperparameters and see if it makes a difference



Neural network with five neurons in one hidden layer

In [None]:
# train neural network with 5 hidden nodes
# (5,) is a one-element tuple: one hidden layer with 5 neurons.
# Plain (5) would just be the integer 5.
car_nnet = MLPRegressor(hidden_layer_sizes=(5,), activation='logistic', solver='lbfgs', random_state=42)
car_nnet.fit(X_train, y_train.ravel()) # ravel flattens y to the 1-D shape fit() expects


In [None]:
# RMSE and R2 on the training set

# Undo the 0-1 scaling so the errors are in the original price units.
# predict() returns a 1-D array, so reshape it to (n_samples, 1) for the scaler.
y_actual = scaleOutput.inverse_transform(y_train).ravel()
y_pred = scaleOutput.inverse_transform(car_nnet.predict(X_train).reshape(-1, 1)).ravel()

# RMSE is the square root of the mean squared error. Take the root exactly once:
# combining np.sqrt with squared=False would apply it twice.
rmse_train = np.sqrt(mean_squared_error(y_actual, y_pred))

print("RMSE on training set: ", rmse_train)
print("R2 on training set", r2_score(y_actual, y_pred))

In [None]:
# Calculate RMSE and R2 on the test set

# Undo the 0-1 scaling so the errors are in the original price units.
# predict() returns a 1-D array, so reshape it to (n_samples, 1) for the scaler.
y_actual = scaleOutput.inverse_transform(y_test).ravel()
y_pred = scaleOutput.inverse_transform(car_nnet.predict(X_test).reshape(-1, 1)).ravel()

# RMSE is the square root of the mean squared error. Take the root exactly once:
# combining np.sqrt with squared=False would apply it twice.
rmse_test = np.sqrt(mean_squared_error(y_actual, y_pred))

print("RMSE on test set: ", rmse_test)
print("R2 on test set", r2_score(y_actual, y_pred))

In [None]:
# Scatter of predicted against actual prices
price_comparison = pd.DataFrame({'actual': y_actual, 'predicted': y_pred})
ax = price_comparison.plot.scatter(x='actual', y='predicted', alpha=0.5)
plt.show()

Neural network with one neuron in one hidden layer

In [None]:
# Train neural network with a single neuron in one hidden layer.
# (1,) is a one-element tuple; plain (1) would just be the integer 1.
car_nnet = MLPRegressor(hidden_layer_sizes=(1,), activation='logistic', solver='lbfgs', random_state=42)
car_nnet.fit(X_train, y_train.ravel()) # ravel flattens y to the 1-D shape fit() expects

In [None]:
# RMSE and R2 on the training set

# Undo the 0-1 scaling so the errors are in the original price units.
# predict() returns a 1-D array, so reshape it to (n_samples, 1) for the scaler.
y_actual = scaleOutput.inverse_transform(y_train).ravel()
y_pred = scaleOutput.inverse_transform(car_nnet.predict(X_train).reshape(-1, 1)).ravel()

# RMSE is the square root of the mean squared error. Take the root exactly once:
# combining np.sqrt with squared=False would apply it twice.
rmse_train = np.sqrt(mean_squared_error(y_actual, y_pred))

print("RMSE on training set: ", rmse_train)
print("R2 on training set", r2_score(y_actual, y_pred))

In [None]:
# Calculate RMSE and R2 on the test set

# Undo the 0-1 scaling so the errors are in the original price units.
# predict() returns a 1-D array, so reshape it to (n_samples, 1) for the scaler.
y_actual = scaleOutput.inverse_transform(y_test).ravel()
y_pred = scaleOutput.inverse_transform(car_nnet.predict(X_test).reshape(-1, 1)).ravel()

# RMSE is the square root of the mean squared error. Take the root exactly once:
# combining np.sqrt with squared=False would apply it twice.
rmse_test = np.sqrt(mean_squared_error(y_actual, y_pred))

print("RMSE on test set: ", rmse_test)
print("R2 on test set", r2_score(y_actual, y_pred))

In [None]:
# Predicted prices plotted against the actual ones
price_pairs = pd.DataFrame({'actual': y_actual, 'predicted': y_pred})
ax = price_pairs.plot(kind='scatter', x='actual', y='predicted', alpha=0.5)
plt.show()

In [None]:
# Conclusion: For this particular dataset, we get very good performance (R2 = 0.9)
# with a very simple neural network (one neuron in the hidden layer). Changing the number of neurons does not
# impact the performance by much.

# 5 minute exercise
# Try with no hidden layer! You may have to look up how to do this

In [None]:
# The End
