In [2]:
# Import the dependencies:
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow.python as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

In [4]:
# CREATING DEEP LEARNING MODELS WITH KERAS
# In the previous lesson, we learned how to build a neural network model with a single hidden layer using Keras.
# To create a deep learning model, we'll build on this knowledge.
# Suppose that we've been hired by a Spanish winery in the Rioja region.
# The winery sees new market opportunities in Australia and wants to expand.
# To forecast future revenue, the winery owners want to predict the quality of wine in future wine lots.
# We will build a deep learning neural network model to help them predict the quality scores of different wines.
# As usual, we'll start by reading the data into a DataFrame, as shown in the following code:

# Load the wine-quality CSV (path is relative to the notebook's working directory) into a DataFrame:
wine_df = pd.read_csv(Path('wine_quality.csv'))

# Preview the data: the first five rows, followed by the last five rows:
display(wine_df.head(), wine_df.tail())

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
1594,6.2,0.6,0.08,2.0,0.09,32.0,44.0,0.9949,3.45,0.58,10.5,5
1595,5.9,0.55,0.1,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6
1596,6.3,0.51,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5
1598,6.0,0.31,0.47,3.6,0.067,18.0,42.0,0.99549,3.39,0.66,11.0,6


In [6]:
# BREAKDOWN
# Let's take a closer look at the resulting DataFrame.
# The data contains 11 variables representing different wine characteristics.
# Together, these characteristics allow us to assess the overall 'quality' of a wine on a scale from 1 to 10.

In [9]:
# PREPROCESS THE DATA
# Next, we must preprocess our data.
# We create the features set (X) and the target set (y).
# The following code creates these datasets:

# Separate the DataFrame into the target vector (y) and the feature matrix (X).
# The target is the `quality` column; the features are every remaining column.
y = wine_df['quality'].to_numpy()
X = wine_df.drop(columns='quality').to_numpy()

In [10]:
# NOTE
# The data used to fit a neural network should always be numerical and normalized to the same scale.
# This is true regardless of how many hidden layers the neural network contains.

# The following code creates the training and testing datasets, and scales the data:

# Split the features and the target into training and testing datasets
# (random_state=1 keeps the split repeatable):
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# Create the scaler instance and fit it on the training features:
X_scaler = StandardScaler().fit(X_train)

# Scale both feature sets with the scaler fitted above:
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [11]:
# MODEL THE DEEP NEURAL NET WITH KERAS
# To define a deep neural net with Keras, we will use a process similar to the one we used to define a simple neural network.
# But, this time, we will add an additional hidden layer.
# In this demonstration, the second hidden layer will contain fewer neurons than the first.
# This is typically how deep neural nets are constructed: The number of neurons on each successive layer is equal to or less than the number of neurons on the previous layer, with the output layer containing the fewest neurons.
# As we did for our previous neural network, we will choose an activation function for the first layer.
# This time, we will also use this same activation function for our second hidden layer.
# Often, developers experiment with many potential architectures in an effort to minimize the loss metric.

# CONNECT THE DOTS
# As we build this model, you may notice that we continue to follow a process similar to the one we previously used to build a simple neural network.
# Later in the lesson, we will compile our model, and then we'll define a loss function.
# As in the previous model, we will seek to minimize the metric that the loss function returns.

# The following code creates our deep learning model and adds two hidden layers to the model:

# Define the model - deep neural net with two hidden layers.
# The input width is read from the scaled training data (rather than being
# hard-coded as 11) so that the first layer always matches the feature matrix
# that is later passed to `fit`.
input_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 8
hidden_nodes_layer2 = 4

# Create a sequential neural network model:
nn = Sequential()

# Add the first hidden layer (the only layer that is given `input_dim`):
nn.add(Dense(units=hidden_nodes_layer1, input_dim=input_features, activation='relu'))

# Add the second hidden layer (fewer neurons than the first):
nn.add(Dense(units=hidden_nodes_layer2, activation='relu'))

# Add the output layer - a single neuron with a linear activation:
nn.add(Dense(units=1, activation='linear'))




In [12]:
# BREAKDOWN
# To add a second layer to a neural network, all you have to do is include another call to the `add` function.
# Note that on the second hidden layer, we do not define an `input_dim` parameter.
# We only define the number of neurons the layer will contain (using the `units` parameter) and the activation function.
# In this case, we use the same activation function (ReLU) for the first and second layers.
# Then, for the output layer, we use the linear activation function.
# The linear activation function allows the single output neuron to produce the continuous range of values we need for our 1-10 wine quality scale (rather than just a binary 0 or 1).

In [13]:
# COMPILE AND FIT THE MODEL
# Now we will compile and fit our deep neural network model.
# The following code compiles and fits the model:

# Compile the model: mean squared error is the loss to minimize and is also
# the metric that is tracked; Adam is the optimizer.
nn.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mse'],
)

# Fit the model on the scaled training features for 100 epochs,
# keeping the object that `fit` returns:
deep_net_model = nn.fit(x=X_train_scaled, y=y_train, epochs=100)


Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch

In [14]:
# REWIND
# Recall that an epoch can be loosely defined as one iteration of a neural network model.
# That is, one pass of the entire training dataset through the model.
# The loss function scores the model's performance after each new epoch.
# The optimizer shapes and molds the model while it's trained on the data.
# And finally, the evaluation metric assesses the model's performance.

In [15]:
# BREAKDOWN
# Let's take a closer look at the parameters we chose to compile this model.
# This model will output numbers (wine quality scores) ranging from 1 to 10.
# This means that the model's output is continuous, rather than binary.
# So, we're building a regression model, rather than a classification model.
# We used the `mean_squared_error` loss function, which is designed for regression problems.
# We also used the MSE metric to evaluate the quality of the model.
# Remember that there are two main evaluation metrics:
    # 1. Model Predictive Accuracy
    # 2. Model Mean Squared Error (MSE)
# We use accuracy for classification models and MSE for regression models.
# When using MSE to assess a model, an MSE value closer to zero indicates a better model.
# So, in this case, we want our wine-quality prediction model to return an MSE value close to zero.

In [17]:
# DEFINING THE NUMBER OF HIDDEN LAYERS
# When designing a deep learning model, we have to decide: How many hidden layers should we add to the neural network?
# Usually, adding more layers to a model increases the model's overall performance - but sometimes it doesn't.
# That's because sometimes, additional layers can become redundant - that is, the first hidden layers can sufficiently encapsulate the dataset's complexity.
# In this section, we'll demonstrate the impact of adding more layers to a neural network model.
# For this example, we'll use the wine quality dataset again.
# However, this time, we'll define two different models.
# Our first model will have 2 hidden layers.
# We will use 22 neurons on the first layer and 11 neurons on the second layer.
# The following code defines the model:

# Define Model 1 - deep neural net with two hidden layers (22 and 11 neurons).
# The input width is read from the scaled training data (rather than being
# hard-coded as 11) so that the first layer always matches the feature matrix
# passed to `fit` below.
number_input_features = X_train_scaled.shape[1]
hidden_nodes_layer_1 = 22
hidden_nodes_layer_2 = 11

# Create a sequential neural network model:
nn_1 = Sequential()

# Add the first hidden layer (the only layer that is given `input_dim`):
nn_1.add(Dense(units=hidden_nodes_layer_1, input_dim=number_input_features, activation='relu'))

# Add the second hidden layer:
nn_1.add(Dense(units=hidden_nodes_layer_2, activation='relu'))

# Add the output layer - a single neuron with a linear activation:
nn_1.add(Dense(units=1, activation='linear'))

# Compile the model with the same loss, optimizer, and metric used for `nn`:
nn_1.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])

# Fit the model on the scaled training features for 100 epochs:
deep_net_model_1 = nn_1.fit(X_train_scaled, y_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [18]:
# Our second model will have three hidden layers.
# The first and the second hidden layers will be identical to those of the previous model.
# For the third hidden layer, we will set eight neurons on the layer.
# The following code defines the second model.

# Define Model 2 - deep neural net with three hidden layers (22, 11, and 8 neurons).
# The input width is read from the scaled training data (rather than being
# hard-coded as 11) so that the first layer always matches the feature matrix
# passed to `fit` below.
# NOTE: `hidden_nodes_layer1` and `hidden_nodes_layer2` were also used (as 8 and 4)
# when defining `nn`; they are rebound to new values here.
number_input_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 22
hidden_nodes_layer2 = 11
hidden_nodes_layer3 = 8

# Create a sequential neural network model
nn_2 = Sequential()

# Add the first hidden layer (the only layer that is given `input_dim`)
nn_2.add(Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))

# Add the second hidden layer
nn_2.add(Dense(units=hidden_nodes_layer2, activation="relu"))

# Add the third hidden layer - the only structural difference from `nn_1`
nn_2.add(Dense(units=hidden_nodes_layer3, activation="relu"))

# Add the output layer - a single neuron with a linear activation
nn_2.add(Dense(units=1, activation="linear"))

# Compile the model with the same loss, optimizer, and metric used for `nn_1`
nn_2.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])

# Fit the model on the scaled training features for 100 epochs
deep_net_model_2 = nn_2.fit(X_train_scaled, y_train, epochs=100)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [19]:
# BREAKDOWN
# After compiling and fitting both models, we can assess the impact of adding a third hidden layer by passing each model the testing data and then comparing their mean squared error values.
# As we've done before, we use the `evaluate` function, and pass the testing datasets as parameters, to retrieve the evaluation metrics.
# Because we used the `mse` metric when compiling the model, the `evaluate` function returns the value for the `mse` metric as well as the loss function.
# The following code evaluates our models.

# Score Model 1 (two hidden layers) on the scaled testing data; returns the loss and the `mse` metric:
model1_loss, model1_mse = nn_1.evaluate(x=X_test_scaled, y=y_test, verbose=2)

# Score Model 2 (three hidden layers) on the same testing data for a like-for-like comparison:
model2_loss, model2_mse = nn_2.evaluate(x=X_test_scaled, y=y_test, verbose=2)

13/13 - 1s - loss: 0.4754 - mse: 0.4754 - 558ms/epoch - 43ms/step
13/13 - 0s - loss: 0.4789 - mse: 0.4789 - 344ms/epoch - 26ms/step


In [20]:
# BREAKDOWN
# As shown in the preceding output, Model 1's MSE is 0.4754, and Model 2's MSE is 0.4789.
# These results tell us that the difference between the two models' `mse` metrics is quite small.
# So, in this case, we can use the simpler model with two hidden layers to make predictions.
# Adding layers does not always guarantee better model performance.
# Depending on the input data's complexity, adding more hidden layers sometimes just increases the chance of overfitting the training data.
# Unfortunately, no easy solution or rule of thumb exists to identify how many layers will maximize performance for a given model.
# Trial and error is the only way to determine how 'deep' a deep learning model should be.
# You must train and evaluate a model with deeper and deeper layers, until the model no longer demonstrates noticeable improvements over the same number of epochs.