In [1]:
# Import pandas library to read the csv file.
import pandas as pd

# Import numpy for mean and standard deviation calculation.
import numpy as np

# Import sklearn train_test_split and mean_squared_error for splitting dataset and model evaluation, respectively.
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Import keras layers and models to make the neural network.
import keras
from keras.layers import Dense
from keras.models import Sequential

In [2]:
# Load, read, and print data
path = "https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv"
data = pd.read_csv(path)
data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
# Split data into predictors and target
predictors = data.drop(columns=['Strength'])
target = data.Strength

In [4]:
# Normalization
predictors_norm = (predictors - predictors.mean()) / predictors.std()

In [5]:
# Define regression model
def regression_model():
    model = Sequential()
    # Three hidden layer, 10 nodes each, and ReLU activation
    model.add(Dense(10, activation='relu', input_shape=(n_cols, )))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    # Compile the model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [6]:
mse = []

# Repeat steps 50 times and store mean_squared_error in list
for i in range(50):
    # Train and test split - Test size 30% with random shuffle
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=4)

    # Number of input
    n_cols = X_train.shape[1]

    # Build the model
    model = regression_model()

    # Fit the model - 50 epochs
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, verbose=2)
    
    # Predictions by the model
    predictions = model.predict(X_test)

    # Evaluate mean squared error on the test dataset
    mse.append(mean_squared_error(y_test, predictions))

Epoch 1/50
23/23 - 10s - loss: 1598.9547 - val_loss: 1528.5308
Epoch 2/50
23/23 - 0s - loss: 1575.2694 - val_loss: 1509.5193
Epoch 3/50
23/23 - 0s - loss: 1553.1597 - val_loss: 1486.0587
Epoch 4/50
23/23 - 0s - loss: 1523.6047 - val_loss: 1453.3934
Epoch 5/50
23/23 - 0s - loss: 1480.6415 - val_loss: 1403.9408
Epoch 6/50
23/23 - 0s - loss: 1415.7878 - val_loss: 1327.7401
Epoch 7/50
23/23 - 0s - loss: 1319.6842 - val_loss: 1220.0170
Epoch 8/50
23/23 - 0s - loss: 1186.1284 - val_loss: 1075.9001
Epoch 9/50
23/23 - 0s - loss: 1013.5012 - val_loss: 889.7759
Epoch 10/50
23/23 - 0s - loss: 805.3220 - val_loss: 681.0282
Epoch 11/50
23/23 - 1s - loss: 582.8176 - val_loss: 486.9638
Epoch 12/50
23/23 - 0s - loss: 398.1920 - val_loss: 343.6145
Epoch 13/50
23/23 - 0s - loss: 283.1343 - val_loss: 278.9323
Epoch 14/50
23/23 - 0s - loss: 240.1445 - val_loss: 256.0941
Epoch 15/50
23/23 - 0s - loss: 221.5707 - val_loss: 245.3726
Epoch 16/50
23/23 - 0s - loss: 210.4806 - val_loss: 237.3874
Epoch 17/50
23/

In [7]:
# Check length of mse list
print(len(mse))

# Print mean and standard deviation of mean_squared_errors
print(f"Mean: {np.mean(mse)}")
print(f"Standard Deviation: {np.std(mse)}")

50
Mean: 139.53816734566308
Standard Deviation: 14.78319092251308


### How does the mean of the mean squared errors compare to that from Step B?

**The mean of the mean squared error has decreased further from 347.66 to 139.54 after increasing the number of hidden layers which indicates the model is performing much better in terms of prediction accuracy.**