In [1]:
# Import pandas library to read the csv file.
import pandas as pd

# Import numpy for mean and standard deviation calculation.
import numpy as np

# Import sklearn train_test_split and mean_squared_error for splitting dataset and model evaluation, respectively.
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Import keras layers and models to make the neural network.
import keras
from keras.layers import Dense
from keras.models import Sequential

In [2]:
# Load, read, and print data
path = "https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv"
data = pd.read_csv(path)
data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
# Split data into predictors and target
predictors = data.drop(columns=['Strength'])
target = data.Strength

In [4]:
# Normalization
predictors_norm = (predictors - predictors.mean()) / predictors.std()

In [5]:
# Define regression model
def regression_model():
    model = Sequential()
    # One hidden layer, 10 nodes, and ReLU activation
    model.add(Dense(10, activation='relu', input_shape=(n_cols, )))
    model.add(Dense(1))
    
    # Compile the model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [6]:
mse = []

# Repeat steps 50 times and store mean_squared_error in list
for i in range(50):
    # Train and test split - Test size 30% with random shuffle
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=4)

    # Number of input
    n_cols = X_train.shape[1]

    # Build the model
    model = regression_model()

    # Fit the model - 100 epochs
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, verbose=2)
    
    # Predictions by the model
    predictions = model.predict(X_test)

    # Evaluate mean squared error on the test dataset
    mse.append(mean_squared_error(y_test, predictions))

Epoch 1/100
23/23 - 3s - loss: 1541.7858 - val_loss: 1479.1619
Epoch 2/100
23/23 - 0s - loss: 1526.2100 - val_loss: 1464.5959
Epoch 3/100
23/23 - 0s - loss: 1510.5671 - val_loss: 1449.9000
Epoch 4/100
23/23 - 0s - loss: 1494.7100 - val_loss: 1434.7645
Epoch 5/100
23/23 - 0s - loss: 1478.2990 - val_loss: 1419.2687
Epoch 6/100
23/23 - 0s - loss: 1461.2328 - val_loss: 1402.9532
Epoch 7/100
23/23 - 0s - loss: 1443.2075 - val_loss: 1385.7208
Epoch 8/100
23/23 - 0s - loss: 1424.1572 - val_loss: 1367.5879
Epoch 9/100
23/23 - 0s - loss: 1404.0146 - val_loss: 1348.6377
Epoch 10/100
23/23 - 0s - loss: 1382.9762 - val_loss: 1328.2911
Epoch 11/100
23/23 - 0s - loss: 1360.5337 - val_loss: 1307.0120
Epoch 12/100
23/23 - 0s - loss: 1336.9689 - val_loss: 1284.5209
Epoch 13/100
23/23 - 0s - loss: 1311.8164 - val_loss: 1261.6934
Epoch 14/100
23/23 - 0s - loss: 1286.4761 - val_loss: 1236.8113
Epoch 15/100
23/23 - 0s - loss: 1259.1921 - val_loss: 1211.9424
Epoch 16/100
23/23 - 0s - loss: 1231.9561 - val_l

In [7]:
# Check length of mse list
print(len(mse))

# Print mean and standard deviation of mean_squared_errors
print(f"Mean: {np.mean(mse)}")
print(f"Standard Deviation: {np.std(mse)}")

50
Mean: 177.15127955219762
Standard Deviation: 12.268411481874706


### How does the mean of the mean squared errors compare to that from Step B?

**The mean of the mean squared error has decreased further from 347.66 to 177.15 after increasing the number of epochs by 50 i.e., 100 epochs which indicates the model is performing better in terms of prediction accuracy.**