In [1]:
# Import pandas library to read the csv file.
import pandas as pd

# Import numpy for mean and standard deviation calculation.
import numpy as np

# Import sklearn train_test_split and mean_squared_error for splitting dataset and model evaluation, respectively.
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Import keras layers and models to make the neural network.
import keras
from keras.layers import Dense
from keras.models import Sequential

In [2]:
# Load, read, and print data
path = "https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv"
data = pd.read_csv(path)
data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
# Split data into predictors and target
predictors = data.drop(columns=['Strength'])
target = data.Strength

In [4]:
# Normalization
predictors_norm = (predictors - predictors.mean()) / predictors.std()

In [5]:
# Define regression model
def regression_model():
    model = Sequential()
    # One hidden layer, 10 nodes, and ReLU activation
    model.add(Dense(10, activation='relu', input_shape=(n_cols, )))
    model.add(Dense(1))
    
    # Compile the model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [6]:
mse = []

# Repeat steps 50 times and store mean_squared_error in list
for i in range(50):
    # Train and test split - Test size 30% with random shuffle
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=4)

    # Number of input
    n_cols = X_train.shape[1]

    # Build the model
    model = regression_model()

    # Fit the model - 50 epochs
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, verbose=2)
    
    # Predictions by the model
    predictions = model.predict(X_test)

    # Evaluate mean squared error on the test dataset
    mse.append(mean_squared_error(y_test, predictions))

Epoch 1/50
23/23 - 2s - loss: 1649.3185 - val_loss: 1582.3181
Epoch 2/50
23/23 - 0s - loss: 1631.2559 - val_loss: 1566.0431
Epoch 3/50
23/23 - 0s - loss: 1613.9799 - val_loss: 1550.5999
Epoch 4/50
23/23 - 0s - loss: 1597.2285 - val_loss: 1535.3552
Epoch 5/50
23/23 - 0s - loss: 1580.3058 - val_loss: 1520.0071
Epoch 6/50
23/23 - 0s - loss: 1563.0387 - val_loss: 1504.4236
Epoch 7/50
23/23 - 0s - loss: 1545.4745 - val_loss: 1488.1656
Epoch 8/50
23/23 - 0s - loss: 1527.3674 - val_loss: 1471.5687
Epoch 9/50
23/23 - 0s - loss: 1508.5421 - val_loss: 1454.5417
Epoch 10/50
23/23 - 0s - loss: 1489.1870 - val_loss: 1436.6403
Epoch 11/50
23/23 - 0s - loss: 1468.9143 - val_loss: 1418.1709
Epoch 12/50
23/23 - 0s - loss: 1447.9065 - val_loss: 1399.0382
Epoch 13/50
23/23 - 0s - loss: 1426.1075 - val_loss: 1379.5496
Epoch 14/50
23/23 - 0s - loss: 1403.8326 - val_loss: 1359.5039
Epoch 15/50
23/23 - 0s - loss: 1380.8879 - val_loss: 1338.9042
Epoch 16/50
23/23 - 0s - loss: 1357.6294 - val_loss: 1317.1781
E

In [7]:
# Check length of mse list
print(len(mse))

# Print mean and standard deviation of mean_squared_errors
print(f"Mean: {np.mean(mse)}")
print(f"Standard Deviation: {np.std(mse)}")

50
Mean: 347.6621678963661
Standard Deviation: 81.80286999332839


### How does the mean of the mean squared errors compare to that from Step A?

**The mean of the mean squared error has decreased from 427.72 to 347.66 after normalizing the data which indicates the model is performing well in terms of prediction accuracy.**