In [2]:
# Build a baseline model 

# Use the Keras library to build a neural network with the following:

# - One hidden layer of 10 nodes, and a ReLU activation function

# - Use the adam optimizer and the mean squared error  as the loss function.

# 1. Randomly split the data into a training and test sets by holding 30% of the data for testing. You can use the train_test_splithelper function from Scikit-learn.

# 2. Train the model on the training data using 50 epochs.

# 3. Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength. You can use the mean_squared_error function from Scikit-learn.

# 4. Repeat steps 1 - 3, 50 times, i.e., create a list of 50 mean squared errors.

# 5. Report the mean and the standard deviation of the mean squared errors.



import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split

In [3]:
concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
concrete_data.head(10)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3
5,266.0,114.0,0.0,228.0,0.0,932.0,670.0,90,47.03
6,380.0,95.0,0.0,228.0,0.0,932.0,594.0,365,43.7
7,380.0,95.0,0.0,228.0,0.0,932.0,594.0,28,36.45
8,266.0,114.0,0.0,228.0,0.0,932.0,670.0,28,45.85
9,475.0,0.0,0.0,228.0,0.0,932.0,594.0,28,39.29


In [4]:
concrete_data.shape

(1030, 9)

In [5]:
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [6]:
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [7]:
concrete_data_columns = concrete_data.columns

In [8]:
target = concrete_data['Strength']
target.head(10)

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
5    47.03
6    43.70
7    36.45
8    45.85
9    39.29
Name: Strength, dtype: float64

In [9]:
predictors = concrete_data.iloc[:, :-1]
predictors.head(10)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360
5,266.0,114.0,0.0,228.0,0.0,932.0,670.0,90
6,380.0,95.0,0.0,228.0,0.0,932.0,594.0,365
7,380.0,95.0,0.0,228.0,0.0,932.0,594.0,28
8,266.0,114.0,0.0,228.0,0.0,932.0,670.0,28
9,475.0,0.0,0.0,228.0,0.0,932.0,594.0,28


In [10]:
#num of inputs = num of predictors colums
n_cols = predictors.shape[1]
def regression_model():
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [11]:
model = regression_model()

In [12]:
list_of_mean_squared_error = []
for cycle in range(50):
    #Randomly split the data into a training set (70%) and a test set (30%):  
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3)
    #Train and test the model at the same time
    res = model.fit(X_train, y_train, epochs=50, verbose=0, validation_data=(X_test, y_test))
    #Find mean_squared_error as last value in history.
    mean_squared_error = res.history['val_loss'][-1]
    #Add value of mean_squared_error for every cycle in list.
    list_of_mean_squared_error.append(mean_squared_error)
    print('Cycle #{}: mean_squared_error {}'.format(cycle+1, mean_squared_error))

Cycle #1: mean_squared_error 198.4631805419922
Cycle #2: mean_squared_error 121.10132598876953
Cycle #3: mean_squared_error 123.28996276855469
Cycle #4: mean_squared_error 121.40036010742188
Cycle #5: mean_squared_error 119.2193832397461
Cycle #6: mean_squared_error 112.11276245117188
Cycle #7: mean_squared_error 120.8397445678711
Cycle #8: mean_squared_error 126.71661376953125
Cycle #9: mean_squared_error 105.0273666381836
Cycle #10: mean_squared_error 114.49043273925781
Cycle #11: mean_squared_error 105.38162994384766
Cycle #12: mean_squared_error 101.731689453125
Cycle #13: mean_squared_error 116.7564697265625
Cycle #14: mean_squared_error 131.8148193359375
Cycle #15: mean_squared_error 111.26571655273438
Cycle #16: mean_squared_error 115.52536010742188
Cycle #17: mean_squared_error 107.14955139160156
Cycle #18: mean_squared_error 108.83663177490234
Cycle #19: mean_squared_error 102.77212524414062
Cycle #20: mean_squared_error 83.76497650146484
Cycle #21: mean_squared_error 70.76245

In [13]:
print('The mean of the mean squared errors: {}'.format(np.mean(list_of_mean_squared_error)))
print('The standard deviation of the mean squared errors: {}'.format(np.std(list_of_mean_squared_error)))

The mean of the mean squared errors: 87.44172378540038
The standard deviation of the mean squared errors: 28.30577276812598


In [None]:
# Normalize the data

In [14]:
predictors_norm = (predictors - predictors.mean())/predictors.std()
predictors_norm.head(10)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069
5,-0.145138,0.464818,-0.846733,2.174405,-1.038638,-0.526262,-1.291914,0.701883
6,0.945704,0.244603,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
7,0.945704,0.244603,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,-0.279597
8,-0.145138,0.464818,-0.846733,2.174405,-1.038638,-0.526262,-1.291914,-0.279597
9,1.85474,-0.856472,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,-0.279597


In [15]:
n_cols = predictors_norm.shape[1]
def regression_model2():
    model2 = Sequential()
    model2.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model2.add(Dense(1))
    
    model2.compile(optimizer='adam', loss='mean_squared_error')
    return model2

model2 = regression_model2()

In [16]:
list_of_mean_squared_error = []
for cycle in range(50):
    #Randomly split the data into a training set (70%) and a test set (30%):  
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3)
    #Train and test the model at the same time
    res = model2.fit(X_train, y_train, epochs=50, verbose=0, validation_data=(X_test, y_test))
    #Find mean_squared_error as last value in history.
    mean_squared_error = res.history['val_loss'][-1]
    #Add value of mean_squared_error for every cycle in list.
    list_of_mean_squared_error.append(mean_squared_error)
    print('Cycle #{}: mean_squared_error {}'.format(cycle+1, mean_squared_error))

Cycle #1: mean_squared_error 234.09063720703125
Cycle #2: mean_squared_error 144.9830780029297
Cycle #3: mean_squared_error 110.92703247070312
Cycle #4: mean_squared_error 91.3677749633789
Cycle #5: mean_squared_error 87.86283111572266
Cycle #6: mean_squared_error 81.27066040039062
Cycle #7: mean_squared_error 70.99695587158203
Cycle #8: mean_squared_error 59.410579681396484
Cycle #9: mean_squared_error 58.577674865722656
Cycle #10: mean_squared_error 48.808349609375
Cycle #11: mean_squared_error 47.41896057128906
Cycle #12: mean_squared_error 47.159175872802734
Cycle #13: mean_squared_error 42.9716682434082
Cycle #14: mean_squared_error 49.92159652709961
Cycle #15: mean_squared_error 40.795135498046875
Cycle #16: mean_squared_error 49.40127944946289
Cycle #17: mean_squared_error 42.58183670043945
Cycle #18: mean_squared_error 42.09374237060547
Cycle #19: mean_squared_error 43.82624435424805
Cycle #20: mean_squared_error 43.56185531616211
Cycle #21: mean_squared_error 42.91591262817383

In [17]:
print('The mean of the mean squared errors: {}'.format(np.mean(list_of_mean_squared_error)))
print('The standard deviation of the mean squared errors: {}'.format(np.std(list_of_mean_squared_error)))

The mean of the mean squared errors: 55.593638305664065
The standard deviation of the mean squared errors: 32.0709306252599


In [None]:
# Increate the number of epochs 
# Repeat Part B but use 100 epochs this time for training.

# How does the mean of the mean squared errors compare to that from Step B?

# Train and test the model at the same time using the fit-method. We will leave out 30% of the data (data after normalization) for validation and we will train the model for 100 epochs instead of 50 epochs.

In [18]:
def regression_model3():
    model3 = Sequential()
    model3.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model3.add(Dense(1))
    
    model3.compile(optimizer='adam', loss='mean_squared_error')
    return model3

model3 = regression_model3() 

In [19]:
list_of_mean_squared_error = []
for cycle in range(50):
    #Randomly split the data into a training set (70%) and a test set (30%):  
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3)
    #Train and test the model at the same time
    res = model3.fit(X_train, y_train, epochs=100, verbose=0, validation_data=(X_test, y_test))
    #Find mean_squared_error as last value in history.
    mean_squared_error = res.history['val_loss'][-1]
    #Add value of mean_squared_error for every cycle in list.
    list_of_mean_squared_error.append(mean_squared_error)
    print('Cycle #{}: mean_squared_error {}'.format(cycle+1, mean_squared_error))

Cycle #1: mean_squared_error 189.2625732421875
Cycle #2: mean_squared_error 91.00852966308594
Cycle #3: mean_squared_error 59.5987548828125
Cycle #4: mean_squared_error 44.80221939086914
Cycle #5: mean_squared_error 44.297359466552734
Cycle #6: mean_squared_error 37.508522033691406
Cycle #7: mean_squared_error 34.54198455810547
Cycle #8: mean_squared_error 37.209556579589844
Cycle #9: mean_squared_error 35.774009704589844
Cycle #10: mean_squared_error 38.301876068115234
Cycle #11: mean_squared_error 39.523433685302734
Cycle #12: mean_squared_error 37.81349182128906
Cycle #13: mean_squared_error 37.55875778198242
Cycle #14: mean_squared_error 38.44070053100586
Cycle #15: mean_squared_error 36.94258499145508
Cycle #16: mean_squared_error 37.93204879760742
Cycle #17: mean_squared_error 33.95689392089844
Cycle #18: mean_squared_error 41.29240036010742
Cycle #19: mean_squared_error 35.339725494384766
Cycle #20: mean_squared_error 35.96379089355469
Cycle #21: mean_squared_error 39.6596298217

In [20]:
print('The mean of the mean squared errors: {}'.format(np.mean(list_of_mean_squared_error)))
print('The standard deviation of the mean squared errors: {}'.format(np.std(list_of_mean_squared_error)))

The mean of the mean squared errors: 40.587626953125
The standard deviation of the mean squared errors: 23.034619502876076


In [None]:
# Increase the number of hidden layers 
# Repeat part B but use a neural network with the following instead:

# Three hidden layers, each of 10 nodes and ReLU activation function.
# How does the mean of the mean squared errors compare to that from Step B?

In [21]:
def regression_model4():
    model4 = Sequential()
    model4.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model4.add(Dense(10, activation='relu'))
    model4.add(Dense(10, activation='relu'))
    model4.add(Dense(1))
    
    model4.compile(optimizer='adam', loss='mean_squared_error')
    return model4

In [22]:
model4 = regression_model4()

In [23]:
list_of_mean_squared_error = []
for cycle in range(50):
    #Randomly split the data into a training set (70%) and a test set (30%):  
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3)
    #Train and test the model at the same time
    res = model4.fit(X_train, y_train, epochs=50, verbose=0, validation_data=(X_test, y_test))
    #Find mean_squared_error as last value in history.
    mean_squared_error = res.history['val_loss'][-1]
    #Add value of mean_squared_error for every cycle in list.
    list_of_mean_squared_error.append(mean_squared_error)
    print('Cycle #{}: mean_squared_error {}'.format(cycle+1, mean_squared_error))

Cycle #1: mean_squared_error 137.5099639892578
Cycle #2: mean_squared_error 117.61882019042969
Cycle #3: mean_squared_error 101.64252471923828
Cycle #4: mean_squared_error 95.46595764160156
Cycle #5: mean_squared_error 68.87236785888672
Cycle #6: mean_squared_error 41.986576080322266
Cycle #7: mean_squared_error 44.0658073425293
Cycle #8: mean_squared_error 37.63887405395508
Cycle #9: mean_squared_error 39.100006103515625
Cycle #10: mean_squared_error 36.47340774536133
Cycle #11: mean_squared_error 39.3292121887207
Cycle #12: mean_squared_error 34.09077453613281
Cycle #13: mean_squared_error 28.780048370361328
Cycle #14: mean_squared_error 29.440746307373047
Cycle #15: mean_squared_error 32.00979232788086
Cycle #16: mean_squared_error 29.279926300048828
Cycle #17: mean_squared_error 31.62748146057129
Cycle #18: mean_squared_error 26.321578979492188
Cycle #19: mean_squared_error 31.568397521972656
Cycle #20: mean_squared_error 25.474292755126953
Cycle #21: mean_squared_error 25.15126991

In [24]:
print('The mean of the mean squared errors: {}'.format(np.mean(list_of_mean_squared_error)))
print('The standard deviation of the mean squared errors: {}'.format(np.std(list_of_mean_squared_error)))

The mean of the mean squared errors: 35.86739185333252
The standard deviation of the mean squared errors: 24.44218856351571
