### Let's import the libraries and read the data

In [1]:
import pandas as pd
import numpy as np
import sklearn
import statistics
# Download the concrete dataset from the course URL and preview the first rows.
concrete_data = pd.read_csv(filepath_or_buffer='https://cocl.us/concrete_data')
concrete_data.head(n=5)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


# Part 1 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

### Let's split the data into predictors and target

In [2]:
concrete_data_columns = concrete_data.columns

# Target: the 'Strength' column.
target = concrete_data['Strength']
# Predictors: every other column, picked with a boolean mask over the column labels.
predictors = concrete_data.loc[:, concrete_data_columns != 'Strength']

### Let's save the number of predictors

In [3]:
n_cols = len(predictors.columns)  # number of predictor columns (input width of the network)

### Let's import Keras

In [4]:
import keras

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


### Let's import Keras packages

In [5]:
from keras.models import Sequential
from keras.layers import Dense

### Let's define a function that builds a regression model
##### Remember that we need one hidden layer of 10 nodes, a ReLU activation function,
##### the Adam optimizer, and the mean squared error as the loss function.

In [6]:
# define regression model
def regression_model(hidden_layers=(10,), n_features=None):
    """Build and compile a fully connected regression network.

    Parameters
    ----------
    hidden_layers : sequence of int, optional
        Number of units in each hidden layer, in order. Every hidden layer
        uses a ReLU activation. Defaults to a single hidden layer of 10 units.
        An empty sequence yields a plain linear model.
    n_features : int, optional
        Number of input features. When omitted, the module-level ``n_cols``
        is used, which matches the original zero-argument behaviour.

    Returns
    -------
    A compiled ``Sequential`` model with one linear output unit, using the
    'adam' optimizer and 'mean_squared_error' loss.
    """
    if n_features is None:
        n_features = n_cols

    model = Sequential()

    # Only the first layer added to the model declares the input shape.
    first_layer_kwargs = {'input_shape': (n_features,)}
    for units in hidden_layers:
        model.add(Dense(units, activation='relu', **first_layer_kwargs))
        first_layer_kwargs = {}

    # Single linear output unit for the regression target.
    model.add(Dense(1, **first_layer_kwargs))

    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

### Let's split the data into training and testing subsets (30% testing)

In [7]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for testing; the fixed seed keeps this split repeatable.
pred_train, pred_test, targ_train, targ_test = train_test_split(
    predictors,
    target,
    test_size=0.3,
    random_state=4,
)
pred_train.shape

(721, 8)

### Let's call the function to create our model

In [8]:
# build the model (this instance is the one trained and evaluated in the next cells)
model = regression_model()

### Let's fit the model on the training data set, 50 epochs

In [9]:
# Train for 50 epochs on the training split; verbose=2 logs one line per epoch.
model.fit(x=pred_train, y=targ_train, epochs=50, verbose=2)

Epoch 1/50
 - 1s - loss: 3860.4196
Epoch 2/50
 - 0s - loss: 2890.7205
Epoch 3/50
 - 0s - loss: 2157.5431
Epoch 4/50
 - 0s - loss: 1617.4178
Epoch 5/50
 - 0s - loss: 1182.0780
Epoch 6/50
 - 0s - loss: 907.0823
Epoch 7/50
 - 0s - loss: 752.5751
Epoch 8/50
 - 0s - loss: 640.9868
Epoch 9/50
 - 0s - loss: 543.1246
Epoch 10/50
 - 0s - loss: 469.2640
Epoch 11/50
 - 0s - loss: 404.5161
Epoch 12/50
 - 0s - loss: 354.1111
Epoch 13/50
 - 0s - loss: 314.2210
Epoch 14/50
 - 0s - loss: 280.1753
Epoch 15/50
 - 0s - loss: 255.0413
Epoch 16/50
 - 0s - loss: 231.1355
Epoch 17/50
 - 0s - loss: 211.7241
Epoch 18/50
 - 0s - loss: 198.8608
Epoch 19/50
 - 0s - loss: 185.7828
Epoch 20/50
 - 0s - loss: 178.5138
Epoch 21/50
 - 0s - loss: 168.1383
Epoch 22/50
 - 0s - loss: 163.2293
Epoch 23/50
 - 0s - loss: 156.8669
Epoch 24/50
 - 0s - loss: 153.5252
Epoch 25/50
 - 0s - loss: 147.3157
Epoch 26/50
 - 0s - loss: 144.7265
Epoch 27/50
 - 0s - loss: 141.6269
Epoch 28/50
 - 0s - loss: 139.4171
Epoch 29/50
 - 0s - loss

<keras.callbacks.History at 0x7f5f3118c6d8>

### Let's run the model in prediction mode on the Test data set

In [10]:
# Predict on the held-out rows and show the first ten predictions.
targ_hat = model.predict(x=pred_test, verbose=2)
targ_hat[:10]

array([[41.18492 ],
       [43.007774],
       [36.6962  ],
       [37.232044],
       [53.30358 ],
       [39.31801 ],
       [35.27453 ],
       [18.147827],
       [44.918774],
       [27.342407]], dtype=float32)

In [11]:

# Mean squared error between the true and predicted test-set strengths.
metrics_mse = sklearn.metrics.mean_squared_error(y_true=targ_test, y_pred=targ_hat)
metrics_mse

135.85865916717225

### Now let us repeat the above steps 50 times and create a list of MSEs. First initialize the list we will fill in later

In [12]:
# Pre-size the result list: one slot per run, filled in by index in the next cell.
mse_list = [0.0] * 50


### Then repeat the above steps 50 times, adding the result to the list each time

In [13]:
# Repeat the split / train / evaluate cycle 50 times on the raw predictors and
# store the test-set MSE of each run in mse_list.
for i in range(50):
    # Release the backend state left behind by previously built models so it
    # does not accumulate over the 50 iterations.
    keras.backend.clear_session()
    model = regression_model()
    # A different seed per run (2..51) produces a different 70/30 split each time.
    pred_train, pred_test, targ_train, targ_test = train_test_split(
        predictors, target, test_size=0.3, random_state=i + 2)
    model.fit(pred_train, targ_train, epochs=50, verbose=0)
    targ_hat = model.predict(pred_test, verbose=0)
    metrics_mse = sklearn.metrics.mean_squared_error(targ_test, targ_hat)
    mse_list[i] = metrics_mse
    


In [14]:
# Display the per-run test MSEs collected by the loop above.
print(mse_list)

[142.11508951578438, 124.18249941586379, 212.58072541843262, 137.49592764216473, 135.78175079835816, 559.6369710081877, 162.39950306220257, 415.40866973691476, 2219.0528686923703, 194.3571440441421, 113.89721504046102, 123.1020488072272, 292.1685286504986, 128.6702562570447, 315.8418744790115, 127.20756617995433, 147.65493468902082, 163.31138018926612, 116.52299268452698, 427.2987270423288, 637.900109210076, 331.04499246436717, 1024.2444000962275, 100.2387941313402, 330.8591301810781, 119.81057360357923, 97.24173249974747, 302.28082391046865, 228.98396279787585, 334.0730061030319, 93.76828731800798, 132.92305570452996, 107.561809429152, 221.22912590165245, 155.21420343607994, 218.38190154250495, 339.0559540834369, 237.5065212650461, 1559.5194710064009, 161.8407334219159, 108.88236759187531, 397.8310592758982, 132.27187149769995, 458.30761630983596, 202.03217778267688, 234.32975277431285, 261.7413636623265, 356.4935777780984, 102.409735592186, 157.7340231575532]


In [15]:

# Average test MSE over the 50 runs on raw (un-normalized) predictors.
mean = statistics.mean(mse_list)
mean

308.0879761376549

In [16]:
# Sample standard deviation of the 50 test MSEs on raw predictors.
st_dev = statistics.stdev(mse_list)
st_dev

372.9106829710456

# Part 2 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++

### Now we normalize the data

In [17]:
# Standardize every predictor column: subtract its mean, divide by its standard deviation.
# NOTE(review): the mean/std are computed over all rows, including rows that
# train_test_split later holds out for testing; fitting these statistics on the
# training split only would avoid leaking test-set information.
predictors_norm = predictors.sub(predictors.mean()).div(predictors.std())
predictors_norm.head(n=5)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


### Now let us repeat the above process for normalized data and create a list of MSEs. First initialize the list we will fill in later

In [18]:
# Pre-size the result list for the normalized-data runs (one slot per run).
mse_list_norm = [0.0] * 50


### Then repeat the process 50 times, adding the result to the list each time

In [19]:
# Repeat the split / train / evaluate cycle 50 times on the normalized
# predictors and store the test-set MSE of each run in mse_list_norm.
for i in range(50):
    # Release the backend state left behind by previously built models so it
    # does not accumulate over the 50 iterations.
    keras.backend.clear_session()
    model = regression_model()
    # Same seeds (2..51) as the raw-data loop, so the row splits are comparable.
    pred_train, pred_test, targ_train, targ_test = train_test_split(
        predictors_norm, target, test_size=0.3, random_state=i + 2)
    model.fit(pred_train, targ_train, epochs=50, verbose=0)
    targ_hat = model.predict(pred_test, verbose=0)
    metrics_mse = sklearn.metrics.mean_squared_error(targ_test, targ_hat)
    mse_list_norm[i] = metrics_mse
    


In [20]:
# Display the per-run test MSEs for the normalized data (50 epochs).
print(mse_list_norm)

[479.7200575805238, 459.892639021493, 381.1047211665138, 471.7785029739153, 570.567875162506, 213.82164622268908, 707.6428492408954, 342.3491790648985, 319.19927966886615, 363.90454591439203, 563.2679171295299, 253.24101984557342, 345.009751091744, 270.86836092018393, 267.3683690107112, 451.46430369278096, 259.62674971810833, 389.7789003374538, 246.98915363339967, 284.7683909393748, 248.20537502681773, 316.2709161914985, 327.0905586251084, 316.48210117100325, 270.20523492179194, 384.0038740851939, 508.2899487842702, 327.387095709605, 268.58541926401017, 432.1492051413521, 264.64302805574494, 419.6831784788366, 442.457124972726, 368.4516193459108, 264.8920298976254, 256.732710562147, 330.49223477228844, 393.5994218937225, 450.7051937841841, 322.21035921848795, 252.47969413489943, 264.9620206984224, 451.05363190569443, 436.45035366649387, 343.52994242804084, 283.230987832182, 283.9618194902699, 255.63877564006134, 379.05055824301655, 273.16011436236766]


In [21]:
# Average test MSE over the 50 runs on normalized predictors.
mean_norm = statistics.mean(mse_list_norm)
mean_norm

355.56837481278654

In [22]:
# Sample standard deviation of the 50 test MSEs on normalized predictors.
st_dev_norm = statistics.stdev(mse_list_norm)
st_dev_norm

102.12532836969845

### We can see that although the mean error did not change significantly, the standard deviation dropped sharply after data normalization.

# Part 3 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++

### we already have normalized data predictors_norm

### But this time we should use the model with 100 epochs

###  First initialize the list we will fill in later

In [23]:
# Fresh result list for the 100-epoch runs. This rebinds mse_list_norm, so the
# 50-epoch values from Part 2 are no longer available after this cell.
mse_list_norm = [0.0] * 50


### Then repeat the process 50 times, adding the result to the list each time,  but this time with 100 epochs

In [24]:
# Repeat the split / train / evaluate cycle 50 times on the normalized
# predictors, this time training each model for 100 epochs.
for i in range(50):
    # Release the backend state left behind by previously built models so it
    # does not accumulate over the 50 iterations.
    keras.backend.clear_session()
    model = regression_model()
    # Same seeds (2..51) as the earlier loops, so the row splits are comparable.
    pred_train, pred_test, targ_train, targ_test = train_test_split(
        predictors_norm, target, test_size=0.3, random_state=i + 2)
    model.fit(pred_train, targ_train, epochs=100, verbose=0)
    targ_hat = model.predict(pred_test, verbose=0)
    metrics_mse = sklearn.metrics.mean_squared_error(targ_test, targ_hat)
    mse_list_norm[i] = metrics_mse
    


In [25]:
# Display the per-run test MSEs for the normalized data (100 epochs).
print(mse_list_norm)

[175.03106655963907, 183.16257348122997, 170.59268002085705, 177.35606589733396, 189.75749286411383, 138.797129247723, 162.9743132906959, 170.7022268993792, 139.95142857070434, 147.9770307591885, 169.67928922742797, 207.54372124223835, 171.48538760764455, 171.57615931152804, 127.00449766832172, 152.06286028068885, 132.82766432781267, 170.67438413486173, 140.02888626654038, 176.65317759596937, 146.97032616904852, 186.1482017517597, 135.8349557465367, 155.22262138166988, 177.22509880313666, 164.50904233135685, 179.70564355157782, 161.62614375242552, 192.34379054120814, 151.3724281647938, 137.86150051668733, 148.6702911854205, 174.8971147275301, 171.9225756574098, 155.23570375167068, 158.00852984898393, 158.82608696428355, 179.19633698480044, 164.77127182611767, 187.3897929726328, 143.72349092038496, 158.56624747865007, 162.257171572049, 162.53243436279877, 165.33642510629767, 187.54735215679804, 205.35387321465086, 162.70460752541453, 156.75396616479608, 145.5244544773138]


In [26]:
# Average test MSE over the 50 runs with 100 epochs (rebinds mean_norm).
mean_norm = statistics.mean(mse_list_norm)
mean_norm

164.27751029724206

In [27]:
# Sample standard deviation of the 50 test MSEs with 100 epochs (rebinds st_dev_norm).
st_dev_norm = statistics.stdev(mse_list_norm)
st_dev_norm

18.270756837251138

### 100 epochs instead of 50 radically improved both mean error and standard deviation.

# Part 4 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++

### we already have normalized data predictors_norm

### But this time we need a new model with 3 hidden layers, so an updated function for it

In [36]:
# define regression model
def regression_model(hidden_layers=(10, 10, 10), n_features=None):
    """Build and compile a fully connected regression network.

    This definition replaces the earlier one-hidden-layer ``regression_model``.
    Called without arguments it builds three hidden layers of 10 units each.

    Parameters
    ----------
    hidden_layers : sequence of int, optional
        Number of units in each hidden layer, in order. Every hidden layer
        uses a ReLU activation. Defaults to ``(10, 10, 10)``. An empty
        sequence yields a plain linear model.
    n_features : int, optional
        Number of input features. When omitted, the module-level ``n_cols``
        is used, which matches the original zero-argument behaviour.

    Returns
    -------
    A compiled ``Sequential`` model with one linear output unit, using the
    'adam' optimizer and 'mean_squared_error' loss.
    """
    if n_features is None:
        n_features = n_cols

    model = Sequential()

    # Only the first layer added to the model declares the input shape.
    first_layer_kwargs = {'input_shape': (n_features,)}
    for units in hidden_layers:
        model.add(Dense(units, activation='relu', **first_layer_kwargs))
        first_layer_kwargs = {}

    # Single linear output unit for the regression target.
    model.add(Dense(1, **first_layer_kwargs))

    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [37]:
# build the model (the evaluation loop below creates a fresh model on every
# iteration, so this instance is replaced before it is ever trained)
model = regression_model()

###  First initialize the list we will fill in later

In [38]:
# Fresh result list for the deeper network. This rebinds mse_list_norm, so the
# Part 3 values are no longer available after this cell.
mse_list_norm = [0.0] * 50


### Then repeat the process 50 times, adding the result to the list each time

In [39]:
# Repeat the split / train / evaluate cycle 50 times on the normalized
# predictors with the redefined regression_model, 50 epochs per run.
for i in range(50):
    # Release the backend state left behind by previously built models so it
    # does not accumulate over the 50 iterations.
    keras.backend.clear_session()
    model = regression_model()
    # Same seeds (2..51) as the earlier loops, so the row splits are comparable.
    pred_train, pred_test, targ_train, targ_test = train_test_split(
        predictors_norm, target, test_size=0.3, random_state=i + 2)
    model.fit(pred_train, targ_train, epochs=50, verbose=0)
    targ_hat = model.predict(pred_test, verbose=0)
    metrics_mse = sklearn.metrics.mean_squared_error(targ_test, targ_hat)
    mse_list_norm[i] = metrics_mse
    


In [40]:
# Display the per-run test MSEs for the three-hidden-layer network.
print(mse_list_norm)

[134.57021364204647, 142.7427046418004, 143.45310258854803, 120.14145429780727, 138.88010235827085, 133.76634887789115, 144.94215586024592, 122.78326336646964, 127.21056358299376, 123.95896028896944, 97.5907414595123, 150.9892378285582, 125.2299977906272, 118.86472886091386, 124.82774777164991, 131.55611444628113, 121.93192302716479, 138.98891999594318, 116.75412105765872, 121.16149221680901, 126.99331288046409, 127.36184250151868, 105.26351261902825, 132.8002406848012, 115.29278100763081, 105.78252706320633, 104.86988483326085, 134.08435219085524, 132.39095678675687, 131.20266349283023, 125.49332543619138, 129.91995226205677, 135.4107625231301, 139.7903200434017, 134.40629096873712, 137.11786398564357, 82.88592277078473, 137.31655352065957, 147.07635780603687, 99.88069323724706, 125.9368577708848, 122.64097846115575, 128.30123467702006, 151.84351256471655, 141.21893967522567, 134.3175647889994, 124.28944403743293, 144.56273157978208, 114.27839256732418, 138.21002638742425]


In [41]:
# Average test MSE over the 50 runs of the deeper network (rebinds mean_norm).
mean_norm = statistics.mean(mse_list_norm)
mean_norm

127.82567386168736

In [42]:
# Sample standard deviation of the 50 test MSEs of the deeper network (rebinds st_dev_norm).
st_dev_norm = statistics.stdev(mse_list_norm)
st_dev_norm

14.098723755075483

### We can see that adding hidden layers is even more effective than doubling the number of epochs, although the difference is not significant.