Final Project with Keras

Section A: Building a baseline Model

In [1]:
# import data handling libraries
import numpy as np
import pandas as pd

# import data splitting libraries and metrics
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# import statistics for sampling
from statistics import mean, stdev

In [2]:
file = 'https://cocl.us/concrete_data'
concrete_data = pd.read_csv(file)
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [4]:
concrete_data_columns = concrete_data.columns
predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']]
target = concrete_data['Strength']

In [5]:
n_input_cols = predictors.shape[1]

In [7]:
import keras
from keras.models import Sequential
from keras.layers import Dense

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [9]:
# build a neural network model
def regression_model():
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_input_cols,)))
    model.add(Dense(1))
    # compiling the model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [11]:
# executing train test split on the predictor and target columns 
Pred_train, Pred_test, tar_train, tar_test = train_test_split(predictors, target, test_size=0.3, random_state=None)

In [12]:
# training the model with 50 Epochs
model = regression_model()
model.fit(Pred_train, tar_train, epochs=50, verbose=2)

Epoch 1/50
 - 0s - loss: 4201.7405
Epoch 2/50
 - 0s - loss: 1969.8741
Epoch 3/50
 - 0s - loss: 1162.6263
Epoch 4/50
 - 0s - loss: 793.9416
Epoch 5/50
 - 0s - loss: 550.0187
Epoch 6/50
 - 0s - loss: 406.3389
Epoch 7/50
 - 0s - loss: 314.6370
Epoch 8/50
 - 0s - loss: 262.9736
Epoch 9/50
 - 0s - loss: 223.7749
Epoch 10/50
 - 0s - loss: 202.4887
Epoch 11/50
 - 0s - loss: 183.1364
Epoch 12/50
 - 0s - loss: 167.9906
Epoch 13/50
 - 0s - loss: 155.8351
Epoch 14/50
 - 0s - loss: 148.9984
Epoch 15/50
 - 0s - loss: 139.0979
Epoch 16/50
 - 0s - loss: 134.8214
Epoch 17/50
 - 0s - loss: 127.3457
Epoch 18/50
 - 0s - loss: 122.9899
Epoch 19/50
 - 0s - loss: 126.1726
Epoch 20/50
 - 0s - loss: 120.7369
Epoch 21/50
 - 0s - loss: 115.6495
Epoch 22/50
 - 0s - loss: 115.7937
Epoch 23/50
 - 0s - loss: 115.6016
Epoch 24/50
 - 0s - loss: 113.8656
Epoch 25/50
 - 0s - loss: 111.9285
Epoch 26/50
 - 0s - loss: 112.1557
Epoch 27/50
 - 0s - loss: 113.2638
Epoch 28/50
 - 0s - loss: 115.7718
Epoch 29/50
 - 0s - loss: 

<keras.callbacks.History at 0x7fe38d5e8128>

In [13]:
# predicting with the model on the test data
tar_pred = model.predict(Pred_test)
tar_pred[0:4]

array([[53.505165],
       [29.467781],
       [33.13871 ],
       [31.178688]], dtype=float32)

In [14]:
# evaluating the model on the test data
eval_score = mean_squared_error(tar_test, tar_pred)
print('the evaluation score : {}'.format(eval_score))

the evaluation score : 105.76777529166097


In [15]:
# iteratng on processes 1-3 50 times
MSE_list = []
count = 0

while count <= 50:
    # train_test_split
    Pred_train, Pred_test, tar_train, tar_test = train_test_split(predictors, target, test_size=0.3, random_state=None)
    # model training
    model = regression_model()
    model.fit(Pred_train, tar_train, epochs=50, verbose=2)
    # predicting with the model
    tar_pred = model.predict(Pred_test)
    # evaluating the model
    eval_score = mean_squared_error(tar_test, tar_pred)
    # adding evaluation scores to the list
    MSE_list.append(eval_score)
    count += 1

Epoch 1/50
 - 0s - loss: 289779.7259
Epoch 2/50
 - 0s - loss: 175138.4447
Epoch 3/50
 - 0s - loss: 97006.0868
Epoch 4/50
 - 0s - loss: 48062.1376
Epoch 5/50
 - 0s - loss: 20634.6754
Epoch 6/50
 - 0s - loss: 7905.1689
Epoch 7/50
 - 0s - loss: 3520.0985
Epoch 8/50
 - 0s - loss: 2360.7719
Epoch 9/50
 - 0s - loss: 2135.8169
Epoch 10/50
 - 0s - loss: 2069.6165
Epoch 11/50
 - 0s - loss: 2008.4889
Epoch 12/50
 - 0s - loss: 1952.0181
Epoch 13/50
 - 0s - loss: 1888.2572
Epoch 14/50
 - 0s - loss: 1826.7548
Epoch 15/50
 - 0s - loss: 1766.8851
Epoch 16/50
 - 0s - loss: 1703.9097
Epoch 17/50
 - 0s - loss: 1644.6915
Epoch 18/50
 - 0s - loss: 1585.4201
Epoch 19/50
 - 0s - loss: 1526.5230
Epoch 20/50
 - 0s - loss: 1468.8251
Epoch 21/50
 - 0s - loss: 1413.3668
Epoch 22/50
 - 0s - loss: 1359.5617
Epoch 23/50
 - 0s - loss: 1310.6611
Epoch 24/50
 - 0s - loss: 1259.4747
Epoch 25/50
 - 0s - loss: 1213.9054
Epoch 26/50
 - 0s - loss: 1166.8050
Epoch 27/50
 - 0s - loss: 1125.1302
Epoch 28/50
 - 0s - loss: 1083

In [18]:
mean_MSE_list = mean(MSE_list)
std_MSE_list = stdev(MSE_list)
print('mean of the 50 mean squared errors : {}'.format(mean_MSE_list))
print('STD  of the 50 mean squared errors : {}'.format(std_MSE_list))

mean of the 50 mean squared errors : 399.52555809917203
STD  of the 50 mean squared errors : 666.901078048696


Part B: Normalization of Inputs

In [19]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()

In [20]:
# iteratng on processes 1-3 50 times
MSE_list_2 = []
count = 0

while count <= 50:
    # train_test_split
    Pred_train, Pred_test, tar_train, tar_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=None)
    # model training
    model = regression_model()
    model.fit(Pred_train, tar_train, epochs=50, verbose=2)
    # predicting with the model
    tar_pred = model.predict(Pred_test)
    # evaluating the model
    eval_score = mean_squared_error(tar_test, tar_pred)
    # adding evaluation scores to the list
    MSE_list_2.append(eval_score)
    count += 1

Epoch 1/50
 - 3s - loss: 1533.5918
Epoch 2/50
 - 0s - loss: 1516.8531
Epoch 3/50
 - 0s - loss: 1500.4828
Epoch 4/50
 - 0s - loss: 1484.1348
Epoch 5/50
 - 0s - loss: 1467.4429
Epoch 6/50
 - 0s - loss: 1450.3361
Epoch 7/50
 - 0s - loss: 1432.9179
Epoch 8/50
 - 0s - loss: 1414.5365
Epoch 9/50
 - 0s - loss: 1395.8704
Epoch 10/50
 - 0s - loss: 1376.2281
Epoch 11/50
 - 0s - loss: 1355.5264
Epoch 12/50
 - 0s - loss: 1333.7216
Epoch 13/50
 - 0s - loss: 1311.4267
Epoch 14/50
 - 0s - loss: 1287.2809
Epoch 15/50
 - 0s - loss: 1262.7838
Epoch 16/50
 - 0s - loss: 1237.3411
Epoch 17/50
 - 0s - loss: 1211.0101
Epoch 18/50
 - 0s - loss: 1183.6927
Epoch 19/50
 - 0s - loss: 1155.4820
Epoch 20/50
 - 0s - loss: 1126.4140
Epoch 21/50
 - 0s - loss: 1096.2510
Epoch 22/50
 - 0s - loss: 1065.1739
Epoch 23/50
 - 0s - loss: 1033.8538
Epoch 24/50
 - 0s - loss: 1001.5897
Epoch 25/50
 - 0s - loss: 969.1925
Epoch 26/50
 - 0s - loss: 935.7726
Epoch 27/50
 - 0s - loss: 902.7700
Epoch 28/50
 - 0s - loss: 869.4085
Epoch

In [22]:
mean_MSE_list_2 = mean(MSE_list_2)
std_MSE_list_2 = stdev(MSE_list_2)
print('mean of the 50 mean squared errors : {}'.format(mean_MSE_list_2))
print('STD  of the 50 mean squared errors : {}'.format(std_MSE_list_2))
print('The mean of the mean squared errors in part B : {0} is smaller than that in part A : {1}'.format(mean_MSE_list_2,
                                                                        mean_MSE_list))

mean of the 50 mean squared errors : 356.963596100719
STD  of the 50 mean squared errors : 96.50702886795585
The mean of the mean squared errors in part B : 356.963596100719 is smaller than that in part A : 399.52555809917203


Part C: Increasing the number of epochs

In [23]:
# iterating through steps 1 through 3 50 times
MSE_list_3 = []
count = 0

while count <= 50:
    # train_test_split
    Pred_train, Pred_test, tar_train, tar_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=None)
    # model training
    model = regression_model()
    model.fit(Pred_train, tar_train, epochs=100, verbose=2)
    # predicting with the model
    tar_pred = model.predict(Pred_test)
    # evaluating the model
    eval_score = mean_squared_error(tar_test, tar_pred)
    # adding evaluation scores to the list
    MSE_list_3.append(eval_score)
    count += 1

Epoch 1/100
 - 6s - loss: 1650.2095
Epoch 2/100
 - 0s - loss: 1632.5408
Epoch 3/100
 - 0s - loss: 1615.8052
Epoch 4/100
 - 0s - loss: 1599.6953
Epoch 5/100
 - 0s - loss: 1584.1264
Epoch 6/100
 - 0s - loss: 1568.8932
Epoch 7/100
 - 0s - loss: 1553.7262
Epoch 8/100
 - 0s - loss: 1538.6284
Epoch 9/100
 - 0s - loss: 1523.1137
Epoch 10/100
 - 0s - loss: 1507.5174
Epoch 11/100
 - 0s - loss: 1491.3331
Epoch 12/100
 - 0s - loss: 1474.7063
Epoch 13/100
 - 0s - loss: 1457.2174
Epoch 14/100
 - 0s - loss: 1439.1818
Epoch 15/100
 - 0s - loss: 1420.5149
Epoch 16/100
 - 0s - loss: 1400.8347
Epoch 17/100
 - 0s - loss: 1380.8972
Epoch 18/100
 - 0s - loss: 1359.5529
Epoch 19/100
 - 0s - loss: 1337.7262
Epoch 20/100
 - 0s - loss: 1315.0363
Epoch 21/100
 - 0s - loss: 1291.2119
Epoch 22/100
 - 0s - loss: 1266.9859
Epoch 23/100
 - 0s - loss: 1242.5580
Epoch 24/100
 - 0s - loss: 1216.5741
Epoch 25/100
 - 0s - loss: 1190.2095
Epoch 26/100
 - 0s - loss: 1163.0927
Epoch 27/100
 - 0s - loss: 1135.5106
Epoch 28/1

In [26]:
mean_MSE_list_3 = mean(MSE_list_3)
std_MSE_list_3 = stdev(MSE_list_3)
print('mean of the 50 mean squared errors : {}'.format(mean_MSE_list_3))
print('STD  of the 50 mean squared errors : {}'.format(std_MSE_list_3))
print('The mean of the mean squared errors in part C : {0} is smaller than that in part B : {1}'.format(mean_MSE_list_3,
                                                                        mean_MSE_list_2))

mean of the 50 mean squared errors : 163.9575668496406
STD  of the 50 mean squared errors : 14.258124621326894
The mean of the mean squared errors in part C : 163.9575668496406 is smaller than that in part B : 356.963596100719


D. Increasing the Number of Hidden Layers

In [27]:
# developing a neural network with 3 hiddden layers
def deep_regression_model():
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_input_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    # compiling the model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [28]:
# iterating through steps 1 through 3 50 times
MSE_list_4 = []
count = 0

while count <= 50:
    # train_test_split
    Pred_train, Pred_test, tar_train, tar_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=None)
    # model training
    model = deep_regression_model()
    model.fit(Pred_train, tar_train, epochs=50, verbose=2)
    # predicting with the model
    tar_pred = model.predict(Pred_test)
    # evaluating the model
    eval_score = mean_squared_error(tar_test, tar_pred)
    # adding evaluation scores to the list
    MSE_list_4.append(eval_score)
    count += 1

Epoch 1/50
 - 9s - loss: 1542.8048
Epoch 2/50
 - 0s - loss: 1513.2196
Epoch 3/50
 - 0s - loss: 1469.0523
Epoch 4/50
 - 0s - loss: 1399.5691
Epoch 5/50
 - 0s - loss: 1296.9260
Epoch 6/50
 - 0s - loss: 1149.9726
Epoch 7/50
 - 0s - loss: 955.0985
Epoch 8/50
 - 0s - loss: 718.4217
Epoch 9/50
 - 0s - loss: 473.2317
Epoch 10/50
 - 0s - loss: 312.6740
Epoch 11/50
 - 0s - loss: 259.9045
Epoch 12/50
 - 0s - loss: 239.4239
Epoch 13/50
 - 0s - loss: 224.9499
Epoch 14/50
 - 0s - loss: 213.3777
Epoch 15/50
 - 0s - loss: 204.6328
Epoch 16/50
 - 0s - loss: 198.3118
Epoch 17/50
 - 0s - loss: 191.9961
Epoch 18/50
 - 0s - loss: 186.8287
Epoch 19/50
 - 0s - loss: 182.7473
Epoch 20/50
 - 0s - loss: 178.8384
Epoch 21/50
 - 0s - loss: 175.5195
Epoch 22/50
 - 0s - loss: 172.5397
Epoch 23/50
 - 0s - loss: 169.4979
Epoch 24/50
 - 0s - loss: 166.9730
Epoch 25/50
 - 0s - loss: 164.4888
Epoch 26/50
 - 0s - loss: 162.4566
Epoch 27/50
 - 0s - loss: 159.9210
Epoch 28/50
 - 0s - loss: 158.2955
Epoch 29/50
 - 0s - los

In [29]:
mean_MSE_list_4 = mean(MSE_list_4)
std_MSE_list_4 = stdev(MSE_list_4)
print('mean of the 50 mean squared errors : {}'.format(mean_MSE_list_4))
print('STD  of the 50 mean squared errors : {}'.format(std_MSE_list_4))
print('The mean of the mean squared errors in part D : {0} is smaller than that in part B : {1}'.format(mean_MSE_list_4,
                                                                        mean_MSE_list_2))

mean of the 50 mean squared errors : 128.74664759144738
STD  of the 50 mean squared errors : 16.89059079987207
The mean of the mean squared errors in part D : 128.74664759144738 is smaller than that in part B : 356.963596100719


THE END!!!!