Regression Model

Importing necessary libraries


In [0]:
import pandas as pd
import numpy as np

Downloading the dataset

In [3]:
concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


Checking the data points

In [4]:
concrete_data.shape

(1030, 9)

In [5]:
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


Checking if there is any missing value

In [6]:
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

Splitting the data into predict and target

The target variable in this problem is the concrete sample strength. Therefore, our predictors will be all the other columns.

In [0]:
concrete_data_columns = concrete_data.columns

predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
target = concrete_data['Strength'] # Strength column

In [8]:
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [9]:
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

In [0]:
n_cols = predictors.shape[1] # number of predictors

Importing Keras library

In [0]:
import keras

Let's import the rest of the packages from the Keras library that we will need to build our regressoin model.

In [0]:
from keras.models import Sequential
from keras.layers import Dense

Building a neural network with 1 hidden layer and 10 nodes

In [0]:
# define regression model
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

Train and Test the Network

In [0]:
from sklearn.model_selection import train_test_split

In [0]:
X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=50)

In [0]:
# build the model
model = regression_model()

We will train and test the model at the same time using the fit method. We will leave out 30% of the data for validation and we will train the model for 50 epochs.

In [37]:
# fit the model
model.fit(X_train, y_train, epochs=50, verbose=2)

Epoch 1/50
 - 0s - loss: 87.1800
Epoch 2/50
 - 0s - loss: 89.6786
Epoch 3/50
 - 0s - loss: 88.3034
Epoch 4/50
 - 0s - loss: 88.3481
Epoch 5/50
 - 0s - loss: 87.0552
Epoch 6/50
 - 0s - loss: 86.8723
Epoch 7/50
 - 0s - loss: 86.6308
Epoch 8/50
 - 0s - loss: 90.2735
Epoch 9/50
 - 0s - loss: 86.0909
Epoch 10/50
 - 0s - loss: 87.8368
Epoch 11/50
 - 0s - loss: 85.2250
Epoch 12/50
 - 0s - loss: 84.9759
Epoch 13/50
 - 0s - loss: 86.2335
Epoch 14/50
 - 0s - loss: 85.1274
Epoch 15/50
 - 0s - loss: 85.7146
Epoch 16/50
 - 0s - loss: 85.0060
Epoch 17/50
 - 0s - loss: 83.5413
Epoch 18/50
 - 0s - loss: 85.0264
Epoch 19/50
 - 0s - loss: 82.6011
Epoch 20/50
 - 0s - loss: 91.2974
Epoch 21/50
 - 0s - loss: 82.7582
Epoch 22/50
 - 0s - loss: 83.6161
Epoch 23/50
 - 0s - loss: 86.7548
Epoch 24/50
 - 0s - loss: 84.9756
Epoch 25/50
 - 0s - loss: 85.7359
Epoch 26/50
 - 0s - loss: 85.0374
Epoch 27/50
 - 0s - loss: 84.0055
Epoch 28/50
 - 0s - loss: 84.0424
Epoch 29/50
 - 0s - loss: 83.0825
Epoch 30/50
 - 0s - los

<keras.callbacks.History at 0x7f3f55942dd8>

In [40]:
loss_val = model.evaluate(X_test, y_test)
y_pred = model.predict(X_test)
loss_val



68.83232456812195

In [0]:
from sklearn.metrics import mean_squared_error

In [41]:
mean_square_error = mean_squared_error(y_test, y_pred)
mean = np.mean(mean_square_error)
standard_deviation = np.std(mean_square_error)
print(mean, standard_deviation)

68.83230885189136 0.0


In [53]:
total_mean_squared_errors = 50
epochs = 50
mean_squared_errors = []
for i in range(0, total_mean_squared_errors):
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=i)
    model.fit(X_train, y_train, epochs=epochs, verbose=0)
    MSE = model.evaluate(X_test, y_test, verbose=0)
    print("MSE "+str(i+1)+": "+str(MSE))
    y_pred = model.predict(X_test)
    mean_square_error = mean_squared_error(y_test, y_pred)
    mean_squared_errors.append(mean_square_error)

mean_squared_errors = np.array(mean_squared_errors)
mean = np.mean(mean_squared_errors)
standard_deviation = np.std(mean_squared_errors)

print('\n')
print("The mean and standard deviation of " +str(total_mean_squared_errors) + " mean squared errors without normalized data. Total number of epochs for each training is: " +str(epochs) + "\n")
print("Mean: "+str(mean))
print("Standard Deviation: "+str(standard_deviation))

MSE 1: 121.43323324259045
MSE 2: 125.98502860948878
MSE 3: 108.49003435647218
MSE 4: 122.1613242387
MSE 5: 128.99156791415416
MSE 6: 114.88805589398135
MSE 7: 133.67700037292678
MSE 8: 111.53696429305091
MSE 9: 119.7209675612959
MSE 10: 110.46636483893039
MSE 11: 106.2248464541142
MSE 12: 102.7374469423757
MSE 13: 122.09114810486828
MSE 14: 126.5531769786452
MSE 15: 120.22369927958763
MSE 16: 106.0911969922507
MSE 17: 115.64568062198973
MSE 18: 95.41838288538665
MSE 19: 102.01243556741758
MSE 20: 137.67284494541997
MSE 21: 96.83801037439636
MSE 22: 103.02096073681483
MSE 23: 107.65140447030176
MSE 24: 113.47274711139765
MSE 25: 109.10924873228598
MSE 26: 99.55340655181786
MSE 27: 120.13640893238647
MSE 28: 109.62818074149222
MSE 29: 109.73864610295466
MSE 30: 113.6255733897385
MSE 31: 134.54515041116758
MSE 32: 109.03935045408971
MSE 33: 108.7211844435016
MSE 34: 115.34181781262642
MSE 35: 113.87342752993686
MSE 36: 128.93515335626202
MSE 37: 119.3651342052472
MSE 38: 124.5325781072227

Building the Regression model after normalizing the data

In [43]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [0]:
n_cols = predictors_norm.shape[1] # number of predictors

In [0]:
# define regression model
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [0]:
X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=50)

In [0]:
# build the model
model = regression_model()

In [48]:
# fit the model
model.fit(X_train, y_train, epochs=50, verbose=2)

Epoch 1/50
 - 0s - loss: 1612.1412
Epoch 2/50
 - 0s - loss: 1592.6192
Epoch 3/50
 - 0s - loss: 1573.0013
Epoch 4/50
 - 0s - loss: 1553.7229
Epoch 5/50
 - 0s - loss: 1533.9986
Epoch 6/50
 - 0s - loss: 1514.4235
Epoch 7/50
 - 0s - loss: 1493.9547
Epoch 8/50
 - 0s - loss: 1473.6174
Epoch 9/50
 - 0s - loss: 1452.8788
Epoch 10/50
 - 0s - loss: 1431.6321
Epoch 11/50
 - 0s - loss: 1410.1653
Epoch 12/50
 - 0s - loss: 1388.2046
Epoch 13/50
 - 0s - loss: 1365.6689
Epoch 14/50
 - 0s - loss: 1343.0277
Epoch 15/50
 - 0s - loss: 1319.8531
Epoch 16/50
 - 0s - loss: 1295.8838
Epoch 17/50
 - 0s - loss: 1271.9099
Epoch 18/50
 - 0s - loss: 1247.1781
Epoch 19/50
 - 0s - loss: 1222.3711
Epoch 20/50
 - 0s - loss: 1196.9587
Epoch 21/50
 - 0s - loss: 1171.1736
Epoch 22/50
 - 0s - loss: 1144.8882
Epoch 23/50
 - 0s - loss: 1118.7425
Epoch 24/50
 - 0s - loss: 1091.8502
Epoch 25/50
 - 0s - loss: 1064.7763
Epoch 26/50
 - 0s - loss: 1036.8692
Epoch 27/50
 - 0s - loss: 1008.8274
Epoch 28/50
 - 0s - loss: 980.5233
Ep

<keras.callbacks.History at 0x7f3f55843fd0>

In [49]:
loss_val = model.evaluate(X_test, y_test)
y_pred = model.predict(X_test)
loss_val



415.24904239987865

In [50]:
mean_square_error = mean_squared_error(y_test, y_pred)
mean = np.mean(mean_square_error)
standard_deviation = np.std(mean_square_error)
print(mean, standard_deviation)

415.2490327323405 0.0


In [62]:
total_mean_squared_errors = 50
epochs = 50
mean_squared_errors = []
for i in range(0, total_mean_squared_errors):
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=i)
    model.fit(X_train, y_train, epochs=epochs, verbose=0)
    MSE = model.evaluate(X_test, y_test, verbose=0)
    print("MSE "+str(i+1)+": "+str(MSE))
    y_pred = model.predict(X_test)
    mean_square_error = mean_squared_error(y_test, y_pred)
    mean_squared_errors.append(mean_square_error)

mean_squared_errors = np.array(mean_squared_errors)
mean = np.mean(mean_squared_errors)
standard_deviation = np.std(mean_squared_errors)

print('\n')
print("The mean and standard deviation of " +str(total_mean_squared_errors) + " mean squared errors with normalized data. Total number of epochs for each training is: " +str(epochs) + "\n")
print("Mean: "+str(mean))
print("Standard Deviation: "+str(standard_deviation))

MSE 1: 29.815468390011095
MSE 2: 32.30326516111306
MSE 3: 27.198698006787346
MSE 4: 28.756561791626766
MSE 5: 28.788457605059477
MSE 6: 31.065459347079873
MSE 7: 32.5699252279831
MSE 8: 27.208365307656692
MSE 9: 26.54239079250101
MSE 10: 25.788700190176854
MSE 11: 27.91947303697901
MSE 12: 25.90780011272739
MSE 13: 30.47969720433059
MSE 14: 32.68658732442022
MSE 15: 27.855955253527004
MSE 16: 23.204137147823197
MSE 17: 31.761770279276334
MSE 18: 29.42549879574081
MSE 19: 25.566907104936618
MSE 20: 29.421157256685028
MSE 21: 25.69325196472958
MSE 22: 26.055733381351608
MSE 23: 22.333469082236675
MSE 24: 26.599733420560273
MSE 25: 28.944660205285526
MSE 26: 31.015688207928804
MSE 27: 25.337462082649896
MSE 28: 29.47295006810654
MSE 29: 32.347600967752896
MSE 30: 31.407273968446603
MSE 31: 27.765325861069762
MSE 32: 27.742371784444767
MSE 33: 25.690979837213906
MSE 34: 30.862842547469153
MSE 35: 30.753989358550136
MSE 36: 34.62705156641099
MSE 37: 25.117914773885484
MSE 38: 30.19874809783

Building the regression model for 100 epochs

In [55]:
# fit the model
model.fit(X_train, y_train, epochs=100, verbose=2)

Epoch 1/100
 - 0s - loss: 110.7092
Epoch 2/100
 - 0s - loss: 108.1158
Epoch 3/100
 - 0s - loss: 111.6648
Epoch 4/100
 - 0s - loss: 113.3355
Epoch 5/100
 - 0s - loss: 110.4007
Epoch 6/100
 - 0s - loss: 109.3680
Epoch 7/100
 - 0s - loss: 109.7805
Epoch 8/100
 - 0s - loss: 110.6089
Epoch 9/100
 - 0s - loss: 111.8075
Epoch 10/100
 - 0s - loss: 108.3212
Epoch 11/100
 - 0s - loss: 109.2942
Epoch 12/100
 - 0s - loss: 111.3627
Epoch 13/100
 - 0s - loss: 110.0460
Epoch 14/100
 - 0s - loss: 109.3859
Epoch 15/100
 - 0s - loss: 110.3112
Epoch 16/100
 - 0s - loss: 112.1492
Epoch 17/100
 - 0s - loss: 111.5367
Epoch 18/100
 - 0s - loss: 112.3629
Epoch 19/100
 - 0s - loss: 109.5410
Epoch 20/100
 - 0s - loss: 107.8958
Epoch 21/100
 - 0s - loss: 109.1371
Epoch 22/100
 - 0s - loss: 107.2021
Epoch 23/100
 - 0s - loss: 110.3646
Epoch 24/100
 - 0s - loss: 108.0347
Epoch 25/100
 - 0s - loss: 108.4428
Epoch 26/100
 - 0s - loss: 109.2312
Epoch 27/100
 - 0s - loss: 112.3473
Epoch 28/100
 - 0s - loss: 110.8765
E

<keras.callbacks.History at 0x7f3f5e6929e8>

In [56]:
loss_val = model.evaluate(X_test, y_test)
y_pred = model.predict(X_test)
loss_val



111.1146433808657

In [58]:
mean_square_error = mean_squared_error(y_test, y_pred)
mean = np.mean(mean_square_error)
standard_deviation = np.std(mean_square_error)
print(mean, standard_deviation)

111.11463957735994 0.0


In [60]:
total_mean_squared_errors = 100
epochs = 100
mean_squared_errors = []
for i in range(0, total_mean_squared_errors):
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=i)
    model.fit(X_train, y_train, epochs=epochs, verbose=0)
    MSE = model.evaluate(X_test, y_test, verbose=0)
    print("MSE "+str(i+1)+": "+str(MSE))
    y_pred = model.predict(X_test)
    mean_square_error = mean_squared_error(y_test, y_pred)
    mean_squared_errors.append(mean_square_error)

mean_squared_errors = np.array(mean_squared_errors)
mean = np.mean(mean_squared_errors)
standard_deviation = np.std(mean_squared_errors)

print('\n')
print("The mean and standard deviation of " +str(total_mean_squared_errors) + " mean squared errors with normalized data. Total number of epochs for each training is: " +str(epochs) + "\n")
print("Mean: "+str(mean))
print("Standard Deviation: "+str(standard_deviation))

MSE 1: 35.22321002305904
MSE 2: 36.3320466013788
MSE 3: 29.678967719710762
MSE 4: 35.09889637227969
MSE 5: 32.92433418347998
MSE 6: 35.5810406015143
MSE 7: 38.03861295829699
MSE 8: 30.04869376494275
MSE 9: 32.07011307404651
MSE 10: 29.84126327415886
MSE 11: 33.91164066722092
MSE 12: 30.524408525633582
MSE 13: 37.17141735360846
MSE 14: 37.25996163130578
MSE 15: 31.150511238567265
MSE 16: 27.843720544117556
MSE 17: 35.15839579807516
MSE 18: 33.295754534526935
MSE 19: 30.439382670380922
MSE 20: 32.38250351569413
MSE 21: 29.48962391232981
MSE 22: 28.96445990380346
MSE 23: 27.725475379178437
MSE 24: 29.471580295501017
MSE 25: 33.04478580511889
MSE 26: 33.51415989699873
MSE 27: 30.93830674057254
MSE 28: 31.665188909734336
MSE 29: 36.04322756918503
MSE 30: 33.87136980322187
MSE 31: 31.647444234311003
MSE 32: 31.018880967569196
MSE 33: 28.183113777135954
MSE 34: 33.68215395486085
MSE 35: 34.79842396461462
MSE 36: 36.64025264727645
MSE 37: 29.892236012085355
MSE 38: 33.8984919427668
MSE 39: 30.