### **Building a Regression model in Keras**
#### **A: Baseline model**
- One hidden layer of 10 nodes, and ReLU activation function
- Adam optimizer and MSE as the loss function

Preprocessing the data.

In [1]:
import pandas as pd
import numpy as np
import sklearn

concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [2]:
concrete_data.shape

(1030, 9)

In [3]:
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [4]:
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [5]:
# Splitting the data in predictors and targets

concrete_data_columns = concrete_data.columns

predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']]
target = concrete_data['Strength']

predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [6]:
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

In [7]:
# Number of predictors
n_cols = predictors.shape[1]
n_cols

8

Splitting the data in Train and Test.

In [8]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=4)
print('Train set: ', X_train.shape, y_train.shape)
print('Test set: ', X_test.shape, y_test.shape)

Train set:  (721, 8) (721,)
Test set:  (309, 8) (309,)


Building and training the model for 50 epochs

In [9]:
import keras
from keras.models import Sequential
from keras.layers import Dense

In [10]:
def regression_model():
    model = Sequential()

    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [11]:
epochs = 50
model = regression_model()
model.fit(X_train, y_train, epochs=epochs, verbose=2)

Epoch 1/50
23/23 - 1s - loss: 262519.2500 - 1s/epoch - 54ms/step
Epoch 2/50
23/23 - 0s - loss: 168866.5156 - 121ms/epoch - 5ms/step
Epoch 3/50
23/23 - 0s - loss: 102097.0625 - 119ms/epoch - 5ms/step
Epoch 4/50
23/23 - 0s - loss: 55590.5391 - 110ms/epoch - 5ms/step
Epoch 5/50
23/23 - 0s - loss: 26104.2422 - 105ms/epoch - 5ms/step
Epoch 6/50
23/23 - 0s - loss: 10252.0176 - 99ms/epoch - 4ms/step
Epoch 7/50
23/23 - 0s - loss: 3448.0710 - 106ms/epoch - 5ms/step
Epoch 8/50
23/23 - 0s - loss: 1297.6261 - 114ms/epoch - 5ms/step
Epoch 9/50
23/23 - 0s - loss: 801.7653 - 102ms/epoch - 4ms/step
Epoch 10/50
23/23 - 0s - loss: 711.0850 - 100ms/epoch - 4ms/step
Epoch 11/50
23/23 - 0s - loss: 683.0016 - 109ms/epoch - 5ms/step
Epoch 12/50
23/23 - 0s - loss: 659.8395 - 109ms/epoch - 5ms/step
Epoch 13/50
23/23 - 0s - loss: 637.3511 - 103ms/epoch - 4ms/step
Epoch 14/50
23/23 - 0s - loss: 615.1925 - 110ms/epoch - 5ms/step
Epoch 15/50
23/23 - 0s - loss: 593.0762 - 109ms/epoch - 5ms/step
Epoch 16/50
23/23 - 

<keras.callbacks.History at 0x23ac750f250>

Evaluating the model and creating a list of the MSE for each epoch.


In [14]:
from sklearn.metrics import mean_squared_error

pred = model.predict(X_test)
mse = mean_squared_error(y_test, pred)
print('MSE: ', mse)

MSE:  222.26661838738417


In [15]:
# Creating a list of MSE
MSE = []

for i in range(epochs):
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=4)
    model = regression_model()
    model.fit(X_train, y_train, epochs=epochs, verbose=2)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred) 
    MSE.append(mse)

Epoch 1/50
23/23 - 1s - loss: 431865.2500 - 583ms/epoch - 25ms/step
Epoch 2/50
23/23 - 0s - loss: 211520.2031 - 101ms/epoch - 4ms/step
Epoch 3/50
23/23 - 0s - loss: 86031.8203 - 111ms/epoch - 5ms/step
Epoch 4/50
23/23 - 0s - loss: 28003.4004 - 108ms/epoch - 5ms/step
Epoch 5/50
23/23 - 0s - loss: 7334.1553 - 103ms/epoch - 4ms/step
Epoch 6/50
23/23 - 0s - loss: 3686.0984 - 102ms/epoch - 4ms/step
Epoch 7/50
23/23 - 0s - loss: 3174.0410 - 99ms/epoch - 4ms/step
Epoch 8/50
23/23 - 0s - loss: 2740.1826 - 102ms/epoch - 4ms/step
Epoch 9/50
23/23 - 0s - loss: 2348.7744 - 104ms/epoch - 5ms/step
Epoch 10/50
23/23 - 0s - loss: 2013.5524 - 101ms/epoch - 4ms/step
Epoch 11/50
23/23 - 0s - loss: 1771.8429 - 100ms/epoch - 4ms/step
Epoch 12/50
23/23 - 0s - loss: 1614.5792 - 108ms/epoch - 5ms/step
Epoch 13/50
23/23 - 0s - loss: 1510.0082 - 111ms/epoch - 5ms/step
Epoch 14/50
23/23 - 0s - loss: 1418.0485 - 103ms/epoch - 4ms/step
Epoch 15/50
23/23 - 0s - loss: 1349.9257 - 113ms/epoch - 5ms/step
Epoch 16/50
2

Mean and standard deviation of MSE.

In [16]:
# Mean and Standard Deviation of MSE 
mean = np.mean(MSE)
standard_deviation = np.std(MSE)
for i in range(epochs):
    print(f'MSE {i+1}: {MSE[i]}')
print(f'Mean of the MSE: {mean}')
print(f'Standard deviation of the MSE: {standard_deviation}')

MSE 1: 419.93933382374667
MSE 2: 1807.8980276298348
MSE 3: 475.63733525849045
MSE 4: 1565.8050394698046
MSE 5: 132.56721946042867
MSE 6: 355.368564738967
MSE 7: 288.13400617899606
MSE 8: 96.68318213668147
MSE 9: 749.5173034331898
MSE 10: 273.9049237831664
MSE 11: 328.1230581575632
MSE 12: 1219.2429913493977
MSE 13: 270.79428241478445
MSE 14: 135.44526701138818
MSE 15: 122.66642321725477
MSE 16: 558.4519074870631
MSE 17: 375.003282789726
MSE 18: 212.67624421789847
MSE 19: 836.6647708541282
MSE 20: 224.0133539684871
MSE 21: 172.06468159685537
MSE 22: 248.88051692376803
MSE 23: 256.4968162548248
MSE 24: 105.2236716599004
MSE 25: 818.9492103561264
MSE 26: 610.3464831377323
MSE 27: 112.32913050881596
MSE 28: 274.31816122896873
MSE 29: 230.61308941609394
MSE 30: 194.88302778055692
MSE 31: 252.28266117284693
MSE 32: 618.8414642979054
MSE 33: 392.67296629671
MSE 34: 656.7428348440627
MSE 35: 158.48667564476102
MSE 36: 309.13297690464407
MSE 37: 470.6975469431432
MSE 38: 429.33769823053046
MSE 