# Build a Regression Model in Keras Project

first we import the needed libraries

In [11]:
import pandas as pd
import numpy as np
import keras
from keras.layers import Dense
from keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

Lets import our data

In [2]:
concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


Let's Set out our predictor and target dataframe

In [4]:
predictors = concrete_data.drop(['Age','Strength'],axis=1) # all columns except Strength
target = concrete_data['Strength']

In [5]:
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5


In [6]:
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

Finally, the last step is to normalize the data by substracting the mean and dividing by the standard deviation.

In [7]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569


Let's save the number of predictors to n_cols since we will need this number when building our network.

In [8]:
n_cols = predictors_norm.shape[1] # number of predictors

Let's split our data into train test

In [19]:
X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.30, random_state=42)

Let's make our model

In [12]:
# define regression model
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [13]:
model = regression_model()

Instructions for updating:
Colocations handled automatically by placer.


### A. Build a baseline model

In [20]:
mselist = []
for i in range (50):
    model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=2)
    y_hat = model.predict(X_test)
    merror = mean_squared_error(y_test, y_hat)
    mselist.append(merror)

Train on 504 samples, validate on 217 samples
Epoch 1/50
 - 0s - loss: 90435602.6667 - val_loss: 70542313.2535
Epoch 2/50
 - 0s - loss: 59319498.3492 - val_loss: 47671146.6544
Epoch 3/50
 - 0s - loss: 41472181.7778 - val_loss: 34551880.4793
Epoch 4/50
 - 0s - loss: 30847555.5238 - val_loss: 26397739.0691
Epoch 5/50
 - 0s - loss: 23974833.3968 - val_loss: 20819920.1198
Epoch 6/50
 - 0s - loss: 19131988.4286 - val_loss: 16785986.0184
Epoch 7/50
 - 0s - loss: 15557825.8571 - val_loss: 13750139.3272
Epoch 8/50
 - 0s - loss: 12839452.7937 - val_loss: 11421435.3318
Epoch 9/50
 - 0s - loss: 10708592.6190 - val_loss: 9611202.0461
Epoch 10/50
 - 0s - loss: 9062561.0476 - val_loss: 8206799.6820
Epoch 11/50
 - 0s - loss: 7785739.1667 - val_loss: 7101934.0300
Epoch 12/50
 - 0s - loss: 6780752.9762 - val_loss: 6240852.8479
Epoch 13/50
 - 0s - loss: 6013097.3413 - val_loss: 5587841.6774
Epoch 14/50
 - 0s - loss: 5417205.4365 - val_loss: 5083740.6382
Epoch 15/50
 - 0s - loss: 4949189.3016 - val_loss:

In [21]:
msearray = np.array(mselist)

In [22]:
msearray.std()

107471.25329256282

In [23]:
msearray.mean()

18583.072313748966

### B. Normalize the data 

we already normalized the predictors let's put them in the train test split

In [25]:
X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.30, random_state=42)

let's repeat everything again

In [26]:
mselist = []
for i in range (50):
    model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=2)
    y_hat = model.predict(X_test)
    merror = mean_squared_error(y_test, y_hat)
    mselist.append(merror)

Train on 504 samples, validate on 217 samples
Epoch 1/50
 - 1s - loss: 234.7386 - val_loss: 184.7347
Epoch 2/50
 - 0s - loss: 162.3460 - val_loss: 155.3946
Epoch 3/50
 - 0s - loss: 135.1837 - val_loss: 148.1835
Epoch 4/50
 - 0s - loss: 126.6529 - val_loss: 147.0041
Epoch 5/50
 - 0s - loss: 123.1704 - val_loss: 146.8894
Epoch 6/50
 - 0s - loss: 121.7601 - val_loss: 147.0070
Epoch 7/50
 - 0s - loss: 121.1770 - val_loss: 147.1144
Epoch 8/50
 - 0s - loss: 120.7206 - val_loss: 147.4871
Epoch 9/50
 - 0s - loss: 120.2306 - val_loss: 147.4262
Epoch 10/50
 - 0s - loss: 120.0847 - val_loss: 147.6039
Epoch 11/50
 - 0s - loss: 119.8083 - val_loss: 147.4592
Epoch 12/50
 - 0s - loss: 119.6999 - val_loss: 147.6152
Epoch 13/50
 - 0s - loss: 119.5026 - val_loss: 147.6635
Epoch 14/50
 - 0s - loss: 119.3853 - val_loss: 147.7739
Epoch 15/50
 - 0s - loss: 119.2896 - val_loss: 147.6273
Epoch 16/50
 - 0s - loss: 119.2486 - val_loss: 147.9507
Epoch 17/50
 - 0s - loss: 119.1307 - val_loss: 147.7455
Epoch 18/50

In [27]:
msearray = np.array(mselist)

In [28]:
msearray.std()

2.07927152315083

In [29]:
msearray.mean()

154.46746621075533

### C. Increase the number of epochs

In [30]:
mselist = []
for i in range (50):
    model.fit(X_train, y_train, validation_split=0.3, epochs=100, verbose=2)
    y_hat = model.predict(X_test)
    merror = mean_squared_error(y_test, y_hat)
    mselist.append(merror)

Train on 504 samples, validate on 217 samples
Epoch 1/100
 - 0s - loss: 110.9615 - val_loss: 146.2366
Epoch 2/100
 - 0s - loss: 110.9701 - val_loss: 146.2984
Epoch 3/100
 - 0s - loss: 110.9546 - val_loss: 146.4175
Epoch 4/100
 - 0s - loss: 110.9974 - val_loss: 146.3601
Epoch 5/100
 - 0s - loss: 111.0116 - val_loss: 146.3681
Epoch 6/100
 - 0s - loss: 111.0600 - val_loss: 146.2138
Epoch 7/100
 - 0s - loss: 111.0149 - val_loss: 146.3642
Epoch 8/100
 - 0s - loss: 110.9450 - val_loss: 146.2086
Epoch 9/100
 - 0s - loss: 111.0634 - val_loss: 146.5903
Epoch 10/100
 - 0s - loss: 110.9515 - val_loss: 146.5708
Epoch 11/100
 - 0s - loss: 110.9356 - val_loss: 146.4187
Epoch 12/100
 - 0s - loss: 110.9491 - val_loss: 146.1252
Epoch 13/100
 - 0s - loss: 110.9349 - val_loss: 146.3150
Epoch 14/100
 - 0s - loss: 110.9924 - val_loss: 146.5509
Epoch 15/100
 - 0s - loss: 110.9174 - val_loss: 146.2870
Epoch 16/100
 - 0s - loss: 110.9019 - val_loss: 146.3245
Epoch 17/100
 - 0s - loss: 110.9929 - val_loss: 146

In [31]:
msearray = np.array(mselist)

In [32]:
msearray.std()

1.3820608523820288

In [33]:
msearray.mean()

154.32117873504575

### D. Increase the number of hidden layers 

In [34]:
# define regression model
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [35]:
model = regression_model()

In [36]:
mselist = []
for i in range (50):
    model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=2)
    y_hat = model.predict(X_test)
    merror = mean_squared_error(y_test, y_hat)
    mselist.append(merror)

Train on 504 samples, validate on 217 samples
Epoch 1/50
 - 3s - loss: 1613.0982 - val_loss: 1479.1263
Epoch 2/50
 - 0s - loss: 1591.7135 - val_loss: 1455.0994
Epoch 3/50
 - 0s - loss: 1564.6291 - val_loss: 1422.5267
Epoch 4/50
 - 0s - loss: 1526.1311 - val_loss: 1376.7481
Epoch 5/50
 - 0s - loss: 1470.1265 - val_loss: 1308.8865
Epoch 6/50
 - 0s - loss: 1390.5028 - val_loss: 1224.3155
Epoch 7/50
 - 0s - loss: 1292.1030 - val_loss: 1117.4283
Epoch 8/50
 - 0s - loss: 1171.6462 - val_loss: 984.4861
Epoch 9/50
 - 0s - loss: 1024.4700 - val_loss: 833.6759
Epoch 10/50
 - 0s - loss: 863.3951 - val_loss: 672.0451
Epoch 11/50
 - 0s - loss: 696.6424 - val_loss: 518.3640
Epoch 12/50
 - 0s - loss: 546.8413 - val_loss: 392.2769
Epoch 13/50
 - 0s - loss: 426.3844 - val_loss: 310.2156
Epoch 14/50
 - 0s - loss: 356.0561 - val_loss: 262.7636
Epoch 15/50
 - 0s - loss: 310.4504 - val_loss: 241.9503
Epoch 16/50
 - 0s - loss: 288.0405 - val_loss: 230.5271
Epoch 17/50
 - 0s - loss: 272.6806 - val_loss: 223.

In [37]:
msearray = np.array(mselist)

In [38]:
msearray.std()

4.406344368944371

In [39]:
msearray.mean()

164.7011520127479