# Regression model with Keras

### Part A. Building a baseline model

Importing necessary libraries and loading data

In [1]:
import pandas as pd
import numpy as np

In [2]:
concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
concrete_data.shape

(1030, 9)

In [4]:
concrete_data_columns = concrete_data.columns

predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
target = concrete_data['Strength'] # Strength column

In [5]:
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [6]:
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

Installing tenzor flow and Keras

In [7]:
pip install keras-tuner --upgrade

Requirement already up-to-date: keras-tuner in c:\users\sdovl\anaconda3\lib\site-packages (1.0.3)
Note: you may need to restart the kernel to use updated packages.


In [8]:
import keras_tuner as kt
from tensorflow import keras

Creating regression model

In [9]:
from keras.models import Sequential
from keras.layers import Dense

In [10]:
n_cols = predictors.shape[1] 

In [11]:
# define regression model
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [12]:
# build the model
model = regression_model()

###### 1. splitting data on train and test

In [13]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=42)

###### 2.training model on train data

In [14]:
# fit the model
model.fit(X_train, y_train, epochs=50, verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x276ab371d30>

###### 3. Evaluating model on test data

In [15]:
loss_val = model.evaluate(X_test, y_test)
loss_val



141.71075439453125

In [16]:
y_pred = model.predict(X_test)

###### computing mse between the predicted and actual values

In [17]:
from sklearn.metrics import mean_squared_error

In [18]:
mse = mean_squared_error(y_test, y_pred)
mse

141.7107456381933

###### 4. Repeating steps 1-3 50 times and create list of 50 mse

In [19]:
number_mean_squared_errors = 50
epochs = 50
mean_squared_errors = []
for i in range(0, number_mean_squared_errors):
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=i)
    model.fit(X_train, y_train, epochs=epochs, verbose=0)
    loss_val = model.evaluate(X_test, y_test, verbose=0)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mean_squared_errors.append(mse)
    print(" mse "+str(i+1)+": "+str(mse))


 mse 1: 102.87576218231374
 mse 2: 124.88398038261636
 mse 3: 110.28336201855194
 mse 4: 125.38271157757856
 mse 5: 178.3420343425277
 mse 6: 124.72671327994989
 mse 7: 140.09188140395645
 mse 8: 98.42864240989897
 mse 9: 127.79374981363425
 mse 10: 111.77583160508048
 mse 11: 105.81989479934249
 mse 12: 100.50196359175317
 mse 13: 123.0247908311249
 mse 14: 116.4839108217834
 mse 15: 111.30093135538056
 mse 16: 111.67928397376943
 mse 17: 110.3044262682245
 mse 18: 94.30261464067058
 mse 19: 95.70191120544594
 mse 20: 136.26906846715522
 mse 21: 96.01200799803196
 mse 22: 102.41676054789457
 mse 23: 128.97599184088097
 mse 24: 102.18383210549864
 mse 25: 107.36480271904328
 mse 26: 99.08158799583123
 mse 27: 119.0032372206765
 mse 28: 108.020992725166
 mse 29: 110.43277844641572
 mse 30: 111.65577626689266
 mse 31: 131.64830236637025
 mse 32: 104.29490240541064
 mse 33: 105.77403713310885
 mse 34: 117.49138799534019
 mse 35: 112.36784243221243
 mse 36: 124.54497560003361
 mse 37: 120.

###### 5.Finding mean and variance of MSE

In [20]:
mse = np.array(mse)
mean = np.mean(mse)
standard_deviation = np.std(mse)

print('\n')
print("The mean and standard deviation of " +str(number_mean_squared_errors) + " mean squared errors without normalizing data. Total number of epochs for each training is: " +str(epochs) + "\n")
print("Mean: "+str(mean))
print("Standard Deviation: "+str(standard_deviation))



The mean and standard deviation of 50 mean squared errors without normalizing data. Total number of epochs for each training is: 50

Mean: 58.06268117042234
Standard Deviation: 0.0
