# Regression model using Keras to predict the strength of concrete

In [None]:
# first let's import the necessary libraries for managing the dataset

import pandas as pd
import numpy as np

In [None]:
# Load the concrete dataset directly from the course's hosted CSV file
# and preview the first rows to check that it parsed as expected.
data_url = 'https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv'
concrete_data = pd.read_csv(data_url)
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [None]:
# Separate features from the label before any train/test splitting:
# every column other than 'Strength' is a predictor, 'Strength' is the target.
concrete_data_columns = concrete_data.columns
is_predictor = concrete_data_columns != 'Strength'  # boolean mask over the column labels

predictors = concrete_data.loc[:, is_predictor]
target = concrete_data.loc[:, 'Strength']

In [None]:
# Number of predictor columns; used as the input width of the network.
n_cols = len(predictors.columns)

In [None]:
# Standardize each predictor column: subtract its mean, then divide by its
# standard deviation, so all features end up on a comparable scale.
column_means = predictors.mean()
column_stds = predictors.std()
predictors_norm = predictors.sub(column_means).div(column_stds)
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [None]:
# let's import the necessary libraries for building our model

import tensorflow as tf
import tensorflow 
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

In [None]:
# I'm going to create a model with:
# - Three hidden layers of 10 nodes, and a ReLU activation function
# - Use the adam optimizer and the mean squared error as the loss function.

# define regression model
def regression_model():
    """Build and compile a small fully connected regression network.

    The input width is taken from the module-level ``n_cols``. The network
    has three hidden ``Dense`` layers of 10 units each with ReLU activation,
    followed by a single-unit ``Dense`` output layer with no activation
    argument.

    Returns:
        A ``Sequential`` model compiled with the ``adam`` optimizer and
        ``mean_squared_error`` loss.
    """
    # Layers are listed in the order they are stacked, input side first.
    network = Sequential([
        Dense(10, activation='relu', input_shape=(n_cols,)),
        Dense(10, activation='relu'),
        Dense(10, activation='relu'),
        Dense(1),
    ])

    network.compile(loss='mean_squared_error', optimizer='adam')
    return network

Now, I'm going to create a for loop which will train and evaluate our model 50 times. After each iteration, the loop appends that run's mean squared error to a list. We will then use the list to compute the standard deviation of the errors.

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Train on the standardized predictors computed earlier in the notebook.
# Using the raw `predictors` here would silently skip the normalization step.
X = predictors_norm
y = target

mse_list = []  # one test-set mean squared error per run

for run in range(50):
    # Seed the split with the run index: each run gets a different 70/30
    # partition (so the spread of the errors reflects sampling variability),
    # while the whole experiment stays reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=run
    )

    # Fresh, untrained model for every run.
    model = regression_model()

    # verbose=0: per-epoch logging for 50 runs x 50 epochs floods the
    # notebook output and gets truncated anyway.
    model.fit(X_train, y_train, epochs=50, verbose=0)

    # Score the run on the held-out data.
    predictions = model.predict(X_test, verbose=0)
    error = mean_squared_error(y_test, predictions)
    mse_list.append(error)
print('Mean Squared Errors : ', mse_list)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
23/23 - 0s - loss: 30876.4375
Epoch 2/50
23/23 - 0s - loss: 6950.4854
Epoch 3/50
23/23 - 0s - loss: 1285.2771
Epoch 4/50
23/23 - 0s - loss: 413.5983
Epoch 5/50
23/23 - 0s - loss: 311.4446
Epoch 6/50
23/23 - 0s - loss: 290.8290
Epoch 7/50
23/23 - 0s - loss: 280.9760
Epoch 8/50
23/23 - 0s - loss: 273.6227
Epoch 9/50
23/23 - 0s - loss: 267.1190
Epoch 10/50
23/23 - 0s - loss: 261.3803
Epoch 11/50
23/23 - 0s - loss: 257.5745
Epoch 12/50
23/23 - 0s - loss: 250.0924
Epoch 13/50
23/23 - 0s - loss: 255.6901
Epoch 14/50
23/23 - 0s - loss: 243.4985
Epoch 15/50
23/23 - 0s - loss: 245.8875
Epoch 16/50
23/23 - 0s - loss: 242.6706
Epoch 17/50
23/23 - 0s - loss: 240.6907
Epoch 18/50
23/23 - 0s - loss: 239.9685
Epoch 19/50
23/23 - 0s - loss: 239.3252
Epoch 20/50
23/23 - 0s - loss: 239.0801
Epoch 21/50
23/23 - 0s - loss: 239.2487
Epoch 22/50
23/23 - 0s - loss: 239.7927
Epoch 23/50
23/23 - 0s - loss: 239.4964
Epoch 24/50
23/23 - 0s - loss: 

In [None]:
# Summarize run-to-run variability: the standard deviation of the
# mean squared errors collected in `mse_list`.

import numpy as np

std = np.asarray(mse_list).std()
print(std)

35.8764100393746
