# Regression model on concrete dataset

In [3]:
import numpy as np
import pandas as pd

In [4]:
concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [5]:
concrete_data.shape

(1030, 9)

In [6]:
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


# check for any null values

In [7]:
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

# split the data into predictors and target

In [8]:
# split the data into predictors and target
concrete_data_columns=concrete_data.columns

predictors=concrete_data.iloc[:,:-2]
target=concrete_data.iloc[:,-1]
predictors.shape,target.shape, predictors.columns

((1030, 7),
 (1030,),
 Index(['Cement', 'Blast Furnace Slag', 'Fly Ash', 'Water', 'Superplasticizer',
        'Coarse Aggregate', 'Fine Aggregate'],
       dtype='object'))

# PART A : Build a baseline model

In [12]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [13]:
xtrain,xtest,ytrain,ytest=train_test_split(predictors,target,test_size=0.3)

In [18]:
def regression_model():
    model=Sequential()
    # model.add(Dense(50,activation='relu',input_shape=(n_col,)))
    model.add(Dense(10,activation='relu',input_shape=(n_col,)))
    model.add(Dense(1))
    model.compile(optimizer='adam',loss='mean_squared_error')
    return model

In [20]:
n_col=predictors.shape[1]
mean_squared_errors=[]

for i in range(50):
    model=regression_model()
    model.fit(xtrain,ytrain,validation_data=(xtest,ytest),epochs=50,verbose=2)
    mse=model.evaluate(xtest,ytest,verbose=0)
    # predict_x=model.predict(xtest)
    # mse1=mean_squared_error(ytest,predict_x)
    mean_squared_errors.append(mse)
mean_mse=np.mean(mean_squared_errors)
std_mse=np.std(mean_squared_errors)


Epoch 1/50
23/23 - 0s - loss: 28537.9727 - val_loss: 9377.4688 - 385ms/epoch - 17ms/step
Epoch 2/50
23/23 - 0s - loss: 6258.9155 - val_loss: 7223.3359 - 123ms/epoch - 5ms/step
Epoch 3/50
23/23 - 0s - loss: 5833.8809 - val_loss: 6408.3516 - 111ms/epoch - 5ms/step
Epoch 4/50
23/23 - 0s - loss: 5156.7197 - val_loss: 5835.7817 - 96ms/epoch - 4ms/step
Epoch 5/50
23/23 - 0s - loss: 4689.6777 - val_loss: 5279.6567 - 78ms/epoch - 3ms/step
Epoch 6/50
23/23 - 0s - loss: 4266.2915 - val_loss: 4779.3545 - 82ms/epoch - 4ms/step
Epoch 7/50
23/23 - 0s - loss: 3892.2402 - val_loss: 4340.7983 - 77ms/epoch - 3ms/step
Epoch 8/50
23/23 - 0s - loss: 3553.9026 - val_loss: 3971.5925 - 77ms/epoch - 3ms/step
Epoch 9/50
23/23 - 0s - loss: 3275.4392 - val_loss: 3631.9736 - 74ms/epoch - 3ms/step
Epoch 10/50
23/23 - 0s - loss: 3017.1521 - val_loss: 3344.0066 - 73ms/epoch - 3ms/step
Epoch 11/50
23/23 - 0s - loss: 2796.1599 - val_loss: 3091.7249 - 74ms/epoch - 3ms/step
Epoch 12/50
23/23 - 0s - loss: 2595.3962 - val_

In [21]:
print(f'Mean of mean squared errors : {mean_mse}')
print(f'standard deviation of mean squared errors : {std_mse}')

Mean of mean squared errors : 452.38050384521483
standard deviation of mean squared errors : 483.2231665926521


# PART B Normalize the data

# normalize the data

In [22]:
predictors_norm=(predictors-predictors.mean())/predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569


In [23]:
xtrain,xtest,ytrain,ytest=train_test_split(predictors_norm,target,test_size=0.3)

In [24]:
n_col=predictors.shape[1]
mean_squared_errors=[]

for i in range(50):
    model=regression_model()
    model.fit(xtrain,ytrain,validation_data=(xtest,ytest),epochs=50,verbose=2)
    mse=model.evaluate(xtest,ytest,verbose=0)
    # predict_x=model.predict(xtest)
    # mse1=mean_squared_error(ytest,predict_x)
    mean_squared_errors.append(mse)
mean_mse=np.mean(mean_squared_errors)
std_mse=np.std(mean_squared_errors)


Epoch 1/50
23/23 - 0s - loss: 1568.0116 - val_loss: 1568.0162 - 378ms/epoch - 16ms/step
Epoch 2/50
23/23 - 0s - loss: 1555.8333 - val_loss: 1555.3136 - 97ms/epoch - 4ms/step
Epoch 3/50
23/23 - 0s - loss: 1543.4225 - val_loss: 1542.3534 - 96ms/epoch - 4ms/step
Epoch 4/50
23/23 - 0s - loss: 1530.5869 - val_loss: 1528.8733 - 80ms/epoch - 3ms/step
Epoch 5/50
23/23 - 0s - loss: 1517.4183 - val_loss: 1514.4644 - 84ms/epoch - 4ms/step
Epoch 6/50
23/23 - 0s - loss: 1503.6583 - val_loss: 1499.7388 - 78ms/epoch - 3ms/step
Epoch 7/50
23/23 - 0s - loss: 1489.2714 - val_loss: 1484.3500 - 74ms/epoch - 3ms/step
Epoch 8/50
23/23 - 0s - loss: 1474.3029 - val_loss: 1468.0645 - 77ms/epoch - 3ms/step
Epoch 9/50
23/23 - 0s - loss: 1458.4072 - val_loss: 1450.7914 - 74ms/epoch - 3ms/step
Epoch 10/50
23/23 - 0s - loss: 1441.4398 - val_loss: 1432.4917 - 78ms/epoch - 3ms/step
Epoch 11/50
23/23 - 0s - loss: 1423.5970 - val_loss: 1412.8075 - 77ms/epoch - 3ms/step
Epoch 12/50
23/23 - 0s - loss: 1404.5219 - val_los

In [25]:
print(f'Mean of mean squared errors : {mean_mse}')
print(f'standard deviation of mean squared errors : {std_mse}')

Mean of mean squared errors : 364.17634643554686
standard deviation of mean squared errors : 106.12675010228378


# PART C: Increase the number of epochs

In [26]:
n_col=predictors.shape[1]
mean_squared_errors=[]

for i in range(50):
    model=regression_model()
    model.fit(xtrain,ytrain,validation_data=(xtest,ytest),epochs=100,verbose=2)
    mse=model.evaluate(xtest,ytest,verbose=0)
    # predict_x=model.predict(xtest)
    # mse1=mean_squared_error(ytest,predict_x)
    mean_squared_errors.append(mse)
mean_mse=np.mean(mean_squared_errors)
std_mse=np.std(mean_squared_errors)

Epoch 1/100
23/23 - 0s - loss: 1541.5879 - val_loss: 1543.9231 - 385ms/epoch - 17ms/step
Epoch 2/100
23/23 - 0s - loss: 1524.1185 - val_loss: 1527.0865 - 128ms/epoch - 6ms/step
Epoch 3/100
23/23 - 0s - loss: 1507.4386 - val_loss: 1510.1744 - 90ms/epoch - 4ms/step
Epoch 4/100
23/23 - 0s - loss: 1490.9368 - val_loss: 1493.7886 - 70ms/epoch - 3ms/step
Epoch 5/100
23/23 - 0s - loss: 1475.0612 - val_loss: 1477.9141 - 72ms/epoch - 3ms/step
Epoch 6/100
23/23 - 0s - loss: 1459.2852 - val_loss: 1462.1521 - 71ms/epoch - 3ms/step
Epoch 7/100
23/23 - 0s - loss: 1443.5122 - val_loss: 1445.9119 - 73ms/epoch - 3ms/step
Epoch 8/100
23/23 - 0s - loss: 1427.1670 - val_loss: 1429.4691 - 83ms/epoch - 4ms/step
Epoch 9/100
23/23 - 0s - loss: 1410.8712 - val_loss: 1412.3464 - 74ms/epoch - 3ms/step
Epoch 10/100
23/23 - 0s - loss: 1394.0214 - val_loss: 1394.5317 - 78ms/epoch - 3ms/step
Epoch 11/100
23/23 - 0s - loss: 1376.4672 - val_loss: 1376.2018 - 79ms/epoch - 3ms/step
Epoch 12/100
23/23 - 0s - loss: 1357.9

In [27]:
print(f'Mean of mean squared errors : {mean_mse}')
print(f'standard deviation of mean squared errors : {std_mse}')

Mean of mean squared errors : 184.82624206542968
standard deviation of mean squared errors : 13.206107176753116


# PART D: Increase the number of epochs

In [28]:
def regression_model():
    model=Sequential()
    # model.add(Dense(50,activation='relu',input_shape=(n_col,)))
    model.add(Dense(10,activation='relu',input_shape=(n_col,)))
    model.add(Dense(10,activation='relu'))
    model.add(Dense(10,activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam',loss='mean_squared_error')
    return model

In [29]:
n_col=predictors.shape[1]
mean_squared_errors=[]
xtrain,xtest,ytrain,ytest=train_test_split(predictors_norm,target,test_size=0.3)
for i in range(50):
    model=regression_model()
    model.fit(xtrain,ytrain,validation_data=(xtest,ytest),epochs=50,verbose=2)
    mse=model.evaluate(xtest,ytest,verbose=0)
    # predict_x=model.predict(xtest)
    # mse1=mean_squared_error(ytest,predict_x)
    mean_squared_errors.append(mse)
mean_mse=np.mean(mean_squared_errors)
std_mse=np.std(mean_squared_errors)

Epoch 1/50
23/23 - 0s - loss: 1520.6742 - val_loss: 1525.7957 - 442ms/epoch - 19ms/step
Epoch 2/50
23/23 - 0s - loss: 1486.0094 - val_loss: 1486.0969 - 128ms/epoch - 6ms/step
Epoch 3/50
23/23 - 0s - loss: 1439.4834 - val_loss: 1430.5588 - 118ms/epoch - 5ms/step
Epoch 4/50
23/23 - 0s - loss: 1374.2061 - val_loss: 1352.6222 - 100ms/epoch - 4ms/step
Epoch 5/50
23/23 - 0s - loss: 1281.1281 - val_loss: 1239.7020 - 98ms/epoch - 4ms/step
Epoch 6/50
23/23 - 0s - loss: 1148.9397 - val_loss: 1086.9114 - 115ms/epoch - 5ms/step
Epoch 7/50
23/23 - 0s - loss: 976.5728 - val_loss: 895.2281 - 90ms/epoch - 4ms/step
Epoch 8/50
23/23 - 0s - loss: 775.9785 - val_loss: 682.4228 - 80ms/epoch - 3ms/step
Epoch 9/50
23/23 - 0s - loss: 572.3467 - val_loss: 496.4776 - 80ms/epoch - 3ms/step
Epoch 10/50
23/23 - 0s - loss: 412.6379 - val_loss: 362.2520 - 81ms/epoch - 4ms/step
Epoch 11/50
23/23 - 0s - loss: 312.6277 - val_loss: 283.5107 - 82ms/epoch - 4ms/step
Epoch 12/50
23/23 - 0s - loss: 260.9502 - val_loss: 243.

In [30]:
print(f'Mean of mean squared errors : {mean_mse}')
print(f'standard deviation of mean squared errors : {std_mse}')

Mean of mean squared errors : 162.4381332397461
standard deviation of mean squared errors : 6.628644853932616
