# Importing required libraries and downloading the concrete data to pandas dataframe

In [1]:
import pandas as pd
import numpy as np

In [2]:
concrete_data = pd.read_csv('https://cocl.us/concrete_data')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


# Data visualization and pre-processing

In [3]:
concrete_data.shape

(1030, 9)

In [4]:
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [5]:
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

#### Split data into predictors and target

In [6]:
concrete_data_columns = concrete_data.columns

predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
target = concrete_data['Strength'] # Strength column

In [7]:
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [8]:
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

In [9]:
n_cols = predictors.shape[1] # number of predictors
n_cols

8

# **Part A**

**Importing Keras Library and bulding the regression model**

In [10]:
import keras
from keras.models import Sequential
from keras.layers import Dense

In [11]:
# define regression model
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

**Random split the data into a training and test sets**

In [12]:
X = np.asarray(predictors)
y = np.asarray(target)

In [13]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.3, random_state=None)
print ('Train set:', X_train.shape,  y_train.shape)
print ('Test set:', X_test.shape,  y_test.shape)

Train set: (721, 8) (721,)
Test set: (309, 8) (309,)


In [14]:
model_A = regression_model()

**Training the model on the training data using 50 epochs**

In [15]:
model_A.fit(X_train, y_train, epochs=50, verbose=2) 

Epoch 1/50
23/23 - 1s - loss: 27826.8770 - 947ms/epoch - 41ms/step
Epoch 2/50
23/23 - 0s - loss: 10746.9160 - 101ms/epoch - 4ms/step
Epoch 3/50
23/23 - 0s - loss: 3617.2400 - 82ms/epoch - 4ms/step
Epoch 4/50
23/23 - 0s - loss: 1437.8601 - 112ms/epoch - 5ms/step
Epoch 5/50
23/23 - 0s - loss: 938.8065 - 71ms/epoch - 3ms/step
Epoch 6/50
23/23 - 0s - loss: 858.9755 - 153ms/epoch - 7ms/step
Epoch 7/50
23/23 - 0s - loss: 814.9707 - 81ms/epoch - 4ms/step
Epoch 8/50
23/23 - 0s - loss: 779.3865 - 63ms/epoch - 3ms/step
Epoch 9/50
23/23 - 0s - loss: 741.1200 - 51ms/epoch - 2ms/step
Epoch 10/50
23/23 - 0s - loss: 702.6113 - 60ms/epoch - 3ms/step
Epoch 11/50
23/23 - 0s - loss: 664.1258 - 85ms/epoch - 4ms/step
Epoch 12/50
23/23 - 0s - loss: 630.6383 - 48ms/epoch - 2ms/step
Epoch 13/50
23/23 - 0s - loss: 603.1981 - 78ms/epoch - 3ms/step
Epoch 14/50
23/23 - 0s - loss: 576.8246 - 100ms/epoch - 4ms/step
Epoch 15/50
23/23 - 0s - loss: 550.2912 - 45ms/epoch - 2ms/step
Epoch 16/50
23/23 - 0s - loss: 526.51

<keras.callbacks.History at 0x7f0080df7e50>

**Evaluate the model on the test data and compute the mean squared error**

In [16]:
y_hat = model_A.predict(X_test)



In [17]:
from sklearn.metrics import mean_squared_error 

mean_squared_error(y_test, y_hat) # Calculating mean squared error

141.46156344981026

**Repeating the above process 50 times and creating list of mean squared errors of those models**

In [18]:
mse_A = []
for _ in range(50):
  X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.3, random_state=None)
  model_A.fit(X_train, y_train, epochs=50, verbose=0)
  mse_A.append(model_A.evaluate(X_test, y_test))



In [19]:
mean_mse_A = np.mean(mse_A) 
print("Mean of Mean Squared Errors is {}".format(mean_mse_A))
print("Standard deviation of Mean Squared Errors is {}".format(np.std(mse_A)))

Mean of Mean Squared Errors is 113.00035415649414
Standard deviation of Mean Squared Errors is 8.607735976514055


# **Part B**

**Normalising the data**

In [20]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [21]:
X_norm = np.asarray(predictors_norm)

**Creating the list of 50 mean squared errors list**

In [22]:
model_B = regression_model()
mse_B = []
for _ in range(50):
  X_train, X_test, y_train, y_test = train_test_split( X_norm, y, test_size=0.3, random_state=None)
  model_B.fit(X_train, y_train, epochs=50, verbose=0)
  mse_B.append(model_B.evaluate(X_test, y_test)) # Using the evaluate method to get MSE



In [23]:
mean_mse_B = np.mean(mse_B)
print("Mean of Mean Squared Errors is {}".format(mean_mse_B))
print("Standard deviation of Mean Squared Errors is {}".format(np.std(mse_B)))

Mean of Mean Squared Errors is 48.50735622406006
Standard deviation of Mean Squared Errors is 47.02767654353788


In [24]:
print("The difference in Mean of Mean Squared Errors of Part A and Part B is {}".format(mean_mse_A - mean_mse_B))

The difference in Mean of Mean Squared Errors of Part A and Part B is 64.49299793243408


***The Mean of mean squared errors of Part B is considerably lower than that of part A.
The Normalisation of the data has increased the accuracy of the trained model.***

# **Part C**

**Repeating the above process by training the data with 100 epochs**

In [25]:
model_C = regression_model()
mse_C = []
for _ in range(50):
  X_train, X_test, y_train, y_test = train_test_split( X_norm, y, test_size=0.3, random_state=None)
  model_C.fit(X_train, y_train, epochs=100, verbose=0)
  mse_C.append(model_C.evaluate(X_test, y_test)) # Using the evaluate method to get MSE



In [26]:
mean_mse_C = np.mean(mse_C)
print("Mean of Mean Squared Errors is {}".format(mean_mse_C))
print("Standard deviation of Mean Squared Errors is {}".format(np.std(mse_C)))

Mean of Mean Squared Errors is 38.25436706542969
Standard deviation of Mean Squared Errors is 20.471491045462816


In [27]:
print("The difference in Mean of Mean Squared Errors of Part B and Part C is {}".format(mean_mse_B - mean_mse_C))

The difference in Mean of Mean Squared Errors of Part B and Part C is 10.25298915863037


***The Mean of mean squared errors of Part C is  lower than that of part B. Higher number of epochs in the trained model has resulted in higher accuracy of the trained model.***

# **Part D**

**Creating a model with 3 hidden layers**

In [28]:
# define regression model
def regression_model_D():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [29]:
model_D = regression_model_D()

**Training and testing the model 50 times:**

In [31]:
mse_D = []
for _ in range(50):
  X_train, X_test, y_train, y_test = train_test_split( X_norm, y, test_size=0.3, random_state=None)
  model_D.fit(X_train, y_train, epochs=50, verbose=0)
  mse_D.append(model_D.evaluate(X_test, y_test)) # Using the evaluate method to get MSE



In [32]:
mean_mse_D = np.mean(mse_D)
print("Mean of Mean Squared Errors is {}".format(mean_mse_D))
print("Standard deviation of Mean Squared Errors is {}".format(np.std(mse_D)))

Mean of Mean Squared Errors is 25.857250480651857
Standard deviation of Mean Squared Errors is 7.333430400205578


In [33]:
print("The difference in Mean of Mean Squared Errors of Part B and Part D is {}".format(mean_mse_B - mean_mse_D))

The difference in Mean of Mean Squared Errors of Part B and Part D is 22.650105743408204


**There is a considerable increase of the accuracy of the model after increasing the number of hidden layers when compared to the model of Part B**

***In General optimal number of epochs and hidden layers in the regression model can increase the accuray of the trained model tremendously***