In [1]:
# The necessary installation(s) 

#!pip install numpy==1.21.4
#!pip install pandas==1.3.4
#!pip install keras==2.1.6
!pip install scikit-learn



In [24]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
#import sklearn.metrics as metrics

import warnings
warnings.simplefilter('ignore', FutureWarning)

In [25]:
# Loading the data 

concrete_data = pd.read_csv("concrete_data.csv")
print("Dimensions (rows & columns) of the dataset:", concrete_data.shape)
concrete_data.head()

Dimensions (rows & columns) of the dataset: (1030, 9)


Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [26]:
# Checking the presence of missing data
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [27]:
# Selecting the predictors 

predictors = concrete_data[concrete_data.columns[concrete_data.columns!='Strength']]
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [28]:
# Selecting the target 

target = concrete_data['Strength']
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

In [29]:
# Normalizing the predictors 

predictors_norm = (predictors - predictors.mean()) / predictors.std() 
predictors_norm.head() 

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [30]:
# Saving the #inputs required (same as the #predictors) to be used in the regression model 

n_cols = predictors_norm.shape[1]
n_cols

8

In [31]:
# Spliting the training and testing set using the scikit-learn function 

x_train, x_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3)
print ('Train set:', x_train.shape,  y_train.shape)
print ('Test set:', x_test.shape,  y_test.shape)

Train set: (721, 8) (721,)
Test set: (309, 8) (309,)


In [10]:
import keras

Using TensorFlow backend.


In [32]:
from keras.models import Sequential
from keras.layers import Dense

In [41]:
# The function for regression model with three hidden layers, each of 10 nodes

def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [42]:
# Function for the Mean Squre Error

def MeanSqureError_Estimator():
    model = regression_model()
    model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=50, verbose=0)
    model.evaluate(x_test, y_test, verbose=0)
    y_pred = model.predict(x_test, verbose=0) 
    return mean_squared_error(y_test, y_pred)

In [43]:
results = []

for i in range(1, 51):
    results.append({"Iteration": i, "MSE": MeanSqureError_Estimator()})

results_df = pd.DataFrame(results)
print(results_df)

    Iteration         MSE
0           1  125.730751
1           2  119.202089
2           3  121.252271
3           4  109.362086
4           5  127.082165
5           6  137.936021
6           7  133.114461
7           8  142.987878
8           9  143.927191
9          10  137.052349
10         11  137.652864
11         12   97.376758
12         13  136.374423
13         14  133.269582
14         15  128.808210
15         16  136.234723
16         17  141.533882
17         18  136.035652
18         19  128.944276
19         20  144.988696
20         21  150.185575
21         22  138.009147
22         23  140.878609
23         24  131.539827
24         25  100.037185
25         26  116.104003
26         27  108.814489
27         28  133.554173
28         29   82.344765
29         30  138.023663
30         31  136.846423
31         32  136.500164
32         33   98.292313
33         34  130.054120
34         35  138.959566
35         36  134.451890
36         37  111.564845
37         3

In [44]:
# Mean and Standard deviation 

mean_mse = results_df['MSE'].mean()
print("Mean value of all the mean square errors", mean_mse)

sd_mse = results_df['MSE'].std() 
print("Standard devition of all the mean square errors", sd_mse)

Mean value of all the mean square errors 130.20915774028643
Standard devition of all the mean square errors 14.850132962491568


In [37]:
# Q: How does the mean of the mean squared errors compare to that from Step B?

# Ans: The mean squared error has reduced with increase in the number of hidden layers. 