# Download and Clean Dataset

Let's start by importing the pandas and the Numpy libraries.

In [1]:
import pandas as pd
import numpy as np

We will be using the dataset provided in the assignment

The dataset is about the compressive strength of different samples of concrete based on the volumes of the different ingredients that were used to make them. Ingredients include:

1. Cement

2. Blast Furnace Slag

3. Fly Ash

4. Water

5. Superplasticizer

6. Coarse Aggregate

7. Fine Aggregate

Let's read the dataset into a pandas dataframe.

In [2]:
concrete_data = pd.read_csv('concrete_data.csv')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


Let's check how many data points we have.

In [3]:
concrete_data.shape

(1030, 9)

In [4]:
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


The data looks very clean and is ready to be used to build our model.

### Split data into predictors and target

The target variable in this problem is the concrete sample strength. Therefore, our predictors will be all the other columns

In [5]:
concrete_data_columns = concrete_data.columns
predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
target = concrete_data['Strength'] # Strength column

In [6]:
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [7]:
target.head()


0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

In [8]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [9]:
n_cols = predictors_norm.shape[1] # number of predictors


# Import Keras

In [10]:
import keras

In [11]:
from keras.models import Sequential
from keras.layers import Dense

In [12]:
# define regression model
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model


In [13]:
from sklearn.model_selection import train_test_split

In [19]:
X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=42)

# Train and Test the Network

In [20]:
# build the model
model = regression_model()

In [21]:
# fit the model
epochs = 50
model.fit(X_train, y_train, epochs=epochs, verbose=2)

Epoch 1/50
23/23 - 0s - loss: 1552.1659 - 283ms/epoch - 12ms/step
Epoch 2/50
23/23 - 0s - loss: 1524.6600 - 23ms/epoch - 1000us/step
Epoch 3/50
23/23 - 0s - loss: 1485.6254 - 27ms/epoch - 1ms/step
Epoch 4/50
23/23 - 0s - loss: 1424.9230 - 29ms/epoch - 1ms/step
Epoch 5/50
23/23 - 0s - loss: 1332.9442 - 26ms/epoch - 1ms/step
Epoch 6/50
23/23 - 0s - loss: 1197.8961 - 27ms/epoch - 1ms/step
Epoch 7/50
23/23 - 0s - loss: 1015.7150 - 29ms/epoch - 1ms/step
Epoch 8/50
23/23 - 0s - loss: 797.9760 - 26ms/epoch - 1ms/step
Epoch 9/50
23/23 - 0s - loss: 584.9422 - 29ms/epoch - 1ms/step
Epoch 10/50
23/23 - 0s - loss: 420.7277 - 28ms/epoch - 1ms/step
Epoch 11/50
23/23 - 0s - loss: 327.8346 - 27ms/epoch - 1ms/step
Epoch 12/50
23/23 - 0s - loss: 283.7227 - 26ms/epoch - 1ms/step
Epoch 13/50
23/23 - 0s - loss: 256.5728 - 29ms/epoch - 1ms/step
Epoch 14/50
23/23 - 0s - loss: 238.2474 - 27ms/epoch - 1ms/step
Epoch 15/50
23/23 - 0s - loss: 223.4633 - 27ms/epoch - 1ms/step
Epoch 16/50
23/23 - 0s - loss: 211.73

<keras.callbacks.History at 0x7f9ae39f4d90>

In [22]:
loss_val = model.evaluate(X_test, y_test)
y_pred = model.predict(X_test)
loss_val



98.8558120727539

In [23]:
from sklearn.metrics import mean_squared_error

In [24]:
mean_square_error = mean_squared_error(y_test, y_pred)
mean = np.mean(mean_square_error)
standard_deviation = np.std(mean_square_error)
print(mean, standard_deviation)

98.8558198860149 0.0


In [25]:
total_mean_squared_errors = 50
epochs = 50
mean_squared_errors = []
for i in range(0, total_mean_squared_errors):
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=i)
    model.fit(X_train, y_train, epochs=epochs, verbose=0)
    MSE = model.evaluate(X_test, y_test, verbose=0)
    print("MSE "+str(i+1)+": "+str(MSE))
    y_pred = model.predict(X_test)
    mean_square_error = mean_squared_error(y_test, y_pred)
    mean_squared_errors.append(mean_square_error)

mean_squared_errors = np.array(mean_squared_errors)
mean = np.mean(mean_squared_errors)
standard_deviation = np.std(mean_squared_errors)

print('\n')
print("Below is the mean and standard deviation of " +str(total_mean_squared_errors) + " mean squared errors without normalized data. Total number of epochs for each training is: " +str(epochs) + "\n")
print("Mean: "+str(mean))
print("Standard Deviation: "+str(standard_deviation))

MSE 1: 372.5851135253906
MSE 2: 998.44580078125
MSE 3: 202.5924530029297
MSE 4: 217.73825073242188
MSE 5: 191.79408264160156
MSE 6: 164.70899963378906
MSE 7: 181.40054321289062
MSE 8: 131.56776428222656
MSE 9: 189.3871612548828
MSE 10: 125.17666625976562
MSE 11: 96.0924072265625
MSE 12: 111.67554473876953
MSE 13: 117.68670654296875
MSE 14: 182.06138610839844
MSE 15: 79.32738494873047
MSE 16: 72.97538757324219
MSE 17: 69.207275390625
MSE 18: 74.25448608398438
MSE 19: 70.06739807128906
MSE 20: 75.23494720458984
MSE 21: 61.8715934753418
MSE 22: 60.81298828125
MSE 23: 61.81651306152344
MSE 24: 47.77621078491211
MSE 25: 47.93604278564453
MSE 26: 45.94194030761719
MSE 27: 45.51877975463867
MSE 28: 37.58871078491211
MSE 29: 50.327301025390625
MSE 30: 39.530521392822266
MSE 31: 42.84940719604492
MSE 32: 35.019649505615234
MSE 33: 44.60484313964844
MSE 34: 39.108028411865234
MSE 35: 39.58487319946289
MSE 36: 42.799171447753906
MSE 37: 41.74957275390625
MSE 38: 41.35074234008789
MSE 39: 41.68725