Installing necessary libraries in Jupyter

In [1]:
!pip install pandas
!pip install numpy
!pip install tensorflow
!pip install keras
!pip install scikit-learn





Importing the libraries

In [2]:
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split

Downloading data and reading it into a pandas framework

In [3]:
cd = pd.read_csv('https://cocl.us/concrete_data')
cd.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


Checking the data points

In [4]:
cd.info()

cd.shape

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Cement              1030 non-null   float64
 1   Blast Furnace Slag  1030 non-null   float64
 2   Fly Ash             1030 non-null   float64
 3   Water               1030 non-null   float64
 4   Superplasticizer    1030 non-null   float64
 5   Coarse Aggregate    1030 non-null   float64
 6   Fine Aggregate      1030 non-null   float64
 7   Age                 1030 non-null   int64  
 8   Strength            1030 non-null   float64
dtypes: float64(8), int64(1)
memory usage: 72.6 KB


(1030, 9)

Searching the dataset for missing values

In [5]:
cd.describe()
cd.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

Splitting data into predictors and target

In [6]:
cd_columns = cd.columns

predictors = cd[cd_columns[cd_columns != 'Strength']] # all columns except Strength
target = cd['Strength'] # Strength column

Sanity check of the predictors and target

In [7]:
predictors.head()
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

# Normalising the data

In [8]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


saving the number of predictors to n_cols

In [9]:
n_cols = predictors.shape[1]

# Increasing hidden layers to three

In [10]:
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model




Train Test Split

In [11]:
X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=42)

Build the model

In [12]:
model = regression_model()

Fitting the data

In [13]:
model.fit(predictors, target, validation_split=0.3, epochs=50,verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x15f86e96850>

In [14]:
loss_val = model.evaluate(X_test, y_test)
y_pred = model.predict(X_test)
loss_val



58.28005599975586

Calculations

In [15]:
from sklearn.metrics import mean_squared_error
mean_square_error = mean_squared_error(y_test, y_pred)
mean = np.mean(mean_square_error)
standard_deviation = np.std(mean_square_error)
print(mean, standard_deviation)

mean_squared_errors = []
for i in range(0, 50):
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=i)
    model.fit(X_train, y_train, epochs=50, verbose=0)
    MSE = model.evaluate(X_test, y_test, verbose=0)
    print("MSE "+str(i+1)+": "+str(MSE))
    y_pred = model.predict(X_test)
    mean_square_error = mean_squared_error(y_test, y_pred)
    mean_squared_errors.append(mean_square_error)

mean_squared_errors = np.array(mean_squared_errors)
mean = np.mean(mean_squared_errors)
standard_deviation = np.std(mean_squared_errors)


58.280055053637284 0.0
MSE 1: 55.7879638671875
MSE 2: 50.451416015625
MSE 3: 35.642948150634766
MSE 4: 42.615501403808594
MSE 5: 40.0322265625
MSE 6: 44.619937896728516
MSE 7: 45.18662643432617
MSE 8: 40.009498596191406
MSE 9: 37.39181137084961
MSE 10: 40.61905288696289
MSE 11: 35.07411575317383
MSE 12: 35.186275482177734
MSE 13: 40.54734420776367
MSE 14: 42.27455139160156
MSE 15: 34.297237396240234
MSE 16: 29.142704010009766
MSE 17: 32.694278717041016
MSE 18: 31.53252410888672
MSE 19: 31.105396270751953
MSE 20: 30.717748641967773
MSE 21: 31.038467407226562
MSE 22: 28.223892211914062
MSE 23: 24.49721336364746
MSE 24: 26.247121810913086
MSE 25: 31.167247772216797
MSE 26: 31.51193618774414
MSE 27: 24.445188522338867
MSE 28: 25.245946884155273
MSE 29: 31.666959762573242
MSE 30: 26.238967895507812
MSE 31: 24.83612060546875
MSE 32: 27.980554580688477
MSE 33: 24.46761703491211
MSE 34: 34.773223876953125
MSE 35: 25.916522979736328
MSE 36: 29.345064163208008
MSE 37: 20.532764434814453
MSE 38: 

Output

In [16]:
print('\n')
print("Below is the mean and standard deviation of " +str(50) + " mean squared errors with normalized data. Total number of epochs for each training is: " +str(50) + "\n")
print("Mean: "+str(mean))
print("Standard Deviation: "+str(standard_deviation))



Below is the mean and standard deviation of 50 mean squared errors with normalized data. Total number of epochs for each training is: 50

Mean: 32.39364278582419
Standard Deviation: 7.468349473821813


The mean of mean squared erorr has reduced compared Part B.