## Basic imports

In [1]:
import pandas as pd
import numpy as np

## Reading data

In [2]:
# Load the concrete dataset and cast every column to float64 in a single chain
df = pd.read_csv('concrete_data.csv').astype('float64')

# Show (rows, columns), then preview the first five records
print(df.shape)
df.head()

(1030, 9)


Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28.0,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28.0,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270.0,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365.0,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360.0,44.3


In [3]:
# Summarise each column's dtype and non-null count to check for missing values
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Cement              1030 non-null   float64
 1   Blast Furnace Slag  1030 non-null   float64
 2   Fly Ash             1030 non-null   float64
 3   Water               1030 non-null   float64
 4   Superplasticizer    1030 non-null   float64
 5   Coarse Aggregate    1030 non-null   float64
 6   Fine Aggregate      1030 non-null   float64
 7   Age                 1030 non-null   float64
 8   Strength            1030 non-null   float64
dtypes: float64(9)
memory usage: 72.5 KB


All 1030 rows are non-null in every column, so no imputation or row-dropping is needed.

## Splitting data into predictors and target

In [4]:
# Separate the feature columns from the regression target
cols = df.columns
predictor_cols = cols[cols != 'Strength']  # every column except the target

x = df[predictor_cols]  # Predictors
y = df[['Strength']]    # Target, kept as a one-column DataFrame

## Normalizing data

In [7]:
# Z-score every predictor column: subtract the column mean, divide by the column std.
# NOTE(review): these statistics are computed over all rows, including the rows
# that later end up in the test split -- confirm this is acceptable for the analysis.
col_means = x.mean()
col_stds = x.std()
x = (x - col_means) / col_stds
x.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [8]:
# Preview the target; the normalisation step only rescales `x`, so `y` is untouched by it
y.head()

Unnamed: 0,Strength
0,79.99
1,61.89
2,40.27
3,41.05
4,44.3


## Turning values into numpy arrays

In [9]:
# Materialise predictors and target as NumPy arrays for the split/train steps
x_vals, y_vals = np.array(x, ndmin = 2), np.array(y, ndmin = 1)

# Report the shape of each array
for label, arr in (('X shape : ', x_vals), ('Y shape : ', y_vals)):
    print(label, arr.shape)

X shape :  (1030, 8)
Y shape :  (1030, 1)


## Imports for making the model and testing it

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from keras.models import Sequential
from keras.layers import Dense

In [11]:
def make_model(n_features = None, hidden_units = 10):
    """Build and compile a one-hidden-layer regression network.

    Parameters
    ----------
    n_features : int, optional
        Number of input features. When omitted, it is taken from the
        notebook-level ``x_vals`` array (``x_vals.shape[1]``), which is what
        the original zero-argument call relied on.
    hidden_units : int, optional
        Width of the single ReLU hidden layer (default 10).

    Returns
    -------
    Sequential
        A freshly initialised model compiled with the Adam optimizer and
        mean-squared-error loss.
    """
    if n_features is None:
        # Fall back to the global predictor matrix so `make_model()` keeps working
        n_features = x_vals.shape[1]

    model = Sequential()
    model.add(Dense(hidden_units, activation = 'relu', input_shape = (n_features, )))
    model.add(Dense(1))  # single linear output for the regression target

    model.compile(optimizer = 'adam', loss = 'mse')
    return model

## Main training and testing loop

In [12]:
N_ROUNDS = 50        # number of independent split / train / evaluate repetitions
N_EPOCHS = 50        # training epochs per repetition
TEST_FRACTION = 0.3  # share of rows held out for evaluation in each round

mse_list = [] # List to store the test MSE of every round

# main loop
for i in range(N_ROUNDS):

    # Seed the split with the round index: each round still gets a different
    # partition, but re-running the notebook reproduces the same partitions.
    # (Network weight initialisation is not seeded here.)
    train_x, test_x, train_y, test_y = train_test_split(
        x_vals, y_vals, test_size = TEST_FRACTION, random_state = i)

    # Build a fresh, untrained network so rounds do not share weights
    model = make_model()
    model.fit(train_x, train_y, epochs = N_EPOCHS, verbose = 0)
    print(f'Training round {i + 1}')

    # verbose = 0 keeps per-batch progress bars out of the cell output
    preds = model.predict(test_x, verbose = 0)
    error = mean_squared_error(test_y, preds)
    print('Error : ', error)
    print('-' * 50)
    mse_list.append(error)

Training round 1
Error :  448.9498187426883
--------------------------------------------------
Training round 2
Error :  348.1941849909621
--------------------------------------------------
Training round 3
Error :  304.5561704421281
--------------------------------------------------
Training round 4
Error :  586.3211033064907
--------------------------------------------------
Training round 5
Error :  270.9928916825817
--------------------------------------------------
Training round 6
Error :  301.66483531282535
--------------------------------------------------
Training round 7
Error :  343.6461231774476
--------------------------------------------------
Training round 8
Error :  327.13033655475334
--------------------------------------------------
Training round 9
Error :  239.2097256250434
--------------------------------------------------
Training round 10
Error :  354.5692918451405
--------------------------------------------------
Training round 11
Error :  346.79229466683165
-

In [13]:
# Convert the collected per-round errors to an array so the stats are vectorised
mse_list = np.array(mse_list)

mean_error, std_error = mse_list.mean(), mse_list.std()
print('Mean of errors : ', mean_error)
print('Standard deviation : ', std_error)

Mean of errors :  393.3776698796417
Standard deviation :  158.8985379764239


### The mean of the 50 errors is higher than it was before; however, the standard deviation is much lower