## Basic imports

In [1]:
import pandas as pd
import numpy as np

## Reading data

In [20]:
# Load the concrete-mix table and cast every column to float64 in one chained step
df = pd.read_csv('concrete_data.csv').astype('float64')
print(df.shape)
df.head()  # default n is 5 rows

(1030, 9)


Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28.0,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28.0,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270.0,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365.0,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360.0,44.3


In [21]:
# Print per-column dtypes and non-null counts to check for missing values
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Cement              1030 non-null   float64
 1   Blast Furnace Slag  1030 non-null   float64
 2   Fly Ash             1030 non-null   float64
 3   Water               1030 non-null   float64
 4   Superplasticizer    1030 non-null   float64
 5   Coarse Aggregate    1030 non-null   float64
 6   Fine Aggregate      1030 non-null   float64
 7   Age                 1030 non-null   float64
 8   Strength            1030 non-null   float64
dtypes: float64(9)
memory usage: 72.5 KB


All 1,030 rows are non-null in every column, so no imputation or row dropping is needed.

## Splitting data into predictors and target

In [22]:
# Every column except 'Strength' is a predictor; 'Strength' alone is the target
cols = df.columns
x = df.loc[:, cols != 'Strength']  # Predictors (boolean mask over the column index)
y = df.loc[:, ['Strength']]  # Target, kept as a one-column DataFrame

In [23]:
# Preview the first five rows of the predictor frame
x.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28.0
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28.0
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270.0
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365.0
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360.0


In [24]:
# Preview the first five rows of the target frame
y.head()

Unnamed: 0,Strength
0,79.99
1,61.89
2,40.27
3,41.05
4,44.3


## Turning values into numpy arrays

In [25]:
# Both frames are already 2-D, so a plain copy to NumPy yields the same
# (rows, columns) arrays that the model and the metric consume below.
x_vals = x.to_numpy(copy = True)
y_vals = y.to_numpy(copy = True)

print('X shape : ', x_vals.shape)
print('Y shape : ', y_vals.shape)

X shape :  (1030, 8)
Y shape :  (1030, 1)


## Imports for making the model and testing it

In [26]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from keras.backend import clear_session
from keras.models import Sequential
from keras.layers import Dense

In [31]:
def make_model(n_features = None, hidden_units = 10):
    """Build and compile a one-hidden-layer regression network.

    Parameters
    ----------
    n_features : int, optional
        Number of input columns. When omitted, falls back to
        ``x_vals.shape[1]`` (the notebook-level predictor array), so a bare
        ``make_model()`` call behaves exactly as before.
    hidden_units : int, optional
        Width of the hidden ReLU layer. Defaults to 10.

    Returns
    -------
    Sequential
        ``Dense(hidden_units, relu) -> Dense(1)``, compiled with the
        ``'adam'`` optimizer and ``'mse'`` loss.
    """
    # Resolve the default lazily so the function only touches the global
    # when the caller did not say how wide the input is.
    if n_features is None:
        n_features = x_vals.shape[1]

    model = Sequential()
    model.add(Dense(hidden_units, activation = 'relu', input_shape = (n_features, )))
    model.add(Dense(1))  # single linear output for regression

    model.compile(optimizer = 'adam', loss = 'mse')
    return model

## Main training and testing loop

In [43]:
N_ROUNDS = 50        # independent train/evaluate repetitions
N_EPOCHS = 50        # training epochs per repetition
TEST_FRACTION = 0.3  # share of rows held out for evaluation

mse_list = [] # List to store the test MSE of every round

# main loop
for i in range(N_ROUNDS):

    # Release backend state left behind by the previous round's model so
    # memory use does not grow with every repetition.
    clear_session()

    # random_state = i gives each round a different but repeatable split.
    # NOTE: weight initialisation is still unseeded, so errors will not be
    # bit-for-bit identical across notebook runs.
    train_x, test_x, train_y, test_y = train_test_split(
        x_vals, y_vals, test_size = TEST_FRACTION, random_state = i)

    model = make_model()
    model.fit(train_x, train_y, epochs = N_EPOCHS, verbose = 0)
    print(f'Training round {i + 1}')

    # verbose = 0 keeps newer Keras versions from printing a progress bar per round
    preds = model.predict(test_x, verbose = 0)
    error = mean_squared_error(test_y, preds)
    print('Error : ', error)
    print('-' * 50)
    mse_list.append(error)

Training round 1
Error :  147.62594332208556
--------------------------------------------------
Training round 2
Error :  113.40976605844617
--------------------------------------------------
Training round 3
Error :  133.1857234642808
--------------------------------------------------
Training round 4
Error :  166.88232162921742
--------------------------------------------------
Training round 5
Error :  515.7839629127968
--------------------------------------------------
Training round 6
Error :  88.61824484131563
--------------------------------------------------
Training round 7
Error :  1464.1224611744615
--------------------------------------------------
Training round 8
Error :  126.74801213832279
--------------------------------------------------
Training round 9
Error :  158.44799274572347
--------------------------------------------------
Training round 10
Error :  110.00182215085799
--------------------------------------------------
Training round 11
Error :  115.98080366875

In [44]:
# Collapse the per-round errors into an array and report their centre and spread
mse_list = np.asarray(mse_list)

print('Mean of errors : ', np.mean(mse_list))
print('Standard deviation : ', np.std(mse_list))  # population std (ddof = 0)

Mean of errors :  329.20811179498565
Standard deviation :  489.8610070360774
