In [32]:
# import libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense

*Import data*


In [19]:
# Read the concrete dataset from the CSV file and preview its first five rows.
concrete_data = pd.read_csv('concrete_data.csv')
concrete_data.head(n=5)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


Clean data

In [20]:
# Count the missing values in each column (isna is the pandas alias of isnull).
concrete_data.isna().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

#### Split data into predictors and target

In [21]:
# Separate the feature columns from the column the model has to predict.
concrete_data_columns = concrete_data.columns
predictors = concrete_data.drop(columns=['Strength'])  # every column except Strength
target = concrete_data['Strength']  # Strength column

In [22]:
# Preview the first five rows of the feature table.
predictors.head(n=5)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [23]:
# Preview the first five values of the target column.
target.head(n=5)

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

#### Data normalization

In [24]:
# Standardize each predictor column: subtract the column mean, then divide by
# the column standard deviation (pandas' default .std()).
predictors_norm = predictors.sub(predictors.mean()).div(predictors.std())
predictors_norm.head(n=5)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


### Build a Neural Network

In [37]:
# define regression model
def regression_model(layers, neurons, n_cols, activation, optimizer, loss):
    """Build and compile a fully connected regression network.

    All hidden layers share the same configuration (``neurons`` units with
    the given ``activation``). The output layer is a single ``Dense(1)`` unit.

    Args:
        layers: Number of hidden layers to add. Zero (or a negative value)
            yields a model that consists of the output layer only.
        neurons: Number of units in every hidden layer.
        n_cols: Number of input features; given to the first hidden layer as
            ``input_shape=(n_cols,)``.
        activation: Activation passed to every hidden ``Dense`` layer.
        optimizer: Optimizer passed to ``model.compile``.
        loss: Loss passed to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # create model
    model = Sequential()
    hidden_indices = range(layers)
    for index in hidden_indices:
        # Only the first layer of a Sequential model needs the input shape;
        # later layers take their input size from the layer before them.
        shape_kwargs = {"input_shape": (n_cols,)} if index == 0 else {}
        model.add(Dense(neurons, activation=activation, **shape_kwargs))
    model.add(Dense(1))
    # Report the count from the range itself: the loop variable is never bound
    # when no hidden layer is requested, so it must not be used here.
    print(f"Layers created: {len(hidden_indices)}")

    # compile model
    model.compile(optimizer=optimizer, loss=loss)
    return model

In [36]:
# build the model
layers, neurons = 1, 10  # one hidden layer with ten units
n_cols = len(predictors_norm.columns)  # number of predictors
model = regression_model(
    layers=layers, neurons=neurons, n_cols=n_cols,
    activation="relu", optimizer="adam", loss="mean_squared_error",
)
model  # bare expression so the notebook displays the model object

Layer created: 1


<keras.engine.sequential.Sequential at 0x7fd651c4f460>

1. Randomly split the data into a training and test sets by holding 30% of the data for testing. 

In [45]:
# Hold out 30% of the rows for testing; the fixed seed makes this split repeatable.
# NOTE(review): this splits the raw `predictors`; `predictors_norm` is computed
# earlier in the notebook but is not used here - confirm which one was intended.
X_train, X_test, y_train, y_test = train_test_split(
    predictors,
    target,
    test_size=0.3,
    random_state=42,
)

2. Train the model on the training data using 50 epochs

In [None]:
# fit the model
epochs = 50
model.fit(
    X_train,
    y_train,
    validation_split=0.3,  # fraction of the training rows Keras sets aside for validation
    epochs=epochs,
    verbose=2,
)

3. Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength.

In [44]:
# Predict on the held-out rows and score the predictions against the true values.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"The mean squared error is {mse}")

The mean squared error is 80.41278293871343


4. Repeat steps 1 - 3, 50 times, i.e., create a list of 50 mean squared errors.

In [48]:
# Repeat split -> train -> evaluate 50 times and collect one test MSE per run.
# NOTE(review): the raw `predictors` are used here, not `predictors_norm` -
# confirm which one was intended.
epochs = 50
mse_list = []
for j in range(50):
    print(f"Cicle N: {j+1}")
    # Seed the split with the repetition index: every run gets a different
    # 70/30 split, yet the whole experiment stays reproducible.
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=j)
    # Build a new network for every repetition; re-fitting the same model
    # would continue the previous run's training and make the MSEs dependent.
    model = regression_model(
        layers=layers,
        neurons=neurons,
        n_cols=n_cols,
        activation="relu",
        optimizer="adam",
        loss="mean_squared_error",
    )
    model.fit(X_train, y_train, epochs=epochs, verbose=0)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_list.append(mse)

Cicle N: 1
Cicle N: 2
Cicle N: 3
Cicle N: 4
Cicle N: 5
Cicle N: 6
Cicle N: 7
Cicle N: 8
Cicle N: 9
Cicle N: 10
Cicle N: 11
Cicle N: 12
Cicle N: 13
Cicle N: 14
Cicle N: 15
Cicle N: 16
Cicle N: 17
Cicle N: 18
Cicle N: 19
Cicle N: 20
Cicle N: 21
Cicle N: 22


5. Report the mean and the standard deviation of the mean squared errors.

In [None]:
# Summarize the repeated runs: average test MSE and its spread (NumPy's default np.std).
mse_mean, mse_std = np.mean(mse_list), np.std(mse_list)
print(f"Mean of MSE is: {mse_mean}", f"STD of MSE is: {mse_std}", sep="\n")