# Import Libs & Data

In [18]:
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [19]:
import warnings
warnings.filterwarnings("ignore")

In [20]:
concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')

In [21]:
print(concrete_data.columns.values)

['Cement' 'Blast Furnace Slag' 'Fly Ash' 'Water' 'Superplasticizer'
 'Coarse Aggregate' 'Fine Aggregate' 'Age' 'Strength']


In [22]:
# We can easily check the dataset for any missing values by using the following one-liner:  
concrete_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Cement              1030 non-null   float64
 1   Blast Furnace Slag  1030 non-null   float64
 2   Fly Ash             1030 non-null   float64
 3   Water               1030 non-null   float64
 4   Superplasticizer    1030 non-null   float64
 5   Coarse Aggregate    1030 non-null   float64
 6   Fine Aggregate      1030 non-null   float64
 7   Age                 1030 non-null   int64  
 8   Strength            1030 non-null   float64
dtypes: float64(8), int64(1)
memory usage: 72.5 KB


In [23]:
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


# Data Analysis 

In [24]:
# Remove the results from the dataset (which is the target data)
concrete_data_columns = concrete_data.columns
predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] 
# The target data is the result
target = concrete_data['Strength'] 

In [25]:
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [26]:
target.head() # The target data "strength"

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

In [27]:
# In project A, we do not normalize the data and we use the original data

# Z = (X - μ) / σ, its a standard score of the sample (Gaussian distribution)
# predictors_norm = (predictors - predictors.mean()) / predictors.std()
# predictors_norm.head()

In [28]:
# Let's save the number of predictors to n_cols
n_cols = predictors.shape[1] # number of predictors

# Model Build

In [29]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=0)

# Display the shapes of the resulting datasets
print(f"Training predictors shape: {X_train.shape}")
print(f"Test predictors shape: {X_test.shape}")
print(f"Training target shape: {y_train.shape}")
print(f"Test target shape: {y_test.shape}")

Training predictors shape: (721, 8)
Test predictors shape: (309, 8)
Training target shape: (721,)
Test target shape: (309,)


In [30]:
# Keras model

# define regression model
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model


In [31]:
# build the model
model = regression_model()

In [32]:
# fit the model using only the training data
model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=1)

Epoch 1/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 5197.6958 - val_loss: 3735.1670
Epoch 2/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 3686.2979 - val_loss: 3254.8660
Epoch 3/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 3169.3062 - val_loss: 2741.7051
Epoch 4/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 2538.8127 - val_loss: 2259.1184
Epoch 5/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1973.7451 - val_loss: 1799.6979
Epoch 6/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1564.3905 - val_loss: 1360.6893
Epoch 7/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1152.8701 - val_loss: 1036.2507
Epoch 8/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 835.6351 - val_loss: 808.8464
Epoch 9/50

<keras.src.callbacks.history.History at 0x7f94c9e45e10>

# Predict the results

In [33]:
# Predict the results for the test set
y_pred = model.predict(X_test)

# Display the predicted results
print(y_pred)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[[42.269695]
 [16.483187]
 [53.150734]
 [40.259804]
 [22.578634]
 [53.589314]
 [44.957375]
 [24.888083]
 [48.64246 ]
 [41.30302 ]
 [20.077353]
 [33.551754]
 [25.933153]
 [31.686312]
 [46.740906]
 [50.609123]
 [28.66461 ]
 [27.830996]
 [46.976536]
 [30.125208]
 [43.49379 ]
 [30.244902]
 [26.677877]
 [46.64002 ]
 [24.894087]
 [24.558958]
 [63.489986]
 [23.61991 ]
 [50.609123]
 [43.19889 ]
 [20.430029]
 [35.78502 ]
 [19.682367]
 [27.637583]
 [26.58106 ]
 [15.608369]
 [53.252785]
 [22.55158 ]
 [27.951769]
 [21.19789 ]
 [46.788776]
 [31.134897]
 [29.498224]
 [30.491713]
 [46.496044]
 [45.935658]
 [31.087729]
 [44.78004 ]
 [35.696766]
 [55.112385]
 [33.871582]
 [22.113218]
 [29.016466]
 [29.63494 ]
 [36.549168]
 [44.993538]
 [30.567335]
 [47.444344]
 [50.394623]
 [30.513636]
 [31.815344]
 [20.04429 ]
 [40.527027]
 [44.958633]
 [20.068098]
 [41.4304  ]
 [31.485083]
 [32.06912 ]
 [20.721823]
 [23.022894]
 [25.39102 ]
 [2

In [34]:
# Compute the mean squared error between the predicted and actual concrete strength
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 116.38107947689312


# For the remaining 50-1 = 49 loops

In [35]:
# Initialize a list to store the mean squared errors
mse_list = [mse]

# Repeat the process 50 times 
for i in range(1,50):
    # Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=i)
    
    # Build the model
    model = regression_model()
    
    # Fit the model using only the training data
    model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=0)
    
    # Predict the results for the test set
    y_pred = model.predict(X_test)
  
    # Compute the mean squared error between the predicted and actual concrete strength
    mse = mean_squared_error(y_test, y_pred)
    
    # Append the mse to the list
    mse_list.append(mse)

# Calculate the mean and standard deviation of the mean squared errors
mean_mse = np.mean(mse_list)
std_mse = np.std(mse_list)

# Print the results
print('\n')
print("Below is the mean and standard deviation of " + str(len(mse_list)) + " mean squared errors without normalized data. Total number of epochs for each training is: 50\n")
print("Mean: " + str(mean_mse))
print("Standard Deviation: " + str(std_mse))

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━