In [54]:
import pandas as pd
import numpy as np
import keras

import warnings
warnings.filterwarnings('ignore')

Let's download the data and read it into a <em>pandas</em> dataframe.


In [55]:
filepath='https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv'
df_data = pd.read_csv(filepath)

df_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [56]:
df_data.shape

(1030, 9)

In [57]:
df_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [58]:
## No null values are present

So, there are approximately 1000 samples to train our model on. Because of the few samples, we have to be careful not to overfit the training data.


In [59]:
df_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


The data looks very clean and is ready to be used to build our model.


#### Split data into predictors and target


The target variable in this problem is the concrete sample strength. Therefore, our predictors will be all the other columns.


In [78]:
df_columns = df_data.columns

In [79]:
X = df_data[df_columns[df_columns != 'Strength']] # all columns except Strength
y = df_data['Strength'] # Strength column

<a id="item2"></a>


Let's do a quick sanity check of the predictors and the target dataframes.


In [80]:
X.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [81]:
y.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

 normalizing the data by substracting the mean and dividing by the standard deviation.


In [82]:
X_std = (X - X.mean()) / X.std()
X_std.head()


Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [83]:

n_cols = X_std.shape[1] # number of predictors
n_cols

8

<a id="item1"></a>


<a id='item32'></a>


##  Import Keras Packages



In [84]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Input
from sklearn.model_selection import train_test_split

<a id='item33'></a>


##### Building aneural network pattern

In [85]:

# define regression model
def regression_model():
    # create model
    model = Sequential()
    model.add(Input(shape=(n_cols,)))
    model.add(Dense(20, activation='relu'))
    model.add(Dense(20, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

<a id="item4"></a>


#### Train and Test the Network


In [86]:

# build the model
model = regression_model()

Next, we will train and test the model at the same time using the *fit* method. We will leave out 30% of the data for validation and we will train the model for 100 epochs.


In [87]:
# fit the model
### Splitting into train and test data
X_train, X_test, y_train, y_test = train_test_split(X_std, y, test_size=0.3, random_state=42)

## Splitting the training data into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size = 0.2, random_state=42)




In [88]:
model.fit(X_train, y_train, validation_data = (X_val, y_val), epochs=100, verbose=2)

Epoch 1/100
18/18 - 2s - 99ms/step - loss: 1582.7188 - val_loss: 1574.7981
Epoch 2/100
18/18 - 0s - 13ms/step - loss: 1557.2666 - val_loss: 1547.3390
Epoch 3/100
18/18 - 0s - 12ms/step - loss: 1524.1846 - val_loss: 1507.8446
Epoch 4/100
18/18 - 0s - 13ms/step - loss: 1477.2673 - val_loss: 1452.0286
Epoch 5/100
18/18 - 0s - 13ms/step - loss: 1412.0493 - val_loss: 1378.0552
Epoch 6/100
18/18 - 0s - 12ms/step - loss: 1328.3962 - val_loss: 1283.1512
Epoch 7/100
18/18 - 0s - 12ms/step - loss: 1223.4703 - val_loss: 1167.8536
Epoch 8/100
18/18 - 0s - 13ms/step - loss: 1095.0060 - val_loss: 1032.8444
Epoch 9/100
18/18 - 0s - 12ms/step - loss: 948.7768 - val_loss: 878.7350
Epoch 10/100
18/18 - 0s - 13ms/step - loss: 788.4917 - val_loss: 716.7923
Epoch 11/100
18/18 - 0s - 13ms/step - loss: 629.9517 - val_loss: 565.3676
Epoch 12/100
18/18 - 0s - 12ms/step - loss: 494.1459 - val_loss: 436.4882
Epoch 13/100
18/18 - 0s - 13ms/step - loss: 384.3525 - val_loss: 348.6535
Epoch 14/100
18/18 - 0s - 13ms/

<keras.src.callbacks.history.History at 0x241b83b6ad0>

In [89]:
test_loss = model.evaluate(X_test, y_test, verbose=2)

print(f"Test Loss (MSE): {test_loss}")

10/10 - 0s - 11ms/step - loss: 105.8367
Test Loss (MSE): 105.8366928100586


In [90]:
X_train.mean()

Cement                0.008300
Blast Furnace Slag   -0.016204
Fly Ash               0.011703
Water                 0.006248
Superplasticizer      0.000237
Coarse Aggregate      0.014261
Fine Aggregate       -0.013141
Age                   0.009636
dtype: float64

In [107]:

### Developing a 5 layer network with 50 nodes in each hidden layer

def reg_model():
    model = Sequential()
    model.add(Input(shape = (n_cols,)))
    model.add(Dense(50, activation = 'relu'))
    model.add(Dense(50, activation = 'relu'))
    model.add(Dense(50, activation = 'relu'))
    model.add(Dense(50, activation = 'relu'))
    model.add(Dense(50, activation = 'relu'))
    model.add(Dense(1))

    model.compile(optimizer= 'adam', loss = 'mean_squared_error')
    return model



In [108]:

model = reg_model()

model.fit(X_train, y_train,  epochs = 70, validation_data = (X_val, y_val), verbose= 2)

Epoch 1/70
18/18 - 3s - 168ms/step - loss: 1553.9705 - val_loss: 1494.6523
Epoch 2/70
18/18 - 0s - 13ms/step - loss: 1298.2616 - val_loss: 969.9628
Epoch 3/70
18/18 - 0s - 12ms/step - loss: 511.9559 - val_loss: 404.4744
Epoch 4/70
18/18 - 0s - 13ms/step - loss: 274.1971 - val_loss: 240.4105
Epoch 5/70
18/18 - 0s - 13ms/step - loss: 215.0773 - val_loss: 213.7638
Epoch 6/70
18/18 - 0s - 13ms/step - loss: 192.9913 - val_loss: 202.9961
Epoch 7/70
18/18 - 0s - 13ms/step - loss: 178.6821 - val_loss: 193.5481
Epoch 8/70
18/18 - 0s - 16ms/step - loss: 165.8596 - val_loss: 187.8308
Epoch 9/70
18/18 - 0s - 17ms/step - loss: 158.0285 - val_loss: 173.1501
Epoch 10/70
18/18 - 0s - 14ms/step - loss: 145.8719 - val_loss: 169.3946
Epoch 11/70
18/18 - 0s - 12ms/step - loss: 138.1277 - val_loss: 160.9310
Epoch 12/70
18/18 - 0s - 13ms/step - loss: 124.4787 - val_loss: 150.6331
Epoch 13/70
18/18 - 0s - 13ms/step - loss: 114.1429 - val_loss: 139.8368
Epoch 14/70
18/18 - 0s - 11ms/step - loss: 104.5070 - va

<keras.src.callbacks.history.History at 0x241bf1d5b50>

In [112]:
## On training data 
train_loss = model.evaluate(X_train, y_train, verbose=2)

print(f"Train Loss (MSE): {train_loss}")

18/18 - 0s - 7ms/step - loss: 16.1640
Train Loss (MSE): 16.16401481628418


In [110]:
test_loss = model.evaluate(X_test, y_test, verbose=2)

print(f"Test Loss (MSE): {test_loss}")

10/10 - 0s - 39ms/step - loss: 42.8469
Test Loss (MSE): 42.846920013427734


In [114]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)


(576, 8)
(309, 8)
(576,)
(309,)


In [121]:
X_new = np.random.uniform(-1, 1, (10, 8))

print(X_new[:10])

print()
print()
model.predict(X_new)



[[-0.0089171  -0.22884755  0.11022199 -0.81701182 -0.36030313  0.6604747
   0.45958964 -0.68247138]
 [-0.14079756  0.52228693  0.6780087  -0.46588226 -0.17071175 -0.68316824
   0.9911349   0.90726789]
 [-0.15642022 -0.28398306  0.90154867 -0.20179677 -0.04814134  0.27511358
  -0.46390098  0.97237378]
 [-0.29048665  0.95625696 -0.35924229  0.89446685 -0.95335604  0.50844716
   0.01918683 -0.65932801]
 [-0.01916932  0.10789008 -0.3361053   0.04126553  0.20704575  0.42765103
   0.08689653 -0.17766041]
 [-0.56526014 -0.21673904  0.9256888   0.33295119  0.72571868  0.9422541
  -0.09437202 -0.74084936]
 [ 0.0023322   0.92338577  0.46228383  0.71616365 -0.00163412 -0.04363412
   0.90065183 -0.80468663]
 [-0.44725405 -0.8897387  -0.07155872  0.77423948  0.64407104  0.26119188
  -0.85041613  0.85330676]
 [ 0.26801402 -0.13952812  0.02672863 -0.90567728  0.36106811  0.37306503
  -0.2986475   0.90301636]
 [ 0.0299281   0.99539147  0.61436061 -0.10500046  0.52972249 -0.15955123
   0.53864152 -0.34

array([[17.80837 ],
       [59.86253 ],
       [51.541992],
       [26.070818],
       [44.073936],
       [14.256024],
       [33.939285],
       [30.402847],
       [58.755474],
       [56.568436]], dtype=float32)