# Concrete Compressive Strength

## Importing Libraries

In [47]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Input
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

### Importing Data and Preprocessing

In [48]:
concrete_data=pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')

In [49]:
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [50]:
concrete_data.shape

(1030, 9)

In [51]:
concrete_data.isna().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [52]:
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [53]:
concrete_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Cement              1030 non-null   float64
 1   Blast Furnace Slag  1030 non-null   float64
 2   Fly Ash             1030 non-null   float64
 3   Water               1030 non-null   float64
 4   Superplasticizer    1030 non-null   float64
 5   Coarse Aggregate    1030 non-null   float64
 6   Fine Aggregate      1030 non-null   float64
 7   Age                 1030 non-null   int64  
 8   Strength            1030 non-null   float64
dtypes: float64(8), int64(1)
memory usage: 72.6 KB


In [54]:
cols= concrete_data.columns

### Defining predictor and Target Variables

## Part A

In [56]:
predictorsX=concrete_data[cols[cols!='Strength']]

In [57]:
predictorsX.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [58]:
targetY=concrete_data['Strength']

In [59]:
targetY.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

In [60]:
pred_cols=predictorsX.shape[1]

### Defining the Model with one hidden layer

In [61]:
def regression_model1():
    
    model=Sequential()
    model.add(Input(shape=(pred_cols,)))
    model.add(Dense(10,activation='relu'))
    model.add(Dense(1))
    
    model.compile(optimizer='adam',loss='mean_squared_error')
    
    return model

### Defining function to train model

In [62]:
def model_training(X,Y, regression_model,epochs):
    
    mse_list=[]
    
    for epoc in range(epochs):
    
        model=regression_model
        
        xtrain,xtest,ytrain,ytest=train_test_split(X,Y,test_size=0.3,random_state=42)
        
        model.fit(xtrain,ytrain,epochs=50,verbose=0)
        
        y_predict=model.predict(xtest)
        
        mse=mean_squared_error(ytest,y_predict)
        mse_list.append(mse)
        
        print(f"Run {epoc+1}, MSE: {mse}")
    
    # Calculate the mean and standard deviation of the MSE values
    mean_mse = np.mean(mse_list)
    std_mse = np.std(mse_list)
    
    print(f"Mean MSE: {mean_mse}")
    print(f"Standard Deviation of MSE: {std_mse}")
        

In [63]:
model_training(predictorsX,targetY,regression_model=regression_model1(),epochs=50)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Run 1, MSE: 230.71931653038166
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
Run 2, MSE: 119.80531137125028
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Run 3, MSE: 93.28201999469883
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
Run 4, MSE: 78.0637698310586
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Run 5, MSE: 63.470151061498555
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
Run 6, MSE: 58.14891208560888
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Run 7, MSE: 51.559095975461034
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Run 8, MSE: 51.29153569282839
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Run 9, MSE: 50.968650576273866
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

### Normalising the data

## Part B

In [65]:
scaler=StandardScaler()

predictors=pd.DataFrame(scaler.fit_transform(predictorsX),columns=predictorsX.columns)
target = pd.DataFrame(scaler.fit_transform(targetY.values.reshape(-1, 1)), columns=['target'])

predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.477915,-0.856888,-0.847144,-0.916764,-0.620448,0.863154,-1.21767,-0.279733
1,2.477915,-0.856888,-0.847144,-0.916764,-0.620448,1.056164,-1.21767,-0.279733
2,0.491425,0.795526,-0.847144,2.175461,-1.039143,-0.526517,-2.240917,3.553066
3,0.491425,0.795526,-0.847144,2.175461,-1.039143,-0.526517,-2.240917,5.057677
4,-0.790459,0.678408,-0.847144,0.488793,-1.039143,0.070527,0.647884,4.978487


In [66]:
target.head()

Unnamed: 0,target
0,2.645408
1,1.561421
2,0.266627
3,0.31334
4,0.507979


In [67]:
model_training(predictors,target,regression_model=regression_model1(),epochs=50)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Run 1, MSE: 0.23058902216231733
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Run 2, MSE: 0.16388995623054925
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Run 3, MSE: 0.152897291004724
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Run 4, MSE: 0.15018084099400736
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Run 5, MSE: 0.14665957969817725
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Run 6, MSE: 0.14493377408814292
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Run 7, MSE: 0.14374800171005864
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Run 8, MSE: 0.14017069860083034
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Run 9, MSE: 0.13526830161477543
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

The mean squared error (MSE) of the model decreased dramatically from 57.15 in Part A to 0.128 in Part B, despite using the same number of epochs for training.

Insights:
Effect of Data Normalization: The key difference between Part A and Part B is the normalization of the data. Normalizing the predictors likely improved the model's ability to converge by ensuring that all features contributed proportionally to the learning process. This step helps prevent large-scale features from dominating smaller-scale ones, leading to a significant reduction in prediction error.

Same Epochs, Better Results: The same number of epochs achieved better performance in Part B due to improved input data quality, emphasizing the importance of preprocessing in deep learning.

## Part C

In [69]:
model_training(predictors,target,regression_model=regression_model1(),epochs=100)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
Run 1, MSE: 0.20876835680507777
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Run 2, MSE: 0.16963520845966798
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Run 3, MSE: 0.1626825832057825
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
Run 4, MSE: 0.15766278776029052
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step  
Run 5, MSE: 0.15002866183207866
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step  
Run 6, MSE: 0.1457243912065982
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step 
Run 7, MSE: 0.1457144962452833
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
Run 8, MSE: 0.14183553237746294
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
Run 9, MSE: 0.13947058703526505
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

The mean squared error (MSE) increased slightly from 0.128 in Part B to 0.138 in Part C, despite increasing the number of epochs during training.

Insights:
Diminishing Returns on Epochs: The slight increase in MSE indicates that simply increasing the number of epochs does not always lead to better performance. This could be due to the model reaching its optimal learning capacity within the initial epochs, and further training may introduce noise or overfitting.

Plateau in Model Performance: The lack of significant improvement suggests that the model might have already converged in Part B. Additional epochs are not contributing meaningful learning and could instead be causing minor fluctuations in performance.

### Defining model with 3 hidden layers

## Part D

In [71]:
def regression_model2():

    model=Sequential()
    model.add(Input(shape=(pred_cols,)))
    model.add(Dense(10,activation='relu'))
    model.add(Dense(10,activation='relu'))
    model.add(Dense(10,activation='relu'))
    model.add(Dense(1))
    
    model.compile(optimizer='adam',loss='mean_squared_error')
    
    return model

In [83]:
model_training(predictors,target,regression_model=regression_model2(),epochs=50)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
Run 1, MSE: 0.18462158980362564
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
Run 2, MSE: 0.16048986508870816
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Run 3, MSE: 0.15759415604827692
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
Run 4, MSE: 0.15404935223153002
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Run 5, MSE: 0.14958466949899288
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Run 6, MSE: 0.14645642582019525
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
Run 7, MSE: 0.1442186207645002
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Run 8, MSE: 0.14259625392652212
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Run 9, MSE: 0.14143552357397376
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

The mean squared error (MSE) increased from 0.128 in Part B to 0.138 in Part D when the number of hidden layers was increased.

Insights:
Impact of Additional Complexity: Increasing the number of hidden layers adds complexity to the model, potentially making it harder to generalize to unseen data. This could explain the slight increase in MSE, as the model may have started to overfit the training data or faced challenges in optimizing effectively.

Saturation of Learning: The performance plateau suggests that the dataset and problem complexity may not require additional hidden layers. For relatively simple tasks or datasets, adding more layers can introduce redundancy and unnecessary computational overhead without improving results.

In [84]:
def regression_model():
    
    model=Sequential()
    model.add(Input(shape=(pred_cols,)))
    model.add(Dense(100,activation='relu'))
    model.add(Dense(100,activation='relu'))
    model.add(Dense(1))

    model.compile(optimizer='adam',loss='mean_squared_error')

    return model

In [85]:
model=regression_model()
print(model)

<Sequential name=sequential_207, built=True>


In [86]:
model_training(predictors,target,regression_model=regression_model(),epochs=50)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Run 1, MSE: 0.10994450366759112
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
Run 2, MSE: 0.09215148951827913
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Run 3, MSE: 0.10415949157450806
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
Run 4, MSE: 0.1006872824279386
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Run 5, MSE: 0.09930534807903013
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Run 6, MSE: 0.09606451478991945
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Run 7, MSE: 0.09993776524167408
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Run 8, MSE: 0.09387736092936191
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Run 9, MSE: 0.10590069836506959
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

In [87]:
model.fit(predictors,targetY, validation_split=0.3,epochs=100, verbose=2)

Epoch 1/100
23/23 - 13s - 549ms/step - loss: 1588.0471 - val_loss: 1053.3977
Epoch 2/100
23/23 - 0s - 20ms/step - loss: 1269.5583 - val_loss: 777.5127
Epoch 3/100
23/23 - 1s - 30ms/step - loss: 806.3644 - val_loss: 433.1950
Epoch 4/100
23/23 - 0s - 19ms/step - loss: 386.2609 - val_loss: 214.1478
Epoch 5/100
23/23 - 0s - 19ms/step - loss: 251.1802 - val_loss: 175.8321
Epoch 6/100
23/23 - 0s - 20ms/step - loss: 226.3924 - val_loss: 169.0334
Epoch 7/100
23/23 - 0s - 20ms/step - loss: 207.0029 - val_loss: 163.3777
Epoch 8/100
23/23 - 1s - 28ms/step - loss: 195.1004 - val_loss: 158.9497
Epoch 9/100
23/23 - 0s - 22ms/step - loss: 185.4662 - val_loss: 156.2041
Epoch 10/100
23/23 - 1s - 24ms/step - loss: 177.6386 - val_loss: 153.8455
Epoch 11/100
23/23 - 1s - 25ms/step - loss: 170.8777 - val_loss: 154.2149
Epoch 12/100
23/23 - 1s - 25ms/step - loss: 166.5253 - val_loss: 153.8207
Epoch 13/100
23/23 - 0s - 19ms/step - loss: 161.3666 - val_loss: 149.3184
Epoch 14/100
23/23 - 0s - 21ms/step - loss

<keras.src.callbacks.history.History at 0x2132d7ba3c0>

In [108]:
scores = model.evaluate(predictors, targetY, verbose=2)

33/33 - 0s - 12ms/step - loss: 60.9356


In [110]:
print('Accuracy: {:.2f}% \nError: {:.2f}'.format(scores, (1 - scores)))


Accuracy: 60.94% 
Error: -59.94


In [90]:
model.save('regression_model.keras')