# Building Regression Model (Sequential) in Keras
Creating a sequential regression model in a Keras model. Compiling the regression model, training and evaluating the model for a dataset.
#### Prerequisites
- Preinstalled Tensorflow & Keras

In [19]:
import pandas as pd
import numpy as np
import keras

import warnings
warnings.simplefilter('ignore', FutureWarning)

In [None]:
# Load a dataset (concrete features data) into a pandas dataframe 

filepath='Data/concrete_data.csv'
concrete_data = pd.read_csv(filepath)

concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [21]:
# Display No. of data items and features of Dataset and features and the rank of dataset

concrete_data.shape, concrete_data.ndim

((1030, 9), 2)

In [22]:
# Display various statistical measures of the numerical features

concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [23]:
# Check for null values in the dataset

concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [24]:
# Extract the name of all features/columns from the dataset

concrete_data_columns = concrete_data.columns

In [25]:
# Separate the predictor features and target (to be predicted) into different variables

predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
target = concrete_data['Strength'] # Strength column

In [26]:
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [27]:
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

In [28]:
# Scale/Convert the data values into standard range (Z-Score Standardize)

predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [29]:
# Get no. of predictor features

n_cols = predictors_norm.shape[1] # number of predictors

In [30]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Input

In [31]:
# Define regression model (3 layer deep Sequential Model)

def regression_model():
    # create model
    model = Sequential()
    model.add(Input(shape=(n_cols,)))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['r2_score'])
    return model

In [32]:
# Build the model and display the architecture
model = regression_model()
model.summary()

In [33]:
# Custom callback function to avoid printing metrics for every training epoch (Optional/Can be omitted)

import tensorflow as tf

class PrintCall(tf.keras.callbacks.Callback):
    def __init__(self, interval=10):
        self.interval = interval
        
    def on_epoch_end(self, epoch, logs=None):
        if (epoch + 1) % self.interval == 0:
            print(f"\nEpoch {epoch + 1}", end=" ")
            for k, v in logs.items():
                print(f"{k}: {v:.4f}", end=" ")

In [34]:
# Fit the model
model.fit(predictors_norm, target, validation_split=0.3, epochs=200, verbose=0, callbacks=[PrintCall(interval=25)])


Epoch 25 loss: 198.9782 r2_score: 0.3662 val_loss: 153.6000 val_r2_score: 0.1496 
Epoch 50 loss: 145.7828 r2_score: 0.5356 val_loss: 117.6684 val_r2_score: 0.3485 
Epoch 75 loss: 123.4304 r2_score: 0.6068 val_loss: 114.7587 val_r2_score: 0.3647 
Epoch 100 loss: 107.8013 r2_score: 0.6566 val_loss: 116.7162 val_r2_score: 0.3538 
Epoch 125 loss: 95.3707 r2_score: 0.6962 val_loss: 116.3754 val_r2_score: 0.3557 
Epoch 150 loss: 86.3654 r2_score: 0.7249 val_loss: 109.7641 val_r2_score: 0.3923 
Epoch 175 loss: 80.4461 r2_score: 0.7437 val_loss: 103.7980 val_r2_score: 0.4253 
Epoch 200 loss: 76.9883 r2_score: 0.7548 val_loss: 98.5942 val_r2_score: 0.4541 

<keras.src.callbacks.history.History at 0x7c270e56a0f0>

In [35]:
# Redefining the model for more depth to improve prediction power (Doesn't work all the time)

def regression_model():
    # create model
    model = Sequential()
    model.add(Input(shape=(n_cols,)))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['r2_score'])
    return model

In [36]:
# Rebuild and display the architecture of model

model = regression_model()
model.summary()

In [37]:
# Fit the model

model.fit(predictors_norm, target, validation_split=0.1, epochs=200, verbose=0, callbacks=[PrintCall(interval=25)])


Epoch 25 loss: 111.7885 r2_score: 0.6175 val_loss: 102.6190 val_r2_score: 0.2882 
Epoch 50 loss: 39.3676 r2_score: 0.8653 val_loss: 41.7938 val_r2_score: 0.7101 
Epoch 75 loss: 27.0686 r2_score: 0.9074 val_loss: 30.9360 val_r2_score: 0.7854 
Epoch 100 loss: 22.4092 r2_score: 0.9233 val_loss: 35.6449 val_r2_score: 0.7527 
Epoch 125 loss: 19.2214 r2_score: 0.9342 val_loss: 24.6626 val_r2_score: 0.8289 
Epoch 150 loss: 17.4256 r2_score: 0.9404 val_loss: 22.5743 val_r2_score: 0.8434 
Epoch 175 loss: 15.7772 r2_score: 0.9460 val_loss: 23.0346 val_r2_score: 0.8402 
Epoch 200 loss: 14.4989 r2_score: 0.9504 val_loss: 20.1126 val_r2_score: 0.8605 

<keras.src.callbacks.history.History at 0x7c270cbe7920>

# Notes
-