## Part A

01) Import the dataset
 Import required libraries for data handling and deep learning (Ex- pandas, numpy,
sklearn utilities, tensorflow/keras).

 Load the CSV into a DataFrame and separate features X and label y.

 Display:

* Dataset shape
* First few rows of X
* Class balance (counts of 0 and 1)

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf

In [2]:
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow import keras

In [3]:
# import data
data = pd.read_csv("diabetes.csv")
X = data.iloc[:, :8].values # 0-7 columns
y = data.iloc[:, 8].values # outcomes column


# X is also known as features
# y is also known as labels
# labels are the target variable we want to predict

In [4]:
X.shape

(768, 8)

In [5]:
X[:5]

array([[6.000e+00, 1.480e+02, 7.200e+01, 3.500e+01, 0.000e+00, 3.360e+01,
        6.270e-01, 5.000e+01],
       [1.000e+00, 8.500e+01, 6.600e+01, 2.900e+01, 0.000e+00, 2.660e+01,
        3.510e-01, 3.100e+01],
       [8.000e+00, 1.830e+02, 6.400e+01, 0.000e+00, 0.000e+00, 2.330e+01,
        6.720e-01, 3.200e+01],
       [1.000e+00, 8.900e+01, 6.600e+01, 2.300e+01, 9.400e+01, 2.810e+01,
        1.670e-01, 2.100e+01],
       [0.000e+00, 1.370e+02, 4.000e+01, 3.500e+01, 1.680e+02, 4.310e+01,
        2.288e+00, 3.300e+01]])

In [6]:
y

array([1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1,
       1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0,
       1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0,
       1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1,

In [7]:
np.bincount(y)

array([500, 268])

02) Split (and scale) the data

 Split into train/test sets (use a fixed random state).

 Explain why feature scaling is recommended for MLPs; apply StandardScaler

correctly (fit on train only; transform both train and test).

 Set seeds for numpy and tensorflow to improve reproducibility.

In [8]:
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.20, random_state=0, stratify=y)
# stratify ensures that the class distribution is preserved in both train and test sets

In [9]:
X_train

array([[  7.   , 196.   ,  90.   , ...,  39.8  ,   0.451,  41.   ],
       [  2.   ,  81.   ,  72.   , ...,  30.1  ,   0.547,  25.   ],
       [  2.   , 127.   ,  58.   , ...,  27.7  ,   1.6  ,  25.   ],
       ...,
       [  3.   ,  82.   ,  70.   , ...,  21.1  ,   0.389,  25.   ],
       [  7.   , 103.   ,  66.   , ...,  39.1  ,   0.344,  31.   ],
       [  0.   , 161.   ,  50.   , ...,  21.9  ,   0.254,  65.   ]],
      shape=(614, 8))

In [10]:
X_test

array([[2.00e+00, 5.60e+01, 5.60e+01, ..., 2.42e+01, 3.32e-01, 2.20e+01],
       [1.00e+00, 9.20e+01, 6.20e+01, ..., 1.95e+01, 4.82e-01, 2.50e+01],
       [1.00e+00, 1.00e+02, 7.40e+01, ..., 1.95e+01, 1.49e-01, 2.80e+01],
       ...,
       [5.00e+00, 1.66e+02, 7.60e+01, ..., 4.57e+01, 3.40e-01, 2.70e+01],
       [1.00e+00, 1.06e+02, 7.00e+01, ..., 3.42e+01, 1.42e-01, 2.20e+01],
       [2.00e+00, 1.12e+02, 6.80e+01, ..., 3.41e+01, 3.15e-01, 2.60e+01]],
      shape=(154, 8))

In [11]:
y_train

array([1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1,
       0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1,
       0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1,
       1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0,

In [12]:
y_test

array([0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1,
       0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0,
       1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0])

In [13]:
# Standardizing
# standardizing means rescaling the features to have a mean of 0 and a standard deviation of 1

scaler = StandardScaler() # create a scaler object
X_train = scaler.fit_transform(X_train) # fit on train only
X_test = scaler.transform(X_test) # transform test set

In [14]:
# set the random seed
np.random.seed(0)
tf.random.set_seed(0)

In [15]:
X_train

array([[ 0.8803979 ,  2.36205262,  1.05918965, ...,  1.01966869,
        -0.07857322,  0.6266495 ],
       [-0.57822139, -1.24433342,  0.12746695, ..., -0.24027795,
         0.20235726, -0.72073607],
       [-0.57822139,  0.19822099, -0.59720627, ..., -0.55201732,
         3.28381347, -0.72073607],
       ...,
       [-0.28649754, -1.21297355,  0.0239422 , ..., -1.4093006 ,
        -0.26000749, -0.72073607],
       [ 0.8803979 , -0.5544161 , -0.18310729, ...,  0.92874471,
        -0.39169365, -0.21546648],
       [-1.16166911,  1.26445686, -1.01130525, ..., -1.30538748,
        -0.65506598,  2.64772786]], shape=(614, 8))

In [16]:
X_test

array([[-0.57822139, -2.02833039, -0.70073101, ..., -1.00663724,
        -0.42680996, -0.97337087],
       [-0.86994525, -0.89937476, -0.39015678, ..., -1.61712685,
         0.01214391, -0.72073607],
       [-0.86994525, -0.64849573,  0.23099169, ..., -1.61712685,
        -0.96233369, -0.46810128],
       ...,
       [ 0.29695018,  1.42125626,  0.33451644, ...,  1.78602799,
        -0.40339909, -0.55231288],
       [-0.86994525, -0.46033646,  0.0239422 , ...,  0.29227682,
        -0.98281821, -0.97337087],
       [-0.57822139, -0.27217719, -0.07958254, ...,  0.27928768,
        -0.47655807, -0.63652448]], shape=(154, 8))

03) Create the Keras MLP Classifier model

 Build a Sequential model with this architecture (to mirror your sklearn setup):

o Input: shape = (n_features,)

o Dense(5, activation='relu')

o Dense(3, activation='relu')

o Dense(1, activation='sigmoid') (binary output)

 Choose loss and metrics appropriate for binary classification and justify your
choice.

 Choose an optimizer (Ex- Adam) and state your learning-rate rationale

In [17]:
# model = keras.Sequential(
#     [
#         keras.layers.Input(shape=(X_train.shape[1],)),  # Input layer
#         keras.layers.Dense(5, kernel_regularizer='l2'),  # Hidden layer 1 with 5 neurons
#         keras.layers.BatchNormalization(),  # Normalize activations
#         keras.layers.Activation('relu'),  # ReLU activation after BatchNorm
#         keras.layers.Dropout(0.3),  # Drop 30% of units
#         keras.layers.Dense(3, kernel_regularizer='l2'),  # Hidden layer 2 with 3 neurons
#         keras.layers.BatchNormalization(),
#         keras.layers.Activation('relu'),
#         keras.layers.Dropout(0.3),
#         keras.layers.Dense(1, activation='sigmoid'),  # Output layer
#     ]
# )

In [18]:
model = keras.Sequential(
    [
        keras.layers.Input(shape=(X_train.shape[1],)), # input layer
        keras.layers.Dense(5, activation="relu"), # hidden layer 1 with 5 neurons
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(3, activation="relu"), # hidden layer 2 with 3 neurons
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(1, activation="sigmoid"), # output layer
    ]
)

In [19]:
model

<Sequential name=sequential, built=True>

04) Compile and train

 Compile the model specifying optimizer, loss, and metrics.

 Train with suitable epochs, batch_size, validation_split, and verbose.

In [20]:
# compile the model
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# loss = binary_crossentropy is used for binary classification problems
# metrics = accuracy is used to evaluate the model's performance

In [21]:
model.summary()

In [None]:
# training
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True) # Add Early Stopping callback

history = model.fit(
    X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, verbose=1, callbacks=[early_stopping] # Add callback to fit
)

# verbose is 1, which means the model will output detailed logs during training

Epoch 1/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - accuracy: 0.5295 - loss: 0.9556 - val_accuracy: 0.5122 - val_loss: 0.7858
Epoch 2/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5295 - loss: 0.8686 - val_accuracy: 0.5691 - val_loss: 0.7652
Epoch 3/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5642 - loss: 0.8493 - val_accuracy: 0.5610 - val_loss: 0.7498
Epoch 4/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5947 - loss: 0.8006 - val_accuracy: 0.5772 - val_loss: 0.7395
Epoch 5/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5601 - loss: 0.8291 - val_accuracy: 0.5935 - val_loss: 0.7328
Epoch 6/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6375 - loss: 0.7362 - val_accuracy: 0.5935 - val_loss: 0.7241
Epoch 7/100
[1m16/16[0m [32m━━

In [23]:
# validating
# loss, acc = model.evaluate(X_test, y_test, verbose=0) # this is the original code in the lab sheet
loss, acc = model.evaluate(X_test, y_test, verbose=1) # i changed to 1 to get the detailed logs
print(f"Test Accuracy: {acc:.4f}")

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7727 - loss: 0.4601 
Test Accuracy: 0.7727


In [24]:
y_pred_prob = model.predict(X_test).ravel() # predict probabilities
y_pred = (y_pred_prob >= 0.5).astype(int) # convert probabilities to binary predictions
print(y_pred[:10])

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[0 0 0 0 1 0 1 0 0 0]


In [25]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred)

array([[93,  7],
       [28, 26]])

In [26]:
from sklearn.metrics import classification_report
classification_report(y_test, y_pred)

'              precision    recall  f1-score   support\n\n           0       0.77      0.93      0.84       100\n           1       0.79      0.48      0.60        54\n\n    accuracy                           0.77       154\n   macro avg       0.78      0.71      0.72       154\nweighted avg       0.78      0.77      0.76       154\n'

In [27]:
# just for testing
model.predict(X_test).ravel()

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 


array([0.07303092, 0.1118578 , 0.11839826, 0.3537898 , 0.6642966 ,
       0.09878252, 0.80713105, 0.08819179, 0.11962757, 0.29709283,
       0.29824862, 0.1278427 , 0.12308137, 0.1649881 , 0.37239432,
       0.11647525, 0.29824862, 0.36981344, 0.36896455, 0.77769333,
       0.13291289, 0.33000886, 0.40441218, 0.29824862, 0.29824862,
       0.6777841 , 0.59796107, 0.12545943, 0.12586795, 0.1173728 ,
       0.35731906, 0.11342082, 0.60218585, 0.34425044, 0.86567026,
       0.18685998, 0.6822704 , 0.32116935, 0.3020794 , 0.25480556,
       0.12320766, 0.11405945, 0.7166573 , 0.122072  , 0.5755378 ,
       0.63039386, 0.14294498, 0.17727956, 0.29824862, 0.17813063,
       0.260432  , 0.3356113 , 0.18766004, 0.43905243, 0.31821495,
       0.37062564, 0.15905644, 0.22080518, 0.09655952, 0.14105642,
       0.44735095, 0.21175504, 0.6570921 , 0.5359177 , 0.11313461,
       0.48625138, 0.19823791, 0.2590372 , 0.15979673, 0.39121434,
       0.29824862, 0.5435488 , 0.47042334, 0.39846212, 0.32199

In [28]:
# just for testing
y_pred_prob >= 0.5


array([False, False, False, False,  True, False,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False,  True, False, False, False, False, False,  True,  True,
       False, False, False, False, False,  True, False,  True, False,
        True, False, False, False, False, False,  True, False,  True,
        True, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False,  True,
        True, False, False, False, False, False, False, False,  True,
       False, False, False, False, False, False, False,  True, False,
       False,  True, False,  True,  True, False, False,  True, False,
       False, False, False,  True,  True, False, False,  True, False,
       False, False, False, False, False, False, False, False,  True,
       False, False, False, False, False,  True, False, False, False,
       False, False, False,  True, False,  True, False, False, False,
       False, False,

## Part B

01) Import the dataset
 Import required libraries (Ex- pandas, numpy, sklearn utilities, tensorflow/keras).

 Load the CSV; separate features X and target y.

 Display:

o Dataset shape

o First few rows of X

o First few values of y

In [None]:
data = pd.read_csv("Boston.CSV")
X = data.iloc[:, :12].values
y = data.iloc[:, 12].values

In [None]:
X

array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,
        4.9800e+00],
       [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,
        9.1400e+00],
       [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,
        4.0300e+00],
       ...,
       [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
        5.6400e+00],
       [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,
        6.4800e+00],
       [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
        7.8800e+00]], shape=(506, 12))

In [None]:
y

array([24. , 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9, 15. ,
       18.9, 21.7, 20.4, 18.2, 19.9, 23.1, 17.5, 20.2, 18.2, 13.6, 19.6,
       15.2, 14.5, 15.6, 13.9, 16.6, 14.8, 18.4, 21. , 12.7, 14.5, 13.2,
       13.1, 13.5, 18.9, 20. , 21. , 24.7, 30.8, 34.9, 26.6, 25.3, 24.7,
       21.2, 19.3, 20. , 16.6, 14.4, 19.4, 19.7, 20.5, 25. , 23.4, 18.9,
       35.4, 24.7, 31.6, 23.3, 19.6, 18.7, 16. , 22.2, 25. , 33. , 23.5,
       19.4, 22. , 17.4, 20.9, 24.2, 21.7, 22.8, 23.4, 24.1, 21.4, 20. ,
       20.8, 21.2, 20.3, 28. , 23.9, 24.8, 22.9, 23.9, 26.6, 22.5, 22.2,
       23.6, 28.7, 22.6, 22. , 22.9, 25. , 20.6, 28.4, 21.4, 38.7, 43.8,
       33.2, 27.5, 26.5, 18.6, 19.3, 20.1, 19.5, 19.5, 20.4, 19.8, 19.4,
       21.7, 22.8, 18.8, 18.7, 18.5, 18.3, 21.2, 19.2, 20.4, 19.3, 22. ,
       20.3, 20.5, 17.3, 18.8, 21.4, 15.7, 16.2, 18. , 14.3, 19.2, 19.6,
       23. , 18.4, 15.6, 18.1, 17.4, 17.1, 13.3, 17.8, 14. , 14.4, 13.4,
       15.6, 11.8, 13.8, 15.6, 14.6, 17.8, 15.4, 21

02) Split (and scale) the data

 Split into train/test sets (use a fixed random state).

 Explain why feature scaling is helpful for MLP regressors; apply StandardScaler
correctly.

 Set seeds for reproducibility.

In [None]:
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.20, random_state=0)

In [None]:
X_train[:1]

array([[3.5809e-01, 0.0000e+00, 6.2000e+00, 5.0700e-01, 6.9510e+00,
        8.8500e+01, 2.8617e+00, 8.0000e+00, 3.0700e+02, 1.7400e+01,
        3.9170e+02, 9.7100e+00]])

In [None]:
X_test[:1]

array([[6.7240e-02, 0.0000e+00, 3.2400e+00, 4.6000e-01, 6.3330e+00,
        1.7200e+01, 5.2146e+00, 4.0000e+00, 4.3000e+02, 1.6900e+01,
        3.7521e+02, 7.3400e+00]])

In [None]:
y_train[:5]

array([26.7, 21.7, 22. , 22.9, 10.4])

In [None]:
y_test[:5]

array([22.6, 50. , 23. ,  8.3, 21.2])

In [None]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
X_train[:1]

array([[-0.37257438, -0.49960763, -0.70492455, -0.42487874,  0.93567804,
         0.69366877, -0.4372179 , -0.16224243, -0.56165616, -0.48463784,
         0.3716906 , -0.41100022]])

In [None]:
X_test[:1]

array([[-0.40835869, -0.49960763, -1.12872913, -0.83336883,  0.04497197,
        -1.84621525,  0.69506929, -0.62464765,  0.15913664, -0.71272928,
         0.18547577, -0.73610347]])

03) Create the Keras MLP Regressor model

 Build a Sequential model that mirrors your earlier hidden sizes (3, 2) with either:

o Linear (identity) hidden activations to exactly mirror the earlier sheet, or

o ReLU hidden activations for potentially better performance.

 Example (linear/identity mirror):

o Input: shape = (n_features,)

o Dense(3, activation=None)

o Dense(2, activation=None)

o Dense(1, activation=None) (linear output)

 Select loss/metrics for regression (Ex- MSE, RMSE) and justify.

In [None]:
reg_model = keras.Sequential(
    [
        keras.layers.Input(shape=(X_train.shape[1],)),
        keras.layers.Dense(3, activation=None),
        keras.layers.Dense(2, activation=None),
        keras.layers.Dense(1, activation=None),
    ]
)

In [None]:
reg_model.summary()

04) Compile and train

 Compile with appropriate optimizer (Ex- Adam) and loss ('mse').

 Train with chosen epochs, batch_size, validation_split, verbose.

In [None]:
reg_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.01),
    loss="mse",
    metrics=[keras.metrics.RootMeanSquaredError()],
)

In [None]:
reg_history = reg_model.fit(
    X_train, y_train, epochs=300, batch_size=32, validation_split=0.2, verbose=1 # originally 0
)

Epoch 1/1000
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - loss: 595.1367 - root_mean_squared_error: 24.3954 - val_loss: 580.1139 - val_root_mean_squared_error: 24.0856
Epoch 2/1000
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 580.8494 - root_mean_squared_error: 24.1008 - val_loss: 562.5474 - val_root_mean_squared_error: 23.7181
Epoch 3/1000
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 558.8812 - root_mean_squared_error: 23.6407 - val_loss: 530.3043 - val_root_mean_squared_error: 23.0283
Epoch 4/1000
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 521.2471 - root_mean_squared_error: 22.8308 - val_loss: 479.7634 - val_root_mean_squared_error: 21.9035
Epoch 5/1000
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 471.4679 - root_mean_squared_error: 21.7133 - val_loss: 425.6125 - val_root_mean_squared_error: 20.6304
Epoch 6/1

05) Evaluate regression performance

 Evaluate on the test set and report RMSE (or compute via predictions).

 Additionally compute MSE and MAE; interpret what lower values mean in context

In [None]:
rmse = reg_model.evaluate(X_test, y_test, verbose=0)[1]
print(f"Test RMSE: {rmse:.4f}")

Test RMSE: 5.9177


In [None]:
y_pred = reg_model.predict(X_test).ravel()
print(np.sqrt(mean_squared_error(y_test, y_pred)))

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
5.917672269181405
