Import the libraries

In [1]:
import pandas as pd  # data handling (DataFrames)
import numpy as np  # numerical operations
from sklearn.model_selection import train_test_split  # split dataset into train & test sets
from sklearn.preprocessing import StandardScaler  # feature scaling/normalization
from sklearn.metrics import mean_squared_error  # evaluate regression models
import tensorflow as tf  # deep learning library
from tensorflow import keras  # build neural network models

###Part A

Import data

In [2]:
data = pd.read_csv("diabetes.csv")  # load dataset from CSV file into a pandas DataFrame
X = data.iloc[:, :8].values  # select first 8 columns as features (independent variables)
y = data.iloc[:, 8].values   # select 9th column as target (dependent variable)

Training and testing data

In [3]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=0, stratify=y)  # split data into 80% train & 20% test, keeping class distribution balanced

Standardizing

In [4]:
scaler = StandardScaler()  # create StandardScaler object for feature normalization (zero mean, unit variance)
X_train = scaler.fit_transform(X_train)  # fit scaler on training data & apply scaling
X_test = scaler.transform(X_test)  # apply same scaling parameters to test data (no refit to avoid data leakage)

Model architecture (Dropout)

In [5]:
model = keras.Sequential([
    keras.layers.Input(shape=(X_train.shape[1],)),  # input layer with number of features as input shape
    keras.layers.Dense(5, activation="relu"),  # hidden layer with 5 neurons, ReLU activation
    keras.layers.Dropout(0.3),  # dropout layer (30% neurons randomly dropped to prevent overfitting)
    keras.layers.Dense(3, activation="relu"),  # hidden layer with 3 neurons, ReLU activation
    keras.layers.Dropout(0.2),  # dropout layer (20% neurons randomly dropped)
    keras.layers.Dense(1, activation="sigmoid")  # output layer (1 neuron, sigmoid for binary classification)
])

Model architecture (Batch Normalization)

In [6]:
from tensorflow.keras.layers import BatchNormalization  # import BatchNormalization layer

model = keras.Sequential([
    keras.layers.Input(shape=(X_train.shape[1],)),  # input layer with feature size
    keras.layers.Dense(5, activation="relu"),  # hidden layer with 5 neurons, ReLU activation
    BatchNormalization(),  # normalize outputs of previous layer (helps stability & faster training)
    keras.layers.Dense(3, activation="relu"),  # hidden layer with 3 neurons, ReLU activation
    BatchNormalization(),  # normalize outputs of this hidden layer
    keras.layers.Dense(1, activation="sigmoid")  # output layer for binary classification
])

Compiling the model

In [7]:
model.compile(
    optimizer="adam",  # Adam optimizer (adaptive learning rate)
    loss="binary_crossentropy",  # loss function for binary classification
    metrics=["accuracy"]  # track accuracy during training/testing
)

Training

In [8]:
history = model.fit(
    X_train, y_train,        # training data (features and labels)
    epochs=100,              # number of times the model sees the whole dataset
    batch_size=32,           # number of samples per gradient update
    validation_split=0.2,    # use 20% of training data for validation
    verbose=1                # show progress output during training
)

Epoch 1/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 20ms/step - accuracy: 0.6316 - loss: 0.7161 - val_accuracy: 0.4878 - val_loss: 0.6964
Epoch 2/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6021 - loss: 0.7352 - val_accuracy: 0.4878 - val_loss: 0.6957
Epoch 3/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5844 - loss: 0.7713 - val_accuracy: 0.5041 - val_loss: 0.6921
Epoch 4/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6363 - loss: 0.6888 - val_accuracy: 0.5366 - val_loss: 0.6876
Epoch 5/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6477 - loss: 0.6657 - val_accuracy: 0.5691 - val_loss: 0.6825
Epoch 6/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6477 - loss: 0.6671 - val_accuracy: 0.5854 - val_loss: 0.6787
Epoch 7/100
[1m16/16[0m [32m━━

Training (Early Stopping)

In [9]:
from tensorflow.keras.callbacks import EarlyStopping  # import EarlyStopping callback

early_stop = EarlyStopping(
    monitor="val_loss",       # watch validation loss to decide when to stop
    patience=10,              # stop if no improvement for 10 consecutive epochs
    restore_best_weights=True # roll back to the best model weights
)

In [10]:
story = model.fit(
    X_train, y_train,         # training features and labels
    epochs=100,               # maximum number of training iterations
    batch_size=32,            # samples processed before model update
    validation_split=0.2,     # 20% of training data used for validation
    verbose=1,                # show training progress
    callbacks=[early_stop]    # stop early if val_loss doesn't improve (patience=10)
)

Epoch 1/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.8085 - loss: 0.4385 - val_accuracy: 0.7317 - val_loss: 0.5721
Epoch 2/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7982 - loss: 0.4466 - val_accuracy: 0.7317 - val_loss: 0.5722
Epoch 3/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8049 - loss: 0.4472 - val_accuracy: 0.7236 - val_loss: 0.5723
Epoch 4/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7788 - loss: 0.4691 - val_accuracy: 0.7317 - val_loss: 0.5734
Epoch 5/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7867 - loss: 0.4486 - val_accuracy: 0.7236 - val_loss: 0.5731
Epoch 6/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7821 - loss: 0.4450 - val_accuracy: 0.7317 - val_loss: 0.5719
Epoch 7/100
[1m16/16[0m [32m━━

Validating

In [11]:
loss, acc = model.evaluate(X_test, y_test, verbose=0)  # evaluate model on test data (loss & accuracy)
print(f"Test Accuracy: {acc:.4f}")  # print test accuracy with 4 decimal places

Test Accuracy: 0.7987
