In [4]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from sklearn.utils import class_weight
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from google.colab import files

file_path = '/content/preprocessed_data.csv'
df = pd.read_csv(file_path)

print(f"Successfully loaded '{file_path}'")


# ---  Prepare Data for the Model ---
# Separate features (X) and the target variable (y)
X = df.drop('Diabetes', axis=1)
y = df['Diabetes']

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print(f"\nData split into training and testing sets:")
print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")

# ---  Calculate Class Weights to Handle Imbalance ---
class_weights = class_weight.compute_class_weight(
    'balanced',
    classes=np.unique(y_train),
    y=y_train
)
class_weights_dict = {i : class_weights[i] for i in range(len(class_weights))}
print(f"\nCalculated Class Weights to handle imbalance: {class_weights_dict}")


# --- Build the Neural Network Model with Dropout ---

model = tf.keras.models.Sequential([
    # Dense layer with 12 neurons, 'relu' activation.
    tf.keras.layers.Dense(12, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dropout(0.3),  # --- ADDED DROPOUT ---

    # Hidden layer: Another Dense layer with 8 neurons.
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dropout(0.3),  # --- ADDED DROPOUT ---

    # Output layer: A single neuron with a 'sigmoid' activation for binary classification.
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# --- Compile the Model ---
# We configure the model for training.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Print a summary of the model's architecture
print("\nModel Architecture:")
model.summary()


#  --- Define EarlyStopping Callback ---

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=15,  # Increased patience slightly
    restore_best_weights=True
)

# --- Train the Model ---
# We now add class_weight and the early_stopping callback to the fit method.
# We can also increase the number of epochs, as EarlyStopping will find the
# optimal point to stop training.
print("\nStarting model training with improvements...")
history = model.fit(X_train, y_train,
                    epochs=200,                         # Increased epochs
                    batch_size=32,
                    validation_data=(X_test, y_test),
                    class_weight=class_weights_dict,    # --- ADDED ---
                    callbacks=[early_stopping],         # --- ADDED ---
                    verbose=1)

# --- 9. Evaluate the Model ---
print("\nEvaluating final model performance on the test set...")
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"\nTest Accuracy: {accuracy * 100:.2f}%")
print(f"Test Loss: {loss:.4f}")


Successfully loaded '/content/preprocessed_data.csv'

Data split into training and testing sets:
X_train shape: (240, 7)
X_test shape: (60, 7)
y_train shape: (240,)
y_test shape: (60,)

Calculated Class Weights to handle imbalance: {0: np.float64(1.0), 1: np.float64(1.0)}

Model Architecture:


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Starting model training with improvements...
Epoch 1/200
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 59ms/step - accuracy: 0.4943 - loss: 0.7014 - val_accuracy: 0.5167 - val_loss: 0.6983
Epoch 2/200
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.5755 - loss: 0.6841 - val_accuracy: 0.5167 - val_loss: 0.6956
Epoch 3/200
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.5570 - loss: 0.7010 - val_accuracy: 0.5667 - val_loss: 0.6932
Epoch 4/200
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.5405 - loss: 0.6902 - val_accuracy: 0.5667 - val_loss: 0.6909
Epoch 5/200
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.5311 - loss: 0.6892 - val_accuracy: 0.5667 - val_loss: 0.6890
Epoch 6/200
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.5798 - loss: 0.6802 - val_accuracy: 0.5500 - val_loss: 0