In [95]:
import pandas as pd
from sklearn.model_selection import KFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Load the dataset
data_path = '/workspaces/Final-Year-Project/Cleaned Data/TrainTestData.csv'
df = pd.read_csv(data_path)

print(f"🔍 NN - Original dataset shape: {df.shape}")

# Drop rows with NaN in any feature column
df = df.dropna()

print(f"🧹 NN - After dropping NaNs: {df.shape}")

# Define features and target
X = df.drop('Diabetes Status', axis=1)
y = df['Diabetes Status']

# Standardize the features (important for NN)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

print(f"📊 NN - Final dataset shape after scaling: {X_scaled.shape}")

# Initialize the Neural Network model with optimized hyperparameters
nn = MLPClassifier(
    hidden_layer_sizes=(20, 20, 15, 10),  # Gradual reduction in neuron count
    activation='relu',
    solver='adam',
    alpha=0.001,  # L2 regularization to prevent overfitting
    max_iter=5000,  # Allow more iterations for convergence
    random_state=42
)

# Set up K-Fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Perform cross-validation using accuracy as the metric
nn_kfold_scores = cross_val_score(nn, X_scaled, y, cv=kf, scoring='accuracy')

# Fit the model on the full dataset for classification report & confusion matrix
nn.fit(X_scaled, y)
y_pred = nn.predict(X_scaled)

# Print results in the same format as previous models
print(f"✅ NN - K-Fold Mean Accuracy: {nn_kfold_scores.mean():.4f}")
print("📌 NN Classification Report:")
print(classification_report(y, y_pred))
print("🟦 NN Confusion Matrix:")
print(confusion_matrix(y, y_pred))


🔍 NN - Original dataset shape: (4435, 10)
🧹 NN - After dropping NaNs: (3003, 10)
📊 NN - Final dataset shape after scaling: (3003, 9)
✅ NN - K-Fold Mean Accuracy: 0.6770
📌 NN Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.65      0.68      1339
           1       0.74      0.79      0.76      1664

    accuracy                           0.73      3003
   macro avg       0.72      0.72      0.72      3003
weighted avg       0.73      0.73      0.72      3003

🟦 NN Confusion Matrix:
[[ 869  470]
 [ 351 1313]]


In [96]:
from sklearn.metrics import classification_report, confusion_matrix

# File path for validation data
valid_path = "/workspaces/Final-Year-Project/Cleaned Data/ValidationData.csv"

### 🎯 VALIDATION PHASE ###
# Load validation data
df_valid = pd.read_csv(valid_path)
df_valid = df_valid.dropna()

# Define features and target
X_valid = df_valid.drop('Diabetes Status', axis=1)
y_valid = df_valid['Diabetes Status']

# Standardize validation data (using the same scaler from training)
X_valid_scaled = scaler.transform(X_valid)

print(f"🧪 NN - Validation dataset shape: {X_valid.shape}")

# Evaluate on validation set
y_pred_valid = nn.predict(X_valid_scaled)

# Classification Report
print(f"📌 NN - Validation Classification Report:")
print(classification_report(y_valid, y_pred_valid))

# Confusion Matrix
print("🟦 NN - Validation Confusion Matrix:")
print(confusion_matrix(y_valid, y_pred_valid))


🧪 NN - Validation dataset shape: (335, 9)
📌 NN - Validation Classification Report:
              precision    recall  f1-score   support

           0       0.67      0.63      0.65       145
           1       0.73      0.76      0.75       190

    accuracy                           0.70       335
   macro avg       0.70      0.70      0.70       335
weighted avg       0.70      0.70      0.70       335

🟦 NN - Validation Confusion Matrix:
[[ 91  54]
 [ 45 145]]


15, 15, 10, 5

🧪 NN - Validation dataset shape: (335, 9)
📌 NN - Validation Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.63      0.68       145
           1       0.75      0.83      0.79       190

    accuracy                           0.74       335
   macro avg       0.74      0.73      0.73       335
weighted avg       0.74      0.74      0.74       335

🟦 NN - Validation Confusion Matrix:
[[ 91  54]
 [ 32 158]]

7, 6, 6

🧪 NN - Validation dataset shape: (335, 9)
📌 NN - Validation Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.63      0.68       145
           1       0.74      0.83      0.78       190

    accuracy                           0.74       335
   macro avg       0.74      0.73      0.73       335
weighted avg       0.74      0.74      0.74       335

🟦 NN - Validation Confusion Matrix:
[[ 91  54]
 [ 33 157]]

 9, 9 ,7

 🧪 NN - Validation dataset shape: (335, 9)
📌 NN - Validation Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.63      0.68       145
           1       0.74      0.83      0.78       190

    accuracy                           0.74       335
   macro avg       0.74      0.73      0.73       335
weighted avg       0.74      0.74      0.74       335

🟦 NN - Validation Confusion Matrix:
[[ 91  54]
 [ 33 157]]

 7,7,6

 🧪 NN - Validation dataset shape: (335, 9)
📌 NN - Validation Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.59      0.65       145
           1       0.72      0.82      0.77       190

    accuracy                           0.72       335
   macro avg       0.72      0.70      0.71       335
weighted avg       0.72      0.72      0.72       335

🟦 NN - Validation Confusion Matrix:
[[ 86  59]
 [ 35 155]]