In [1]:
import pandas as pd
from sklearn.model_selection import KFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Load the dataset
data_path = '/workspaces/Final-Year-Project/Cleaned Data/TrainTestData.csv'
df = pd.read_csv(data_path)

print(f"🔍 AB - Original dataset shape: {df.shape}")

# Drop rows with NaN in any feature column
df = df.dropna()

print(f"🧹 AB - After dropping NaNs: {df.shape}")

# Define features and target
X = df.drop('Diabetes Status', axis=1)
y = df['Diabetes Status']

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

print(f"📊 AB - Final dataset shape after scaling: {X_scaled.shape}")

# Initialize the AdaBoost model
ab = AdaBoostClassifier(n_estimators=100, random_state=42)

# Set up K-Fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Perform cross-validation using accuracy as the metric
ab_kfold_scores = cross_val_score(ab, X_scaled, y, cv=kf, scoring='accuracy')

# Fit the model on the full dataset for classification report & confusion matrix
ab.fit(X_scaled, y)
y_pred = ab.predict(X_scaled)

# Print results in the same format as previous models
print(f"✅ AB - K-Fold Mean Accuracy: {ab_kfold_scores.mean():.4f}")
print("📌 AB Classification Report:")
print(classification_report(y, y_pred))
print("🟦 AB Confusion Matrix:")
print(confusion_matrix(y, y_pred))


🔍 AB - Original dataset shape: (4242, 13)
🧹 AB - After dropping NaNs: (948, 13)
📊 AB - Final dataset shape after scaling: (948, 12)
✅ AB - K-Fold Mean Accuracy: 0.9019
📌 AB Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.99      0.90       380
           1       0.99      0.86      0.92       568

    accuracy                           0.91       948
   macro avg       0.91      0.92      0.91       948
weighted avg       0.92      0.91      0.91       948

🟦 AB Confusion Matrix:
[[375   5]
 [ 82 486]]


In [2]:
from sklearn.metrics import classification_report, confusion_matrix

# File path for validation data
valid_path = "/workspaces/Final-Year-Project/Cleaned Data/ValidationData.csv"

### 🎯 VALIDATION PHASE ###
# Load validation data
df_valid = pd.read_csv(valid_path)
df_valid = df_valid.dropna()

# Define features and target
X_valid = df_valid.drop('Diabetes Status', axis=1)
y_valid = df_valid['Diabetes Status']

# Standardize validation data (using the same scaler from training)
X_valid_scaled = scaler.transform(X_valid)

print(f"🧪 AB - Validation dataset shape: {X_valid.shape}")

# Evaluate on validation set
y_pred_valid = ab.predict(X_valid_scaled)

# Classification Report
print(f"📌 AB - Validation Classification Report:")
print(classification_report(y_valid, y_pred_valid))

# Confusion Matrix
print("🟦 AB - Validation Confusion Matrix:")
print(confusion_matrix(y_valid, y_pred_valid))


🧪 AB - Validation dataset shape: (124, 12)
📌 AB - Validation Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.98      0.85        45
           1       0.98      0.82      0.90        79

    accuracy                           0.88       124
   macro avg       0.87      0.90      0.88       124
weighted avg       0.90      0.88      0.88       124

🟦 AB - Validation Confusion Matrix:
[[44  1]
 [14 65]]
