**Train Test and Cross Validation**

In [2]:
import pandas as pd
from sklearn.model_selection import KFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import classification_report, confusion_matrix

data_path = '/workspaces/Final-Year-Project/Cleaned Data/TrainTestData.csv'
df = pd.read_csv(data_path)

print(f"üîç AB - Original dataset shape: {df.shape}")

df = df.dropna()

print(f"üßπ AB - After dropping NaNs: {df.shape}")

X = df.drop('Diabetes Status', axis=1)
y = df['Diabetes Status']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

print(f"üìä AB - Final dataset shape after scaling: {X_scaled.shape}")

ab = AdaBoostClassifier(n_estimators=100, random_state=42)

kf = KFold(n_splits=5, shuffle=True, random_state=42)

ab_kfold_scores = cross_val_score(ab, X_scaled, y, cv=kf, scoring='accuracy')

ab.fit(X_scaled, y)
y_pred = ab.predict(X_scaled)

print(f"‚úÖ AB - K-Fold Mean Accuracy: {ab_kfold_scores.mean():.4f}")
print("üìå AB Classification Report:")
print(classification_report(y, y_pred))
print("üü¶ AB Confusion Matrix:")
print(confusion_matrix(y, y_pred))


üîç AB - Original dataset shape: (2453, 11)
üßπ AB - After dropping NaNs: (1691, 11)
üìä AB - Final dataset shape after scaling: (1691, 10)
‚úÖ AB - K-Fold Mean Accuracy: 0.6842
üìå AB Classification Report:
              precision    recall  f1-score   support

           0       0.68      0.58      0.62       757
           1       0.70      0.77      0.73       934

    accuracy                           0.69      1691
   macro avg       0.69      0.68      0.68      1691
weighted avg       0.69      0.69      0.68      1691

üü¶ AB Confusion Matrix:
[[440 317]
 [211 723]]


In [3]:
from sklearn.metrics import classification_report, confusion_matrix

valid_path = "/workspaces/Final-Year-Project/Cleaned Data/ValidationData.csv"

df_valid = pd.read_csv(valid_path)
df_valid = df_valid.dropna()

X_valid = df_valid.drop('Diabetes Status', axis=1)
y_valid = df_valid['Diabetes Status']

X_valid_scaled = scaler.transform(X_valid)

print(f"üß™ AB - Validation dataset shape: {X_valid.shape}")

y_pred_valid = ab.predict(X_valid_scaled)

print(f"üìå AB - Validation Classification Report:")
print(classification_report(y_valid, y_pred_valid))

print("üü¶ AB - Validation Confusion Matrix:")
print(confusion_matrix(y_valid, y_pred_valid))


üß™ AB - Validation dataset shape: (177, 10)
üìå AB - Validation Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.67      0.71        82
           1       0.74      0.80      0.77        95

    accuracy                           0.74       177
   macro avg       0.74      0.74      0.74       177
weighted avg       0.74      0.74      0.74       177

üü¶ AB - Validation Confusion Matrix:
[[55 27]
 [19 76]]
