**Train/Test Split and Cross-Validation**

In [4]:
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import KFold, train_test_split, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# --- Load data ---------------------------------------------------------------
data_path = '/workspaces/Final-Year-Project/Cleaned Data/TrainTestData.csv'
# Read the CSV and drop every row that contains a missing value.
df = pd.read_csv(data_path).dropna()

# Features are all columns except the target label.
X = df.drop('Diabetes Status', axis=1)
y = df['Diabetes Status']

# --- K-fold cross-validation -------------------------------------------------
# The scaler is placed inside the pipeline so that cross_val_score re-fits it
# on the training portion of each fold. Scaling the whole dataset up front
# would let every held-out fold influence the mean/std used to transform it
# (preprocessing data leakage).
kf = KFold(n_splits=5, shuffle=True, random_state=42)
nb_cv_pipeline = make_pipeline(StandardScaler(), GaussianNB())

nb_kfold_scores = cross_val_score(nb_cv_pipeline, X, y, cv=kf, scoring='accuracy')

print(f"\nNaïve Bayes (K-fold) Mean Accuracy: {nb_kfold_scores.mean():.4f}")

# --- Hold-out split ----------------------------------------------------------
# Split the RAW features first, then fit the scaler on the training rows only,
# so the test rows play no part in the scaling statistics.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# `scaler` and `nb` stay module-level because the validation cell further down
# calls scaler.transform(...) and nb.predict(...) on them.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Whole feature matrix transformed with the training-split statistics; the
# name is retained in case other cells still refer to `X_scaled`.
X_scaled = scaler.transform(X)

nb = GaussianNB()
nb.fit(X_train, y_train)
y_pred = nb.predict(X_test)

print(f"\nNaïve Bayes (Splitting) Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print("Naïve Bayes Classification Report:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))



Naïve Bayes (K-fold) Mean Accuracy: 0.6842

Naïve Bayes (Splitting) Accuracy: 0.6873
Naïve Bayes Classification Report:
              precision    recall  f1-score   support

           0       0.64      0.60      0.62       144
           1       0.72      0.75      0.73       195

    accuracy                           0.69       339
   macro avg       0.68      0.68      0.68       339
weighted avg       0.69      0.69      0.69       339

Confusion Matrix:
[[ 86  58]
 [ 48 147]]


In [5]:
from sklearn.metrics import classification_report, confusion_matrix

# Score the already-fitted Naive Bayes model on the separate validation CSV.
# This cell only transforms and predicts; nothing is re-fitted here.
valid_path = "/workspaces/Final-Year-Project/Cleaned Data/ValidationData.csv"

# Load the file and discard incomplete rows in a single chained step.
df_valid = pd.read_csv(valid_path).dropna()

# Separate the target column from the feature columns.
y_valid = df_valid['Diabetes Status']
X_valid = df_valid.drop(columns='Diabetes Status')

# Reuse the scaler fitted earlier in the notebook (transform only).
X_valid_scaled = scaler.transform(X_valid)

print(f"🧪 NB - Validation dataset shape: {X_valid.shape}")

# Predict with the model trained earlier in the notebook.
y_pred_valid = nb.predict(X_valid_scaled)

valid_report = classification_report(y_valid, y_pred_valid)
print("📌 NB - Validation Classification Report:")
print(valid_report)

valid_matrix = confusion_matrix(y_valid, y_pred_valid)
print("🟦 NB - Validation Confusion Matrix:")
print(valid_matrix)


🧪 NB - Validation dataset shape: (177, 10)
📌 NB - Validation Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.67      0.71        82
           1       0.74      0.82      0.78        95

    accuracy                           0.75       177
   macro avg       0.75      0.75      0.75       177
weighted avg       0.75      0.75      0.75       177

🟦 NB - Validation Confusion Matrix:
[[55 27]
 [17 78]]
