In [2]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, roc_auc_score
from tensorflow.keras.models import load_model
from joblib import load
import os
import warnings
warnings.filterwarnings('ignore')

# Directories holding the preprocessed arrays and the serialized models
processed_data_path = '../data/processed/'
models_path = '../models/'

# Read the test features and labels (features first, then labels)
X_test, y_test = (
    np.load(os.path.join(processed_data_path, array_file))
    for array_file in ('X_test.npy', 'y_test.npy')
)

# Deserialize the two joblib-pickled models, then the Keras network.
# NOTE: joblib.load unpickles arbitrary objects; only use it on trusted files.
log_reg, rf_model = (
    load(os.path.join(models_path, model_file))
    for model_file in (
        'logistic_regression_smote.pkl',
        'random_forest_smote_tuned.pkl',
    )
)
nn_model = load_model(os.path.join(models_path, 'neural_net_tuned.h5'))

# Function to evaluate model with classification report and ROC-AUC
def evaluate_model(model, model_name, X_test, y_test):
    """Print a classification report and a weighted one-vs-rest ROC-AUC score.

    Args:
        model: Fitted classifier. If it exposes ``predict_proba`` (and is not
            named ``'Neural Network'``), ``predict`` supplies the labels and
            ``predict_proba`` the scores. Otherwise ``predict`` is assumed to
            return per-class scores with one column per class, and the label
            is taken as the column index of the highest score.
        model_name: Name used in the printed headings. The value
            ``'Neural Network'`` always selects the ``predict``-only path.
        X_test: Test features passed unchanged to the model.
        y_test: True class labels for ``X_test``.

    Returns:
        None. Both metrics are printed to stdout.
    """
    # Dispatching on the display name alone breaks for any predict-only model
    # registered under a different name, so fall back on the model's actual
    # capabilities. The name check is kept so existing callers behave the same.
    predict_returns_scores = (
        model_name == 'Neural Network' or not hasattr(model, 'predict_proba')
    )

    if predict_returns_scores:
        y_pred_proba = model.predict(X_test)
        # NOTE(review): argmax yields column indices, which only match the
        # true labels when classes are encoded as 0..n_classes-1 — confirm.
        y_pred = np.argmax(y_pred_proba, axis=1)
    else:
        y_pred = model.predict(X_test)
        y_pred_proba = model.predict_proba(X_test)

    # Classification Report
    print(f"\nClassification Report for {model_name}:")
    print(classification_report(y_test, y_pred))

    # ROC-AUC Score (one-vs-rest, weighted by class support)
    roc_auc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr', average='weighted')
    print(f"ROC-AUC Score for {model_name}: {roc_auc:.4f}")

# Evaluate all models
models = {'Logistic Regression': log_reg, 'Random Forest': rf_model, 'Neural Network': nn_model}

# Run inference exactly once per model and keep the formatted metrics, so the
# console output and the saved file come from the same predictions instead of
# repeating every predict/predict_proba call for the file.
evaluation_results = []
for name, model in models.items():
    # Models without predict_proba (e.g. the Keras network) return per-class
    # scores from predict(); the predicted label is the highest-scoring column.
    if name == 'Neural Network' or not hasattr(model, 'predict_proba'):
        y_pred_proba = model.predict(X_test)
        y_pred = np.argmax(y_pred_proba, axis=1)
    else:
        y_pred = model.predict(X_test)
        y_pred_proba = model.predict_proba(X_test)

    report = classification_report(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr', average='weighted')

    print(f"\nClassification Report for {name}:")
    print(report)
    print(f"ROC-AUC Score for {name}: {roc_auc:.4f}")

    evaluation_results.append((name, report, roc_auc))

# Save results to file (explicit encoding keeps the output platform-independent)
with open(os.path.join(models_path, 'evaluation_results.txt'), 'w', encoding='utf-8') as f:
    for name, report, roc_auc in evaluation_results:
        f.write(f"\nClassification Report for {name}:\n")
        f.write(report)
        f.write(f"ROC-AUC Score: {roc_auc:.4f}\n")

print("\nEvaluation complete. Results saved to evaluation_results.txt in models/ directory.")




Classification Report for Logistic Regression:
              precision    recall  f1-score   support

         0.0       0.94      0.63      0.75     65029
         1.0       0.04      0.30      0.07      1974
         2.0       0.33      0.57      0.42     11474

    accuracy                           0.61     78477
   macro avg       0.44      0.50      0.41     78477
weighted avg       0.83      0.61      0.69     78477

ROC-AUC Score for Logistic Regression: 0.7886

Classification Report for Random Forest:
              precision    recall  f1-score   support

         0.0       0.84      0.95      0.89     65029
         1.0       0.03      0.01      0.01      1974
         2.0       0.36      0.15      0.21     11474

    accuracy                           0.81     78477
   macro avg       0.41      0.37      0.37     78477
weighted avg       0.75      0.81      0.77     78477

ROC-AUC Score for Random Forest: 0.7437
[1m2453/2453[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2

In [3]:

# Suppress library warnings for this cell
warnings.filterwarnings('ignore')

# Input and output locations
processed_data_path = '../data/processed/'
models_path = '../models/'

# Read the test features and labels (features first, then labels)
X_test, y_test = (
    np.load(os.path.join(processed_data_path, array_file))
    for array_file in ('X_test.npy', 'y_test.npy')
)

# Load the Neural Network model (described by the author as the best model
# based on ROC-AUC)
nn_model = load_model(os.path.join(models_path, 'neural_net_tuned.h5'))

# Per-class scores from the network; the predicted class is the top-scoring column
y_pred_proba = nn_model.predict(X_test)
y_pred = np.argmax(y_pred_proba, axis=1)

# Human-readable class names
# (assuming 0.0: No Diabetes, 1.0: Yes no complications, 2.0: Yes with complications)
label_map = {
    0: 'No Diabetes',
    1: 'Yes, no complications',
    2: 'Yes, with complications',
}
y_pred_labels = [label_map[class_code] for class_code in y_pred]
y_test_labels = [label_map[class_code] for class_code in y_test]

# One row per test sample: true label, predicted label, and the score the
# network assigned to the class it predicted
results_df = pd.DataFrame({
    'True Label': y_test_labels,
    'Predicted Label': y_pred_labels,
    'Confidence Score': [
        sample_scores[class_code]
        for sample_scores, class_code in zip(y_pred_proba, y_pred)
    ],
})

# Show a short preview of the results
print("\nQuick Outcome Summary (First 10 Predictions):")
print(results_df.head(10))

# Persist every prediction for later analysis
results_df.to_csv(
    os.path.join(models_path, 'predictions_outcome.csv'),
    index=False,
)
print("\nFull prediction results saved to predictions_outcome.csv in models/ directory.")

# Optional: overall metrics for the same predictions
from sklearn.metrics import classification_report, roc_auc_score
print("\nPerformance Summary for Neural Network:")
print(classification_report(y_test, y_pred))
print(
    f"ROC-AUC Score: {roc_auc_score(y_test, y_pred_proba, multi_class='ovr', average='weighted'):.4f}"
)

print("\nOutcome generated. You can use predictions_outcome.csv for analysis or reporting. Retrain later to improve 1.0 recall if needed.")



[1m2453/2453[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 660us/step

Quick Outcome Summary (First 10 Predictions):
                True Label          Predicted Label  Confidence Score
0  Yes, with complications  Yes, with complications          0.486102
1              No Diabetes              No Diabetes          0.516496
2              No Diabetes  Yes, with complications          0.549559
3              No Diabetes              No Diabetes          0.745143
4              No Diabetes              No Diabetes          0.546921
5              No Diabetes  Yes, with complications          0.619402
6  Yes, with complications  Yes, with complications          0.389203
7              No Diabetes              No Diabetes          0.444533
8  Yes, with complications  Yes, with complications          0.787049
9              No Diabetes              No Diabetes          0.411554

Full prediction results saved to predictions_outcome.csv in models/ directory.

Performance Summary for