In [10]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, roc_auc_score
from tensorflow.keras.models import load_model
from joblib import load
import os
import warnings
warnings.filterwarnings('ignore')

# --------------------- Define Paths ---------------------
processed_data_path = '../data/processed/'
models_path = '../models/'

# --------------------- Load Test Data ---------------------
X_test = np.load(os.path.join(processed_data_path, 'X_test.npy'))
y_test = np.load(os.path.join(processed_data_path, 'y_test.npy'))

# --------------------- Load Models ---------------------
log_reg = load(os.path.join(models_path, 'logistic_regression_smote.pkl'))
rf_model = load(os.path.join(models_path, 'random_forest_smote_tuned.pkl'))
xgb_model = load(os.path.join(models_path, 'xgboost_tuned.pkl'))
nn_model = load_model(os.path.join(models_path, 'neural_net_tuned.h5'))

# --------------------- Evaluation Function ---------------------
def evaluate_model(model, model_name, X_test, y_test):
    if model_name == 'Neural Network':
        y_pred_proba = model.predict(X_test)
        y_pred = np.argmax(y_pred_proba, axis=1)
    else:
        y_pred = model.predict(X_test)
        y_pred_proba = model.predict_proba(X_test)

    # Classification Report
    print(f"\nClassification Report for {model_name}:")
    print(classification_report(y_test, y_pred))

    # ROC-AUC Score
    roc_auc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr', average='weighted')
    print(f"ROC-AUC Score for {model_name}: {roc_auc:.4f}")
    return classification_report(y_test, y_pred), roc_auc

# --------------------- Evaluate and Save Results ---------------------
results_file = os.path.join(models_path, 'evaluation_results.txt')
with open(results_file, 'w') as f:
    models = {
        'Logistic Regression': log_reg,
        'Random Forest': rf_model,
        'XGBoost': xgb_model,
        'Neural Network': nn_model
    }

    for name, model in models.items():
        if name == 'Neural Network':
            y_pred_proba = model.predict(X_test)
            y_pred = np.argmax(y_pred_proba, axis=1)
        else:
            y_pred = model.predict(X_test)
            y_pred_proba = model.predict_proba(X_test)

        class_report = classification_report(y_test, y_pred)
        roc_auc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr', average='weighted')

        print(f"\n{name} evaluated. Saving results...")
        f.write(f"\nClassification Report for {name}:\n{class_report}\n")
        f.write(f"ROC-AUC Score: {roc_auc:.4f}\n")

print("\nEvaluation complete. Results saved to evaluation_results.txt.")

# --------------------- Use Neural Network for Final Predictions ---------------------
y_pred_proba = nn_model.predict(X_test)
y_pred = np.argmax(y_pred_proba, axis=1)

# Map numeric labels to human-readable labels
label_map = {
    0: 'No Diabetes',
    1: 'Yes, no complications',
    2: 'Yes, with complications'
}
y_pred_labels = [label_map[pred] for pred in y_pred]
y_test_labels = [label_map[true] for true in y_test]

# Create a DataFrame with predictions and confidence scores
results_df = pd.DataFrame({
    'True Label': y_test_labels,
    'Predicted Label': y_pred_labels,
    'Confidence Score': [y_pred_proba[i, pred] for i, pred in enumerate(y_pred)]
})

# Display first 10 rows
print("\nQuick Outcome Summary (First 10 Predictions):")
print(results_df.head(10))

# Save results to CSV
output_csv = os.path.join(models_path, 'predictions_outcome.csv')
results_df.to_csv(output_csv, index=False)
print(f"\nFull prediction results saved to {output_csv}")

# --------------------- Optional: Basic Performance Summary ---------------------
print("\nPerformance Summary for Neural Network:")
print(classification_report(y_test, y_pred))
print(f"ROC-AUC Score: {roc_auc_score(y_test, y_pred_proba, multi_class='ovr', average='weighted'):.4f}")

print("\nOutcome generated. You can use predictions_outcome.csv for analysis or reporting.")





Logistic Regression evaluated. Saving results...

Random Forest evaluated. Saving results...

XGBoost evaluated. Saving results...
[1m2453/2453[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 892us/step

Neural Network evaluated. Saving results...

Evaluation complete. Results saved to evaluation_results.txt.
[1m2453/2453[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 556us/step

Quick Outcome Summary (First 10 Predictions):
                True Label          Predicted Label  Confidence Score
0  Yes, with complications  Yes, with complications          0.596485
1              No Diabetes  Yes, with complications          0.511028
2              No Diabetes  Yes, with complications          0.653673
3              No Diabetes              No Diabetes          0.554781
4              No Diabetes              No Diabetes          0.419042
5              No Diabetes  Yes, with complications          0.657048
6  Yes, with complications  Yes, with complications          0.46

In [14]:
# --------------------- Import Libraries ---------------------
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, roc_auc_score
from tensorflow.keras.models import load_model
from joblib import load
import os
import warnings
warnings.filterwarnings('ignore')

# --------------------- Define Paths ---------------------
processed_data_path = '../data/processed/'
models_path = '../models/'

# --------------------- Load Balanced Test Data ---------------------
X_test_balanced = np.load(os.path.join(processed_data_path, 'X_test_balanced.npy'))
y_test_balanced = np.load(os.path.join(processed_data_path, 'y_test_balanced.npy'))

# --------------------- Load Balanced Models ---------------------
log_reg_balanced = load(os.path.join(models_path, 'logistic_regression_balanced.pkl'))
rf_model_balanced = load(os.path.join(models_path, 'random_forest_balanced_tuned.pkl'))
xgb_model_balanced = load(os.path.join(models_path, 'xgboost_balanced_tuned.pkl'))
nn_model_balanced = load_model(os.path.join(models_path, 'neural_net_balanced_tuned.h5'))

# --------------------- Evaluation Function ---------------------
def evaluate_model_balanced(model, model_name, X_test_balanced, y_test_balanced):
    if model_name == 'Neural Network':
        y_pred_proba_balanced = model.predict(X_test_balanced)
        y_pred_balanced = np.argmax(y_pred_proba_balanced, axis=1)
    else:
        y_pred_balanced = model.predict(X_test_balanced)
        y_pred_proba_balanced = model.predict_proba(X_test_balanced)

    print(f"\nClassification Report for {model_name}:")
    print(classification_report(y_test_balanced, y_pred_balanced))

    roc_auc_balanced = roc_auc_score(y_test_balanced, y_pred_proba_balanced, multi_class='ovr', average='weighted')
    print(f"ROC-AUC Score for {model_name}: {roc_auc_balanced:.4f}")
    return classification_report(y_test_balanced, y_pred_balanced), roc_auc_balanced

# --------------------- Evaluate and Save Results ---------------------
results_file_balanced = os.path.join(models_path, 'evaluation_results_balanced.txt')
with open(results_file_balanced, 'w') as f:
    models_balanced = {
        'Logistic Regression': log_reg_balanced,
        'Random Forest': rf_model_balanced,
        'XGBoost': xgb_model_balanced,
        'Neural Network': nn_model_balanced
    }

    for name, model in models_balanced.items():
        if name == 'Neural Network':
            y_pred_proba_balanced = model.predict(X_test_balanced)
            y_pred_balanced = np.argmax(y_pred_proba_balanced, axis=1)
        else:
            y_pred_balanced = model.predict(X_test_balanced)
            y_pred_proba_balanced = model.predict_proba(X_test_balanced)

        class_report_balanced = classification_report(y_test_balanced, y_pred_balanced)
        roc_auc_balanced = roc_auc_score(y_test_balanced, y_pred_proba_balanced, multi_class='ovr', average='weighted')

        print(f"\n{name} evaluated. Saving results...")
        f.write(f"\nClassification Report for {name}:\n{class_report_balanced}\n")
        f.write(f"ROC-AUC Score: {roc_auc_balanced:.4f}\n")

print("\nBalanced evaluation complete. Results saved to evaluation_results_balanced.txt.")

# --------------------- Neural Network Predictions & Outcome Summary ---------------------
y_pred_proba_balanced = nn_model_balanced.predict(X_test_balanced)
y_pred_balanced = np.argmax(y_pred_proba_balanced, axis=1)

# Map numeric labels to human-readable labels
label_map_balanced = {
    0: 'No Diabetes',
    1: 'Yes, no complications',
    2: 'Yes, with complications'
}
y_pred_labels_balanced = [label_map_balanced[pred] for pred in y_pred_balanced]
y_test_labels_balanced = [label_map_balanced[true] for true in y_test_balanced]

# Create DataFrame
results_df_balanced = pd.DataFrame({
    'True Label': y_test_labels_balanced,
    'Predicted Label': y_pred_labels_balanced,
    'Confidence Score': [y_pred_proba_balanced[i, pred] for i, pred in enumerate(y_pred_balanced)]
})

# Show quick summary
print("\nQuick Outcome Summary (First 10 Predictions):")
print(results_df_balanced.head(10))

# Save predictions
output_csv_balanced = os.path.join(models_path, 'predictions_outcome_balanced.csv')
results_df_balanced.to_csv(output_csv_balanced, index=False)
print(f"\nFull prediction results saved to {output_csv_balanced}")

# --------------------- Performance Summary (Neural Network) ---------------------
print("\nPerformance Summary for Neural Network (Balanced):")
print(classification_report(y_test_balanced, y_pred_balanced))
print(f"ROC-AUC Score: {roc_auc_score(y_test_balanced, y_pred_proba_balanced, multi_class='ovr', average='weighted'):.4f}")

print("\nBalanced outcome generated. You can use predictions_outcome_balanced.csv for analysis or reporting.")




ValueError: y should be a 1d array, got an array of shape (21545, 2) instead.