In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
import numpy as np
import joblib

# Read the test features, then flatten the label file into a 1-D array
X_test = pd.read_csv('X_test.csv')
y_test_frame = pd.read_csv('y_test.csv')
y_test = y_test_frame.values.ravel()

# Restore the three saved models, in the order they are unpacked below
lr_model, dt_model, rf_model = (
    joblib.load(model_path)
    for model_path in (
        'logistic_regression_model.pkl',
        'decision_tree_model.pkl',
        'random_forest_model.pkl',
    )
)

# Loading metrics
def _load_metrics(path):
    """Parse a ``name: value`` metrics file into a dict of floats.

    Blank lines are skipped, so a trailing newline or spacing between
    entries does not abort the load. Every other line is split at its last
    colon; the text before it (stripped) is the metric name and the text
    after it is converted with ``float``.

    Raises:
        ValueError: if a non-blank line has no colon or its value is not
            a number.
        OSError: if the file cannot be opened.
    """
    metrics = {}
    with open(path, 'r') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            # rpartition keeps any colons inside the metric name intact.
            name, sep, value = line.rpartition(':')
            if not sep:
                raise ValueError(f"Malformed metrics line in {path}: {line!r}")
            metrics[name.strip()] = float(value)
    return metrics


lr_metrics = _load_metrics('logistic_regression_metrics.txt')
dt_metrics = _load_metrics('decision_tree_metrics.txt')
rf_metrics = _load_metrics('random_forest_metrics.txt')

# Creating comparison table: after the transpose each row is a model and
# each column is one of the metric names read from the metrics files
metrics_by_model = {
    'Logistic Regression': lr_metrics,
    'Decision Tree': dt_metrics,
    'Random Forest': rf_metrics,
}
metrics_df = pd.DataFrame(metrics_by_model).transpose()
print("Model Comparison:\n", metrics_df)
metrics_df.to_csv('model_comparison.csv')

# Plotting comparison as a bar chart, then saving and releasing the figure
comparison_ax = metrics_df.plot.bar(figsize=(10, 6))
comparison_ax.set_title('Model Performance Comparison')
comparison_ax.set_ylabel('Score')
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('model_comparison_plot.png')
plt.close()

# Plotting combined ROC curves (one-vs-rest: one curve per model and class)
roc_classes = [0, 1, 2]
y_test_bin = label_binarize(y_test, classes=roc_classes)
models = {
    'Logistic Regression': lr_model,
    'Decision Tree': dt_model,
    'Random Forest': rf_model
}
plt.figure(figsize=(10, 8))
for name, model in models.items():
    y_score = model.predict_proba(X_test)
    # predict_proba columns are ordered like model.classes_, so look each
    # class up there instead of assuming column i belongs to class i.
    score_column = {label: col for col, label in enumerate(model.classes_)}
    for i, label in enumerate(roc_classes):
        if label not in score_column:
            # The model has no probability column for this class, so there
            # is nothing to plot a curve from.
            print(f"Skipping ROC for {name}, class {label}: no score column for this class.")
            continue
        fpr, tpr, _ = roc_curve(y_test_bin[:, i], y_score[:, score_column[label]])
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, label=f'{name} (class {label}, AUC = {roc_auc:.2f})')
# Diagonal reference line from (0, 0) to (1, 1)
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve Comparison')
plt.legend(loc='lower right')
# Same layout pass as the comparison bar chart, so labels are not clipped
plt.tight_layout()
plt.savefig('roc_comparison.png')
plt.close()

# Practical implications for report
# NOTE(review): the recommendation below is fixed text and is not derived
# from metrics_df — confirm it still agrees with the comparison table.
implication_lines = (
    "Practical Implications for Maternal and Child Health in Northwest Cameroon:\n",
    "- Random Forest is recommended due to its high accuracy and robustness.\n",
    "- Focus on key predictors (see feature_importance.csv) like birth weight and gestational age.\n",
    "- Deploy the model in clinics to identify high-risk pregnancies early.\n",
    "- Use predictions to allocate resources and prioritize prenatal care for high-risk cases.\n",
)
with open('practical_implications.txt', 'w') as f:
    # One batched write of the heading followed by the four bullet lines
    f.writelines(implication_lines)

print("Model evaluation and comparison completed.")

Model Comparison:
                      Accuracy  Precision  Recall  F1-Score
Logistic Regression    0.5965     0.3558  0.5965    0.4457
Decision Tree          0.4498     0.4556  0.4498    0.4526
Random Forest          0.5951     0.3555  0.5951    0.4451
Model evaluation and comparison completed.
