# Fetal Health Prediction System


In [None]:
# ----------------------------------------------
# IMPORTS
# ----------------------------------------------
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import matthews_corrcoef, roc_auc_score
from sklearn.metrics import classification_report

# ML Models
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

import warnings
warnings.filterwarnings("ignore")

# ----------------------------------------------
# LOAD DATA
# ----------------------------------------------
# Read the dataset from the current working directory.
df = pd.read_csv("fetal_health.csv")

# Split the frame into the label column and the remaining feature columns.
# The label is shifted down by one so it is 0-indexed (0, 1, 2).
y = df["fetal_health"] - 1
X = df.drop(columns=["fetal_health"])

# ----------------------------------------------
# TRAIN-TEST SPLIT
# ----------------------------------------------
# Hold out 20% of the rows for testing; stratify on the label and fix the
# seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# ----------------------------------------------
# SCALING
# ----------------------------------------------
# Fit the scaler on the training rows only, then apply the same fitted
# transformation to both the training and the test rows.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ----------------------------------------------
# DEFINE MODELS
# ----------------------------------------------
# Candidate classifiers, keyed by the display name used in the results table
# and (with spaces replaced by underscores) in the saved model filenames.
models = {
    # `multi_class` is intentionally not passed: "auto" is the default, and
    # the parameter is deprecated in scikit-learn 1.5 and scheduled for removal.
    "Logistic Regression": LogisticRegression(max_iter=200),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "KNN": KNeighborsClassifier(n_neighbors=7),
    "Naive Bayes": GaussianNB(),
    "Random Forest": RandomForestClassifier(n_estimators=200, random_state=42),
    "XGBoost": XGBClassifier(
        objective='multi:softprob',
        eval_metric='mlogloss',
        num_class=3,
        n_estimators=200,
        learning_rate=0.1,
    ),
}

# ----------------------------------------------
# EVALUATION FUNCTION
# ----------------------------------------------
def evaluate_model(model, X_test, y_test):
    """Score an already-fitted classifier on the given evaluation data.

    Args:
        model: Fitted estimator exposing ``predict`` and ``predict_proba``.
        X_test: Feature matrix passed to both prediction methods.
        y_test: True labels for the rows of ``X_test``.

    Returns:
        A dict with the keys ``"Accuracy"``, ``"Precision"``, ``"Recall"``,
        ``"F1 Score"``, ``"MCC"`` and ``"AUC"``. Precision, recall and F1 use
        weighted averaging; AUC is computed from the predicted class
        probabilities with the one-vs-one multiclass scheme.
    """
    predicted_labels = model.predict(X_test)
    class_probabilities = model.predict_proba(X_test)

    # Precision, recall and F1 all share the same averaging mode.
    weighted = {"average": "weighted"}

    return {
        "Accuracy": accuracy_score(y_test, predicted_labels),
        "Precision": precision_score(y_test, predicted_labels, **weighted),
        "Recall": recall_score(y_test, predicted_labels, **weighted),
        "F1 Score": f1_score(y_test, predicted_labels, **weighted),
        "MCC": matthews_corrcoef(y_test, predicted_labels),
        "AUC": roc_auc_score(y_test, class_probabilities, multi_class='ovo'),
    }

# ----------------------------------------------
# TRAIN, EVALUATE, SAVE MODELS
# ----------------------------------------------
# Imported once, ahead of the loop, instead of on every iteration.
import joblib

# Maps each model's display name to its dict of evaluation metrics.
results = {}

for name, model in models.items():
    print(f"\nTraining {name}...")

    # Fit on the scaled training data, then score on the scaled test data.
    model.fit(X_train_scaled, y_train)
    metrics = evaluate_model(model, X_test_scaled, y_test)
    results[name] = metrics

    # Persist the fitted model; spaces in the display name become
    # underscores in the filename (e.g. "Random Forest" -> Random_Forest.pkl).
    joblib.dump(model, f"{name.replace(' ', '_')}.pkl")

# ----------------------------------------------
# RESULTS TABLE
# ----------------------------------------------
# One row per model, one column per metric.
results_df = pd.DataFrame(results).transpose()

print("\n\n=== MODEL COMPARISON TABLE ===\n")
print(results_df)

# Per-class report for the model treated as "best".
# NOTE(review): the choice is hard-coded to XGBoost; it is not derived from
# the scores in `results_df`.
best_model = "XGBoost"
best_estimator = models[best_model]
best_predictions = best_estimator.predict(X_test_scaled)

print("\n\n=== CLASSIFICATION REPORT: XGBOOST ===\n")
print(classification_report(y_test, best_predictions))