<a href="https://colab.research.google.com/github/Ijomriba/ICU-MORTALITY-PREDECTION/blob/main/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ======================================================
# ICU MORTALITY PREDICTION (FINAL WORKING CODE)
# Mortality derived using SOFA score
# ======================================================

# -------- 1. Import Libraries --------
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    roc_auc_score
)

# -------- 2. Load Dataset --------
df = pd.read_csv("icu.csv")

print("Dataset Shape:", df.shape)

# -------- 3. Clean Column Names --------
df.columns = df.columns.str.strip().str.lower()

# -------- 4. CREATE ICU MORTALITY LABEL --------
# Clinical rule:
# SOFA >= 11 → High mortality risk (1)
# SOFA < 11 → Low mortality risk (0)

if 'sofa' not in df.columns:
    raise ValueError("SOFA score not found in dataset")

df['mortality'] = np.where(df['sofa'] >= 11, 1, 0)

print("Mortality label created using SOFA score")
print(df['mortality'].value_counts())

# -------- 5. Features & Target --------
X = df.drop(['mortality'], axis=1)
y = df['mortality']

# Numeric features only
X = X.select_dtypes(include=[np.number])

print("Total numeric features used:", X.shape[1])

# -------- 6. Handle Missing Values --------
imputer = SimpleImputer(strategy='median')
X = imputer.fit_transform(X)

# -------- 7. Feature Scaling --------
scaler = StandardScaler()
X = scaler.fit_transform(X)

# -------- 8. Train-Test Split --------
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

print("Training samples:", X_train.shape[0])
print("Testing samples:", X_test.shape[0])

# -------- 9. Logistic Regression --------
log_model = LogisticRegression(max_iter=1000)
log_model.fit(X_train, y_train)

y_pred_log = log_model.predict(X_test)
y_prob_log = log_model.predict_proba(X_test)[:, 1]

print("\n--- Logistic Regression Results ---")
print("Accuracy:", accuracy_score(y_test, y_pred_log))
print("ROC-AUC:", roc_auc_score(y_test, y_prob_log))
print(classification_report(y_test, y_pred_log))

# -------- 10. Random Forest --------
rf_model = RandomForestClassifier(
    n_estimators=200,
    random_state=42,
    class_weight='balanced'
)

rf_model.fit(X_train, y_train)

y_pred_rf = rf_model.predict(X_test)
y_prob_rf = rf_model.predict_proba(X_test)[:, 1]

print("\n--- Random Forest Results ---")
print("Accuracy:", accuracy_score(y_test, y_pred_rf))
print("ROC-AUC:", roc_auc_score(y_test, y_prob_rf))
print(classification_report(y_test, y_pred_rf))

# -------- 11. Confusion Matrix --------
print("\nConfusion Matrix (Random Forest):")
print(confusion_matrix(y_test, y_pred_rf))

# -------- 12. Single Patient Prediction --------
sample_patient = X_test[0].reshape(1, -1)
prediction = rf_model.predict(sample_patient)

print("\nSample Patient Prediction:")
if prediction[0] == 1:
    print("High Risk of ICU Mortality")
else:
    print("Low Risk of ICU Mortality")

Dataset Shape: (3600, 120)
Mortality label created using SOFA score
mortality
0    2995
1     605
Name: count, dtype: int64
Total numeric features used: 120
Training samples: 2880
Testing samples: 720

--- Logistic Regression Results ---
Accuracy: 0.9875
ROC-AUC: 0.9987444639136853
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       599
           1       0.97      0.96      0.96       121

    accuracy                           0.99       720
   macro avg       0.98      0.98      0.98       720
weighted avg       0.99      0.99      0.99       720


--- Random Forest Results ---
Accuracy: 0.9958333333333333
ROC-AUC: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       599
           1       1.00      0.98      0.99       121

    accuracy                           1.00       720
   macro avg       1.00      0.99      0.99       720
weighted avg       1.00      1.00      1.00  