In [8]:
# deep_mlp_xgb_ensemble_complete.py
# Complete pipeline: Patient Treatment Classification with Deep MLP + XGBoost + RF Ensemble

import os
import json
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from datetime import datetime
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_validate
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, confusion_matrix, classification_report,
    roc_curve, auc, precision_recall_curve, average_precision_score
)
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import VotingClassifier, RandomForestClassifier

import xgboost as xgb
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline

In [9]:
"C:\PERSONAL\PATIENT TEST CLASSIFICATION\DATASET.csv"
DATA_PATH = r"C:\PERSONAL\PATIENT TREATMENT CLASSIFICATION\DATASET.csv"# change to your downloaded CSV path
ARTIFACT_DIR = r"C:\PERSONAL\PATIENT TREATMENT CLASSIFICATION/artifacts_patient_treatment"

RANDOM_STATE = 42
TEST_SIZE = 0.2
VAL_SIZE = 0.2  # from remaining train

os.makedirs(ARTIFACT_DIR, exist_ok=True)

  "C:\PERSONAL\PATIENT TEST CLASSIFICATION\DATASET.csv"


In [10]:
# ===================================================================
# 1. LOAD DATA
# ===================================================================
# Banner (rule / title / rule) emitted with a single print call.
print("\n".join(["="*70, "PATIENT TREATMENT CLASSIFICATION - ENSEMBLE PIPELINE", "="*70]))

# Read the raw CSV and report its size and column names.
df = pd.read_csv(DATA_PATH)
n_rows, n_cols = df.shape
print(f"\nLoaded dataset: {n_rows} rows, {n_cols} columns")
print(f"Columns: {list(df.columns)}")

# Keep an unmodified copy of the input next to the other artifacts.
snapshot_path = os.path.join(ARTIFACT_DIR, "raw_data_snapshot.csv")
df.to_csv(snapshot_path, index=False)


PATIENT TREATMENT CLASSIFICATION - ENSEMBLE PIPELINE

Loaded dataset: 4412 rows, 11 columns
Columns: ['HAEMATOCRIT', 'HAEMOGLOBINS', 'ERYTHROCYTE', 'LEUCOCYTE', 'THROMBOCYTE', 'MCH', 'MCHC', 'MCV', 'AGE', 'SEX', 'SOURCE']


In [11]:
# ===================================================================
# 2. EDA AND PLOTS
# ===================================================================

print("EXPLORATORY DATA ANALYSIS")

TARGET_COL="SOURCE"

# Text report: DataFrame.info() followed by two describe() tables.
with open(os.path.join(ARTIFACT_DIR, "data_info.txt"), "w") as f:
    f.write("DATAFRAME INFO\n")
    df.info(buf=f)
    for section_title, include_spec in (("NUMERIC", [np.number]), ("ALL", "all")):
        f.write(f"\n\nDESCRIBE ({section_title})\n")
        f.write(df.describe(include=include_spec).to_string())

# Class distribution: print the counts, then save them as a bar chart.
class_counts = df[TARGET_COL].value_counts()
print(f"\nTarget column: {TARGET_COL}\nClass distribution:")
print(class_counts)

plt.figure(figsize=(6, 4))
class_counts.plot(kind="bar")
plt.xlabel("Class")
plt.ylabel("Count")
plt.title("Class Distribution")
plt.tight_layout()
plt.savefig(os.path.join(ARTIFACT_DIR, "class_distribution.png"))
plt.close()

# Correlation heatmap (only meaningful with at least two numeric columns).
numeric_cols = list(df.select_dtypes(include=[np.number]).columns)
if len(numeric_cols) >= 2:
    corr = df[numeric_cols].corr()
    plt.figure(figsize=(12, 10))
    sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", square=True)
    plt.title("Feature Correlation Heatmap")
    plt.tight_layout()
    plt.savefig(os.path.join(ARTIFACT_DIR, "corr_heatmap.png"), dpi=150)
    plt.close()

# One histogram (with KDE overlay) per numeric column, capped at the first 10.
for col in numeric_cols[:10]:
    hist_path = os.path.join(ARTIFACT_DIR, f"hist_{col}.png")
    plt.figure(figsize=(6, 4))
    sns.histplot(df[col].dropna(), kde=True, bins=30)
    plt.title(f"Distribution of {col}")
    plt.tight_layout()
    plt.savefig(hist_path)
    plt.close()

EXPLORATORY DATA ANALYSIS

Target column: SOURCE
Class distribution:
SOURCE
out    2628
in     1784
Name: count, dtype: int64


In [12]:
# ===================================================================
# 3. TRAIN/VAL/TEST SPLIT WITH LABEL ENCODING
# ===================================================================
print("\n" + "="*70)
print("DATA SPLITTING")
print("="*70)

X = df.drop(columns=[TARGET_COL])
y = df[TARGET_COL]

# Always run the target through LabelEncoder, whatever its dtype.
# Later cells index class_names by label value and treat column 1 of
# predict_proba as label 1, so labels must be the contiguous integers
# 0..k-1. A numeric target such as {1, 2} would otherwise slip through
# unencoded and break those assumptions.
le = LabelEncoder()
y_encoded = le.fit_transform(y)
class_names = le.classes_.tolist()
print(f"Target classes: {class_names} -> encoded as {list(range(len(class_names)))}")
joblib.dump(le, os.path.join(ARTIFACT_DIR, "label_encoder.joblib"))

# First cut: hold out the test split, stratified on the encoded target.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y_encoded, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y_encoded
)

# Second cut: carve the validation split out of the remaining training rows.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=VAL_SIZE,
    random_state=RANDOM_STATE, stratify=y_train_full
)

# The three splits must partition the dataset exactly.
assert len(y_train) + len(y_val) + len(y_test) == len(df)

print(f"\nTrain: {len(y_train)}, Val: {len(y_val)}, Test: {len(y_test)}")

# Save splits (features plus the encoded target column)
for split_name, X_split, y_split in [
    ("train", X_train, y_train),
    ("val", X_val, y_val),
    ("test", X_test, y_test),
]:
    split_df = X_split.copy()
    split_df[TARGET_COL] = y_split
    split_df.to_csv(os.path.join(ARTIFACT_DIR, f"{split_name}_split.csv"), index=False)


DATA SPLITTING
Target classes: ['in', 'out'] -> encoded as [0, 1]

Train: 2823, Val: 706, Test: 883


In [13]:
# ===================================================================
# 4. PREPROCESSING PIPELINE
# ===================================================================
# Column groups are chosen by dtype: numeric columns vs. everything else.
numeric_features = X.select_dtypes(include=[np.number]).columns.tolist()
categorical_features = X.select_dtypes(exclude=[np.number]).columns.tolist()

print(f"\nNumeric features: {len(numeric_features)}")
print(f"Categorical features: {len(categorical_features)}")

# Numeric columns: median imputation, then standardisation.
numeric_transformer = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler())
    ]
)

# Categorical columns: mode imputation, then one-hot encoding.
# handle_unknown="ignore" keeps transform() from failing on categories that
# were not present when the encoder was fitted.
categorical_transformer = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", OneHotEncoder(handle_unknown="ignore"))
    ]
)

# Route each column group through its own transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)



Numeric features: 9
Categorical features: 1


In [14]:
# ===================================================================
# 5. BASE MODELS WITH CLASS IMBALANCE HANDLING
# ===================================================================
print("\n" + "\n".join(["="*70, "MODEL ARCHITECTURE", "="*70]))

# Class balance of the training split (counts and percentages).
unique, counts = np.unique(y_train, return_counts=True)
n_train = len(y_train)
print("\nTraining set class distribution:")
for label, n_samples in zip(unique, counts):
    share = n_samples / n_train * 100
    print(f"  Class '{class_names[label]}': {n_samples} samples ({share:.1f}%)")

# Deep MLP: five hidden layers, adam optimiser, internal early stopping.
# NOTE(review): scikit-learn documents learning_rate="adaptive" as only used
# with solver="sgd"; with adam it appears to have no effect - confirm.
mlp_params = dict(
    random_state=RANDOM_STATE,
    verbose=False,
    hidden_layer_sizes=(512, 256, 128, 64, 32),
    activation="relu",
    solver="adam",
    learning_rate="adaptive",
    learning_rate_init=1e-4,
    alpha=5e-3,
    batch_size=128,
    max_iter=600,
    early_stopping=True,
    validation_fraction=0.2,
    n_iter_no_change=40,
)
mlp = MLPClassifier(**mlp_params)

# XGBoost: negative/positive count ratio of the training split as class weight.
# NOTE(review): the pipeline built later also applies SMOTE before the
# classifiers, so this weight may stack on top of the resampling - confirm
# that is intended.
n_neg, n_pos = counts[0], counts[1]
scale_weight = n_neg / n_pos if n_pos > 0 else 1.0
print(f"\nXGBoost scale_pos_weight: {scale_weight:.2f}")

xgb_params = dict(
    random_state=RANDOM_STATE,
    n_jobs=-1,
    verbosity=0,
    objective="binary:logistic",
    eval_metric="auc",
    scale_pos_weight=scale_weight,
    n_estimators=500,
    learning_rate=0.03,
    max_depth=4,
    min_child_weight=3,
    subsample=0.7,
    colsample_bytree=0.7,
    gamma=0.1,
    reg_alpha=0.1,
    reg_lambda=1.0,
)
xgb_clf = xgb.XGBClassifier(**xgb_params)

# Random Forest: shallow, leaf-regularised trees with balanced class weights.
rf_params = dict(
    random_state=RANDOM_STATE,
    n_jobs=-1,
    verbose=0,
    class_weight="balanced",
    n_estimators=300,
    max_features="sqrt",
    max_depth=10,
    min_samples_leaf=5,
    min_samples_split=10,
)
rf_clf = RandomForestClassifier(**rf_params)

print("\n".join([
    "\nEnsemble models:",
    "  1. Deep MLP: 5 layers (512, 256, 128, 64, 32)",
    "  2. XGBoost: 500 trees, max_depth=4",
    "  3. Random Forest: 300 trees, balanced weights",
]))


MODEL ARCHITECTURE

Training set class distribution:
  Class 'in': 1142 samples (40.5%)
  Class 'out': 1681 samples (59.5%)

XGBoost scale_pos_weight: 0.68

Ensemble models:
  1. Deep MLP: 5 layers (512, 256, 128, 64, 32)
  2. XGBoost: 500 trees, max_depth=4
  3. Random Forest: 300 trees, balanced weights


In [15]:
# ===================================================================
# 6. ENSEMBLE WITH SMOTE
# ===================================================================
# Soft-voting ensemble: averaged class probabilities, weighted per model.
base_estimators = [
    ("deep_mlp", mlp),
    ("xgboost", xgb_clf),
    ("random_forest", rf_clf),
]
vote_weights = [1.5, 2.0, 1.0]  # same order as base_estimators

voting_clf = VotingClassifier(
    estimators=base_estimators,
    weights=vote_weights,
    voting="soft",
    n_jobs=-1,
)

# imblearn pipeline: preprocess -> SMOTE oversampling -> ensemble.
pipeline_steps = [
    ("preprocessor", preprocessor),
    ("smote", SMOTE(random_state=RANDOM_STATE, k_neighbors=5)),
    ("ensemble", voting_clf),
]
ensemble_pipeline = ImbPipeline(steps=pipeline_steps)


In [16]:
# ===================================================================
# 7. TRAINING
# ===================================================================
print("\n" + "\n".join([
    "="*70,
    "TRAINING",
    "="*70,
    "\nTraining ensemble with SMOTE oversampling...",
]))

# Fit preprocessing, SMOTE and the voting ensemble on the training split only.
ensemble_pipeline.fit(X_train, y_train)

# Persist the fitted pipeline so it can be reloaded without retraining.
model_path = os.path.join(ARTIFACT_DIR, "ensemble_pipeline.joblib")
joblib.dump(ensemble_pipeline, model_path)
print("✓ Model saved")


TRAINING

Training ensemble with SMOTE oversampling...
✓ Model saved


In [17]:
# ===================================================================
# 8. EVALUATION FUNCTION
# ===================================================================
def evaluate_split(name, model, X, y_true):
    """Score a fitted binary classifier on one split at the default threshold.

    Prints the metrics, saves a confusion-matrix heatmap to
    ``ARTIFACT_DIR/cm_<name>.png`` and returns the metrics as a dict.
    Relies on the notebook globals ``class_names`` and ``ARTIFACT_DIR``.

    Args:
        name: Split label used in the printed header and the PNG file name.
        model: Fitted estimator exposing ``predict`` (and, for ROC AUC,
            ``predict_proba``).
        X: Features of the split.
        y_true: Encoded true labels; label 1 is treated as the positive class.

    Returns:
        dict with accuracy, precision, recall, f1, roc_auc (``None`` when it
        could not be computed), sensitivity, specificity, false positive and
        false negative counts, the confusion matrix as nested lists and the
        classification report.
    """
    binary_labels = [0, 1]
    y_pred = model.predict(X)

    # ROC AUC is best effort (the model may lack predict_proba, or the split
    # may hold a single class); report why it was skipped instead of hiding it.
    try:
        y_proba = model.predict_proba(X)[:, 1]
        auc = roc_auc_score(y_true, y_proba)
    except Exception as exc:
        print(f"  [{name}] ROC AUC not computed: {exc}")
        auc = None

    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, zero_division=0)
    rec = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)
    # Fixed labels keep the matrix 2x2 even when a class is absent from the
    # split, so the four-way unpacking below cannot fail.
    cm = confusion_matrix(y_true, y_pred, labels=binary_labels)
    report = classification_report(
        y_true, y_pred,
        labels=list(range(len(class_names))),
        target_names=class_names,
        output_dict=True, zero_division=0,
    )

    tn, fp, fn, tp = cm.ravel()
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

    metrics = {
        "split": name,
        "accuracy": float(acc),
        "precision": float(prec),
        "recall": float(rec),
        "f1": float(f1),
        "roc_auc": float(auc) if auc is not None else None,
        "sensitivity": float(sensitivity),
        "specificity": float(specificity),
        "false_positives": int(fp),
        "false_negatives": int(fn),
        "confusion_matrix": cm.tolist(),
        "classification_report": report,
    }

    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=class_names, yticklabels=class_names)
    plt.title(f"Confusion Matrix - {name}")
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.tight_layout()
    plt.savefig(os.path.join(ARTIFACT_DIR, f"cm_{name}.png"))
    plt.close()

    print(f"\n{name} Metrics:")
    print(f"  Accuracy : {acc:.4f}")
    print(f"  Precision: {prec:.4f}")
    print(f"  Recall   : {rec:.4f}")
    print(f"  F1       : {f1:.4f}")
    # `is not None` rather than truthiness: an AUC of exactly 0.0 is valid.
    if auc is not None:
        print(f"  ROC AUC  : {auc:.4f}")
    print(f"  Sensitivity ('{class_names[1]}'): {sensitivity:.4f}")
    print(f"  Specificity ('{class_names[0]}'): {specificity:.4f}")
    print(f"  False Positives: {fp}, False Negatives: {fn}")
    print("  Confusion Matrix:")
    print(f"  {class_names}")
    print(cm)

    return metrics

print("\n" + "\n".join(["="*70, "INITIAL EVALUATION", "="*70]))

# Evaluate in train -> val -> test order; the generator is consumed lazily in
# that order, so the printed reports keep the same sequence.
metrics_train, metrics_val, metrics_test = (
    evaluate_split(split_label, ensemble_pipeline, split_X, split_y)
    for split_label, split_X, split_y in (
        ("TRAIN", X_train, y_train),
        ("VAL", X_val, y_val),
        ("TEST", X_test, y_test),
    )
)


INITIAL EVALUATION

TRAIN Metrics:
  Accuracy : 0.8282
  Precision: 0.8655
  Recall   : 0.8424
  F1       : 0.8538
  ROC AUC  : 0.9131
  Sensitivity ('out'): 0.8424
  Specificity ('in'): 0.8074
  False Positives: 220, False Negatives: 265
  Confusion Matrix:
  ['in', 'out']
[[ 922  220]
 [ 265 1416]]

VAL Metrics:
  Accuracy : 0.7465
  Precision: 0.8025
  Recall   : 0.7625
  F1       : 0.7820
  ROC AUC  : 0.7961
  Sensitivity ('out'): 0.7625
  Specificity ('in'): 0.7228
  False Positives: 79, False Negatives: 100
  Confusion Matrix:
  ['in', 'out']
[[206  79]
 [100 321]]

TEST Metrics:
  Accuracy : 0.7271
  Precision: 0.7902
  Recall   : 0.7376
  F1       : 0.7630
  ROC AUC  : 0.8091
  Sensitivity ('out'): 0.7376
  Specificity ('in'): 0.7115
  False Positives: 103, False Negatives: 138
  Confusion Matrix:
  ['in', 'out']
[[254 103]
 [138 388]]


In [18]:
# ===================================================================
# 9. THRESHOLD OPTIMIZATION
# ===================================================================
print("\n" + "="*70)
print("THRESHOLD OPTIMIZATION")
print("="*70)

# Probability of the positive class (label 1) on the validation split.
y_val_proba = ensemble_pipeline.predict_proba(X_val)[:, 1]

# Candidate cut-offs 0.30 .. 0.75 in steps of 0.05. np.arange with a float
# step accumulates rounding error, so the grid is rounded to two decimals:
# the threshold that is stored and reused then equals the value printed.
thresholds = np.round(np.arange(0.3, 0.8, 0.05), 2)
best_threshold = 0.5
best_f1 = 0.0

# NOTE(review): the winner is chosen by F1 of label 1 alone. If it lands on
# the edge of the grid, the search range may be cutting the optimum off.
print("\nTuning threshold on validation set:")
for thresh in thresholds:
    y_pred_thresh = (y_val_proba >= thresh).astype(int)
    # zero_division=0 matches the evaluation helpers and avoids a warning
    # when a threshold yields no positive predictions.
    f1_thresh = f1_score(y_val, y_pred_thresh, zero_division=0)
    acc_thresh = accuracy_score(y_val, y_pred_thresh)
    # Fixed labels keep the matrix 2x2 even if one class is never predicted.
    cm_thresh = confusion_matrix(y_val, y_pred_thresh, labels=[0, 1])

    tn, fp, fn, tp = cm_thresh.ravel()
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

    print(f"  Threshold {thresh:.2f}: F1={f1_thresh:.4f}, Acc={acc_thresh:.4f}, "
          f"Sens={sensitivity:.4f}, Spec={specificity:.4f}")

    # Strict '>' keeps the first (lowest) threshold when F1 values tie.
    if f1_thresh > best_f1:
        best_f1 = f1_thresh
        best_threshold = thresh

print(f"\n✅ Best threshold: {best_threshold:.2f} (F1={best_f1:.4f})")

# Re-evaluate with optimized threshold
def evaluate_with_threshold(name, model, X, y_true, threshold):
    """Score a fitted binary classifier on one split at a custom threshold.

    Predictions are ``predict_proba[:, 1] >= threshold``. Prints the metrics,
    saves a confusion-matrix heatmap to
    ``ARTIFACT_DIR/cm_<name>_optimized.png`` and returns the metrics as a
    dict. Relies on the notebook globals ``class_names`` and ``ARTIFACT_DIR``.

    Args:
        name: Split label used in the printed header and the PNG file name.
        model: Fitted estimator exposing ``predict_proba``.
        X: Features of the split.
        y_true: Encoded true labels; label 1 is treated as the positive class.
        threshold: Probability cut-off at or above which label 1 is predicted.

    Returns:
        dict with the threshold, accuracy, precision, recall, f1, roc_auc
        (``None`` when it is undefined for the split), sensitivity,
        specificity, false positive and false negative counts and the
        confusion matrix as nested lists.
    """
    y_proba = model.predict_proba(X)[:, 1]
    y_pred = (y_proba >= threshold).astype(int)

    # roc_auc_score raises ValueError when y_true holds a single class; treat
    # AUC as optional here, the same way evaluate_split does.
    try:
        auc = roc_auc_score(y_true, y_proba)
    except ValueError as exc:
        print(f"  [{name}] ROC AUC not computed: {exc}")
        auc = None

    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, zero_division=0)
    rec = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)
    # Fixed labels keep the matrix 2x2 even when an extreme threshold sends
    # every prediction to one class, so the unpacking below cannot fail.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

    tn, fp, fn, tp = cm.ravel()
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

    print(f"\n{name} Metrics (threshold={threshold:.2f}):")
    print(f"  Accuracy : {acc:.4f}")
    print(f"  Precision: {prec:.4f}")
    print(f"  Recall   : {rec:.4f}")
    print(f"  F1       : {f1:.4f}")
    if auc is not None:
        print(f"  ROC AUC  : {auc:.4f}")
    print(f"  Sensitivity ('{class_names[1]}'): {sensitivity:.4f}")
    print(f"  Specificity ('{class_names[0]}'): {specificity:.4f}")
    print(f"  False Positives: {fp}, False Negatives: {fn}")
    print(f"  Confusion Matrix: {class_names}")
    print(cm)

    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Greens",
                xticklabels=class_names, yticklabels=class_names)
    plt.title(f"CM - {name} (optimized threshold={threshold:.2f})")
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.tight_layout()
    plt.savefig(os.path.join(ARTIFACT_DIR, f"cm_{name}_optimized.png"))
    plt.close()

    return {
        "split": name,
        "threshold": float(threshold),
        "accuracy": float(acc),
        "precision": float(prec),
        "recall": float(rec),
        "f1": float(f1),
        "roc_auc": float(auc) if auc is not None else None,
        "sensitivity": float(sensitivity),
        "specificity": float(specificity),
        "false_positives": int(fp),
        "false_negatives": int(fn),
        "confusion_matrix": cm.tolist()
    }

print("\n" + "\n".join(["="*70, "OPTIMIZED EVALUATION", "="*70]))

# Re-score validation first, then test, at the threshold tuned on validation.
metrics_val_opt, metrics_test_opt = (
    evaluate_with_threshold(split_label, ensemble_pipeline, split_X, split_y, best_threshold)
    for split_label, split_X, split_y in (
        ("VAL", X_val, y_val),
        ("TEST", X_test, y_test),
    )
)



THRESHOLD OPTIMIZATION

Tuning threshold on validation set:
  Threshold 0.30: F1=0.8165, Acc=0.7606, Sens=0.8931, Spec=0.5649
  Threshold 0.35: F1=0.8063, Acc=0.7550, Sens=0.8551, Spec=0.6070
  Threshold 0.40: F1=0.7972, Acc=0.7507, Sens=0.8219, Spec=0.6456
  Threshold 0.45: F1=0.8043, Acc=0.7663, Sens=0.8052, Spec=0.7088
  Threshold 0.50: F1=0.7820, Acc=0.7465, Sens=0.7625, Spec=0.7228
  Threshold 0.55: F1=0.7541, Acc=0.7238, Sens=0.7102, Spec=0.7439
  Threshold 0.60: F1=0.7215, Acc=0.7025, Sens=0.6461, Spec=0.7860
  Threshold 0.65: F1=0.6863, Acc=0.6827, Sens=0.5819, Spec=0.8316
  Threshold 0.70: F1=0.6087, Acc=0.6303, Sens=0.4822, Spec=0.8491
  Threshold 0.75: F1=0.5408, Acc=0.5935, Sens=0.4014, Spec=0.8772

✅ Best threshold: 0.30 (F1=0.8165)

OPTIMIZED EVALUATION

VAL Metrics (threshold=0.30):
  Accuracy : 0.7606
  Precision: 0.7520
  Recall   : 0.8931
  F1       : 0.8165
  ROC AUC  : 0.7961
  Sensitivity ('out'): 0.8931
  Specificity ('in'): 0.5649
  False Positives: 124, False N

In [19]:
# ===================================================================
# 10. CROSS-VALIDATION
# ===================================================================
print("\n" + "\n".join(["="*70, "CROSS-VALIDATION", "="*70]))

# 5-fold stratified CV of the whole pipeline on train+val (test stays unseen).
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
cv_results = cross_validate(
    ensemble_pipeline,
    X_train_full,
    y_train_full,
    scoring=["accuracy", "f1", "roc_auc"],
    cv=cv,
    n_jobs=-1,
    return_train_score=True,
)

# (summary label, cross_validate key prefix, metric) in report order.
# cross_validate names held-out fold scores "test_*"; the summary calls them
# "val" to keep them apart from the real test split.
cv_report_layout = [
    ("train", "train", "accuracy"),
    ("val", "test", "accuracy"),
    ("train", "train", "f1"),
    ("val", "test", "f1"),
    ("val", "test", "roc_auc"),
]
cv_summary = {
    f"{stat_name}_{summary_label}_{metric_name}":
        float(stat_fn(cv_results[f"{cv_prefix}_{metric_name}"]))
    for summary_label, cv_prefix, metric_name in cv_report_layout
    for stat_name, stat_fn in (("mean", np.mean), ("std", np.std))
}

print("\n".join(
    ["\n5-Fold Cross-Validation Results:"]
    + [f"  {stat_key}: {stat_val:.4f}" for stat_key, stat_val in cv_summary.items()]
))


CROSS-VALIDATION

5-Fold Cross-Validation Results:
  mean_train_accuracy: 0.8231
  std_train_accuracy: 0.0134
  mean_val_accuracy: 0.7385
  std_val_accuracy: 0.0072
  mean_train_f1: 0.8493
  std_train_f1: 0.0121
  mean_val_f1: 0.7764
  std_val_f1: 0.0102
  mean_val_roc_auc: 0.8023
  std_val_roc_auc: 0.0068


In [20]:
# ===================================================================
# 11. SAVE ALL METRICS AND METADATA
# ===================================================================
# Collect every metric dict produced so far into one JSON document.
initial_metrics = {"train": metrics_train, "val": metrics_val, "test": metrics_test}
optimized_metrics = {
    "val": metrics_val_opt,
    "test": metrics_test_opt,
    "best_threshold": float(best_threshold),
}
all_metrics = {
    "initial": initial_metrics,
    "optimized": optimized_metrics,
    "cv_summary": cv_summary,
}

with open(os.path.join(ARTIFACT_DIR, "metrics.json"), "w") as f:
    f.write(json.dumps(all_metrics, indent=4))

# Run metadata: dataset facts, feature lists and headline hyperparameters.
n_samples_total, n_columns_total = df.shape
meta = {
    "dataset_path": DATA_PATH,
    "n_samples": int(n_samples_total),
    "n_features": int(n_columns_total - 1),  # every column except the target
    "target_column": TARGET_COL,
    "class_names": class_names,
    "numeric_features": numeric_features,
    "categorical_features": categorical_features,
    "ensemble_type": "VotingClassifier (soft voting) with SMOTE",
    "models": ["Deep MLP (512,256,128,64,32)", "XGBoost", "Random Forest"],
    "mlp_hidden_layers": [512, 256, 128, 64, 32],
    "xgb_n_estimators": int(xgb_clf.n_estimators),
    "xgb_max_depth": int(xgb_clf.max_depth),
    "rf_n_estimators": int(rf_clf.n_estimators),
    "random_state": RANDOM_STATE,
    "test_size": TEST_SIZE,
    "val_size": VAL_SIZE,
    "created_at": datetime.now().isoformat(),
}

with open(os.path.join(ARTIFACT_DIR, "metadata.json"), "w") as f:
    f.write(json.dumps(meta, indent=4))

# Closing report, emitted with a single print call.
print("\n".join([
    "\n" + "="*70,
    "PIPELINE COMPLETE",
    "="*70,
    f"\n✅ All artifacts saved to: {ARTIFACT_DIR}",
    "\nSaved files:",
    "  - ensemble_pipeline.joblib (trained model)",
    "  - label_encoder.joblib",
    "  - metrics.json (all performance metrics)",
    "  - metadata.json",
    "  - confusion matrices (PNG)",
    "  - EDA plots (PNG)",
    "  - train/val/test splits (CSV)",
    "\nEnsemble combines:",
    "  - Deep MLP: 5 layers (512→256→128→64→32)",
    "  - XGBoost: 500 trees with class balancing",
    "  - Random Forest: 300 trees with balanced weights",
    "  - SMOTE oversampling for training balance",
    f"  - Optimized threshold: {best_threshold:.2f}",
    "\n" + "="*70,
]))


PIPELINE COMPLETE

✅ All artifacts saved to: C:\PERSONAL\PATIENT TREATMENT CLASSIFICATION/artifacts_patient_treatment

Saved files:
  - ensemble_pipeline.joblib (trained model)
  - label_encoder.joblib
  - metrics.json (all performance metrics)
  - metadata.json
  - confusion matrices (PNG)
  - EDA plots (PNG)
  - train/val/test splits (CSV)

Ensemble combines:
  - Deep MLP: 5 layers (512→256→128→64→32)
  - XGBoost: 500 trees with class balancing
  - Random Forest: 300 trees with balanced weights
  - SMOTE oversampling for training balance
  - Optimized threshold: 0.30



In [21]:
# ===================================================================
# 12. PLOT TRAINING METRICS AND PERFORMANCE VISUALIZATIONS
# ===================================================================
print("\n" + "\n".join(["="*70, "GENERATING PERFORMANCE PLOTS", "="*70]))

# -------------------------------------------------------------------
# 12a. Cross-Validation Scores Comparison
# -------------------------------------------------------------------
# One panel per metric: (cv_summary key suffix, axis label, splits shown).
# cv_summary holds no train ROC AUC, so that panel shows validation only.
cv_panels = [
    ("accuracy", "Accuracy", ("train", "val")),
    ("f1", "F1 Score", ("train", "val")),
    ("roc_auc", "ROC AUC", ("val",)),
]
cv_split_colors = {"train": "#2ecc71", "val": "#3498db"}

fig, axes = plt.subplots(1, 3, figsize=(15, 4))
for panel_ax, (metric_key, metric_label, shown_splits) in zip(axes, cv_panels):
    panel_ax.bar(
        [split_key.capitalize() for split_key in shown_splits],
        [cv_summary[f"mean_{split_key}_{metric_key}"] for split_key in shown_splits],
        yerr=[cv_summary[f"std_{split_key}_{metric_key}"] for split_key in shown_splits],
        capsize=5,
        color=[cv_split_colors[split_key] for split_key in shown_splits],
    )
    panel_ax.set_ylabel(metric_label)
    panel_ax.set_title(f"Cross-Validation {metric_label}")
    panel_ax.set_ylim([0, 1])
    panel_ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.savefig(os.path.join(ARTIFACT_DIR, "cv_metrics_comparison.png"), dpi=150)
plt.close()
print("✓ Saved cv_metrics_comparison.png")

# -------------------------------------------------------------------
# 12b. Train/Val/Test Performance Comparison
# -------------------------------------------------------------------
# Grouped bars: five metrics side by side for each of the three splits.
splits = ['Train', 'Val', 'Test']
per_split_metrics = (metrics_train, metrics_val, metrics_test)
accuracies = [m['accuracy'] for m in per_split_metrics]
precisions = [m['precision'] for m in per_split_metrics]
recalls = [m['recall'] for m in per_split_metrics]
f1s = [m['f1'] for m in per_split_metrics]
aucs = [m['roc_auc'] for m in per_split_metrics]

x = np.arange(len(splits))
width = 0.15

# (slot relative to the group centre, values, legend label, colour)
bar_series = [
    (-2, accuracies, 'Accuracy', '#3498db'),
    (-1, precisions, 'Precision', '#2ecc71'),
    (0, recalls, 'Recall', '#e74c3c'),
    (1, f1s, 'F1 Score', '#f39c12'),
    (2, aucs, 'ROC AUC', '#9b59b6'),
]

fig, ax = plt.subplots(figsize=(12, 6))
for slot, values, series_label, series_color in bar_series:
    ax.bar(x + slot*width, values, width, label=series_label, color=series_color)

ax.set_title('Model Performance Across Splits')
ax.set_ylabel('Score')
ax.set_xticks(x)
ax.set_xticklabels(splits)
ax.set_ylim([0, 1])
ax.legend()
ax.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.savefig(os.path.join(ARTIFACT_DIR, "metrics_comparison_all_splits.png"), dpi=150)
plt.close()
print("✓ Saved metrics_comparison_all_splits.png")

# -------------------------------------------------------------------
# 12c. Optimized vs Initial Performance (Val and Test)
# -------------------------------------------------------------------
# Two panels (validation, test): default threshold vs. tuned threshold.
metric_keys = ['accuracy', 'precision', 'recall', 'f1', 'roc_auc']
metric_names = ['Accuracy', 'Precision', 'Recall', 'F1', 'AUC']
x_pos = np.arange(len(metric_names))
width = 0.35

val_metrics_initial = [metrics_val[k] for k in metric_keys]
val_metrics_opt = [metrics_val_opt[k] for k in metric_keys]
test_metrics_initial = [metrics_test[k] for k in metric_keys]
test_metrics_opt = [metrics_test_opt[k] for k in metric_keys]

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
comparison_panels = [
    (axes[0], val_metrics_initial, val_metrics_opt, 'Validation Set: Initial vs Optimized'),
    (axes[1], test_metrics_initial, test_metrics_opt, 'Test Set: Initial vs Optimized'),
]
for panel_ax, initial_scores, optimized_scores, panel_title in comparison_panels:
    panel_ax.bar(x_pos - width/2, initial_scores, width,
                 label='Initial (thresh=0.5)', color='#3498db')
    panel_ax.bar(x_pos + width/2, optimized_scores, width,
                 label=f'Optimized (thresh={best_threshold:.2f})', color='#2ecc71')
    panel_ax.set_ylabel('Score')
    panel_ax.set_title(panel_title)
    panel_ax.set_xticks(x_pos)
    panel_ax.set_xticklabels(metric_names, rotation=45)
    panel_ax.legend()
    panel_ax.set_ylim([0, 1])
    panel_ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.savefig(os.path.join(ARTIFACT_DIR, "initial_vs_optimized.png"), dpi=150)
plt.close()
print("✓ Saved initial_vs_optimized.png")

# -------------------------------------------------------------------
# 12d. False Positives and False Negatives Comparison
# -------------------------------------------------------------------
# Error counts: left panel at threshold 0.5 (all splits), right panel at the
# tuned threshold (validation and test only).
splits_fp_fn = ['Train', 'Val', 'Test']
splits_opt = ['Val', 'Test']

initial_sources = (metrics_train, metrics_val, metrics_test)
optimized_sources = (metrics_val_opt, metrics_test_opt)
fps_initial = [m['false_positives'] for m in initial_sources]
fns_initial = [m['false_negatives'] for m in initial_sources]
fps_opt = [m['false_positives'] for m in optimized_sources]
fns_opt = [m['false_negatives'] for m in optimized_sources]

x_pos = np.arange(len(splits_fp_fn))
x_pos_opt = np.arange(len(splits_opt))
width = 0.35

fig, axes = plt.subplots(1, 2, figsize=(12, 5))
error_panels = [
    (axes[0], x_pos, splits_fp_fn, fps_initial, fns_initial,
     'Errors - Initial (threshold=0.5)'),
    (axes[1], x_pos_opt, splits_opt, fps_opt, fns_opt,
     f'Errors - Optimized (threshold={best_threshold:.2f})'),
]
for panel_ax, positions, tick_labels, fp_counts, fn_counts, panel_title in error_panels:
    panel_ax.bar(positions - width/2, fp_counts, width, label='False Positives', color='#e74c3c')
    panel_ax.bar(positions + width/2, fn_counts, width, label='False Negatives', color='#f39c12')
    panel_ax.set_ylabel('Count')
    panel_ax.set_title(panel_title)
    panel_ax.set_xticks(positions)
    panel_ax.set_xticklabels(tick_labels)
    panel_ax.legend()
    panel_ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.savefig(os.path.join(ARTIFACT_DIR, "false_positives_negatives.png"), dpi=150)
plt.close()
print("✓ Saved false_positives_negatives.png")

# -------------------------------------------------------------------
# 12e. ROC Curve (Test Set)
# -------------------------------------------------------------------
# roc_curve and auc come from the import cell at the top of the notebook.
# Positive-class (label 1) probabilities on the test split; the
# precision-recall section below reuses y_test_proba.
y_test_proba = ensemble_pipeline.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds_roc = roc_curve(y_test, y_test_proba)
roc_auc_val = auc(fpr, tpr)  # trapezoidal area under the (fpr, tpr) curve

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='#2ecc71', lw=2,
         label=f'ROC curve (AUC = {roc_auc_val:.4f})')
# Diagonal reference line, labelled "Random" in the legend.
plt.plot([0, 1], [0, 1], color='#95a5a6', lw=2, linestyle='--', label='Random')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve - Test Set')
plt.legend(loc="lower right")
plt.grid(alpha=0.3)
plt.tight_layout()
plt.savefig(os.path.join(ARTIFACT_DIR, "roc_curve_test.png"), dpi=150)
plt.close()
print("✓ Saved roc_curve_test.png")

# -------------------------------------------------------------------
# 12f. Precision-Recall Curve (Test Set)
# -------------------------------------------------------------------
# precision_recall_curve and average_precision_score come from the import
# cell at the top of the notebook. y_test_proba was computed in the ROC
# section above.
precision_curve, recall_curve, _ = precision_recall_curve(y_test, y_test_proba)
avg_precision = average_precision_score(y_test, y_test_proba)

plt.figure(figsize=(8, 6))
plt.plot(recall_curve, precision_curve, color='#3498db', lw=2,
         label=f'PR curve (AP = {avg_precision:.4f})')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve - Test Set')
plt.legend(loc="lower left")
plt.grid(alpha=0.3)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.tight_layout()
plt.savefig(os.path.join(ARTIFACT_DIR, "precision_recall_curve_test.png"), dpi=150)
plt.close()
print("✓ Saved precision_recall_curve_test.png")

# -------------------------------------------------------------------
# 12g. Per-Class Performance (Sensitivity and Specificity)
# -------------------------------------------------------------------
# Sensitivity and specificity for every split / threshold combination.
splits_sens_spec = ['Train', 'Val (init)', 'Val (opt)', 'Test (init)', 'Test (opt)']
ordered_metrics = (
    metrics_train,
    metrics_val,
    metrics_val_opt,
    metrics_test,
    metrics_test_opt,
)  # same order as splits_sens_spec
sensitivities = [m['sensitivity'] for m in ordered_metrics]
specificities = [m['specificity'] for m in ordered_metrics]

x_pos = np.arange(len(splits_sens_spec))
width = 0.35

fig, ax = plt.subplots(figsize=(10, 6))
paired_series = [
    (-width/2, sensitivities, f"Sensitivity ('{class_names[1]}' recall)", '#9b59b6'),
    (width/2, specificities, f"Specificity ('{class_names[0]}' recall)", '#1abc9c'),
]
for shift, values, series_label, series_color in paired_series:
    ax.bar(x_pos + shift, values, width, label=series_label, color=series_color)

ax.set_title('Per-Class Performance: Sensitivity & Specificity')
ax.set_ylabel('Score')
ax.set_xticks(x_pos)
ax.set_xticklabels(splits_sens_spec, rotation=30, ha='right')
ax.set_ylim([0, 1])
ax.legend()
ax.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.savefig(os.path.join(ARTIFACT_DIR, "sensitivity_specificity.png"), dpi=150)
plt.close()
print("✓ Saved sensitivity_specificity.png")

# -------------------------------------------------------------------
# 12h. Summary Report
# -------------------------------------------------------------------
# Plain-text report assembled from the metric dicts produced earlier.
summary_text = f"""
PATIENT TREATMENT CLASSIFICATION - ENSEMBLE MODEL
==================================================

Dataset: {DATA_PATH}
Samples: {df.shape[0]}
Features: {df.shape[1] - 1}
Target: {TARGET_COL}
Classes: {class_names}

ARCHITECTURE
------------
- Deep MLP: 5 layers (512, 256, 128, 64, 32)
- XGBoost: 500 trees, max_depth=4, scale_pos_weight={scale_weight:.2f}
- Random Forest: 300 trees, class_weight='balanced'
- Ensemble: Soft voting with weights [1.5, 2.0, 1.0]
- Data balancing: SMOTE oversampling

PERFORMANCE SUMMARY
-------------------
Test Set (Initial, threshold=0.5):
  Accuracy:  {metrics_test['accuracy']:.4f}
  Precision: {metrics_test['precision']:.4f}
  Recall:    {metrics_test['recall']:.4f}
  F1 Score:  {metrics_test['f1']:.4f}
  ROC AUC:   {metrics_test['roc_auc']:.4f}
  False Positives: {metrics_test['false_positives']}
  False Negatives: {metrics_test['false_negatives']}

Test Set (Optimized, threshold={best_threshold:.2f}):
  Accuracy:  {metrics_test_opt['accuracy']:.4f}
  Precision: {metrics_test_opt['precision']:.4f}
  Recall:    {metrics_test_opt['recall']:.4f}
  F1 Score:  {metrics_test_opt['f1']:.4f}
  ROC AUC:   {metrics_test_opt['roc_auc']:.4f}
  Sensitivity: {metrics_test_opt['sensitivity']:.4f}
  Specificity: {metrics_test_opt['specificity']:.4f}
  False Positives: {metrics_test_opt['false_positives']}
  False Negatives: {metrics_test_opt['false_negatives']}

Cross-Validation (5-fold):
  Mean Val Accuracy: {cv_summary['mean_val_accuracy']:.4f} ± {cv_summary['std_val_accuracy']:.4f}
  Mean Val F1:       {cv_summary['mean_val_f1']:.4f} ± {cv_summary['std_val_f1']:.4f}
  Mean Val ROC AUC:  {cv_summary['mean_val_roc_auc']:.4f} ± {cv_summary['std_val_roc_auc']:.4f}

Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
"""

# The report contains the non-ASCII "±" sign, so the encoding is pinned to
# UTF-8 instead of depending on the platform default.
with open(os.path.join(ARTIFACT_DIR, "performance_summary.txt"), "w", encoding="utf-8") as f:
    f.write(summary_text)

print("\n" + summary_text)
print("✓ Saved performance_summary.txt")

# Closing index of everything this section wrote to ARTIFACT_DIR.
print("\n".join([
    "\n" + "="*70,
    "ALL PLOTS GENERATED",
    "="*70,
    "\nGenerated visualizations:",
    "  1. cv_metrics_comparison.png",
    "  2. metrics_comparison_all_splits.png",
    "  3. initial_vs_optimized.png",
    "  4. false_positives_negatives.png",
    "  5. roc_curve_test.png",
    "  6. precision_recall_curve_test.png",
    "  7. sensitivity_specificity.png",
    "  8. performance_summary.txt",
    "="*70,
]))



GENERATING PERFORMANCE PLOTS
✓ Saved cv_metrics_comparison.png
✓ Saved metrics_comparison_all_splits.png
✓ Saved initial_vs_optimized.png
✓ Saved false_positives_negatives.png
✓ Saved roc_curve_test.png
✓ Saved precision_recall_curve_test.png
✓ Saved sensitivity_specificity.png


PATIENT TREATMENT CLASSIFICATION - ENSEMBLE MODEL

Dataset: C:\PERSONAL\PATIENT TREATMENT CLASSIFICATION\DATASET.csv
Samples: 4412
Features: 10
Target: SOURCE
Classes: ['in', 'out']

ARCHITECTURE
------------
- Deep MLP: 5 layers (512, 256, 128, 64, 32)
- XGBoost: 500 trees, max_depth=4, scale_pos_weight=0.68
- Random Forest: 300 trees, class_weight='balanced'
- Ensemble: Soft voting with weights [1.5, 2.0, 1.0]
- Data balancing: SMOTE oversampling

PERFORMANCE SUMMARY
-------------------
Test Set (Initial, threshold=0.5):
  Accuracy:  0.7271
  Precision: 0.7902
  Recall:    0.7376
  F1 Score:  0.7630
  ROC AUC:   0.8091
  False Positives: 103
  False Negatives: 138

Test Set (Optimized, threshold=0.30):
  Ac