In [2]:
import os
import joblib
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

# ============================================
# SETUP
# ============================================
# Root folder holding the trained model and the preprocessing artifacts.
# The default is the original location; set the CIRRHOSIS_OUTPUT_DIR
# environment variable to run this notebook on another machine without
# editing the cell.
OUTPUT_DIR = os.environ.get("CIRRHOSIS_OUTPUT_DIR", r"F:\Ai&ml\outputs")
MODEL_DIR = os.path.join(OUTPUT_DIR, "models")
DATASET_DIR = os.path.join(OUTPUT_DIR, "datasets")

print("="*70)
print("LIVER CIRRHOSIS STAGE PREDICTION")
print("="*70)

# ============================================
# LOAD MODEL AND PREPROCESSING OBJECTS
# ============================================
# NOTE(security): joblib.load unpickles its input, which can execute arbitrary
# code. Only point OUTPUT_DIR at artifacts produced by a trusted pipeline.
print("\nðŸ“¦ Loading model and preprocessing objects...")

# Load the best tuned model
model = joblib.load(os.path.join(MODEL_DIR, 'best_tuned_model.joblib'))
print("âœ“ Model loaded")

# Load preprocessing objects
label_encoder = joblib.load(os.path.join(DATASET_DIR, 'label_encoder.joblib'))
scaler = joblib.load(os.path.join(DATASET_DIR, 'scaler.joblib'))
feature_names = joblib.load(os.path.join(DATASET_DIR, 'feature_names.joblib'))
print("âœ“ Preprocessing objects loaded")

# Load preprocessing summary (a mapping that provides the numeric and
# categorical column lists read below)
preprocessing_summary = joblib.load(os.path.join(DATASET_DIR, 'preprocessing_summary.joblib'))
numeric_cols = preprocessing_summary['numeric_cols']
categorical_cols = preprocessing_summary['categorical_cols']

# Load a sample from training data to understand the structure
# NOTE(review): X_train_sample is not referenced anywhere in the visible
# cells; it is kept in case another cell relies on it.
X_train_sample = pd.read_csv(os.path.join(DATASET_DIR, 'X_train.csv'), nrows=1)

print(f"\nðŸ“‹ Model expects {len(feature_names)} features")
print(f"   Numeric features: {len(numeric_cols)}")
print(f"   Categorical features: {len(categorical_cols)}")

# ============================================
# SIMPLE PREDICTION FUNCTION
# ============================================
def predict_cirrhosis_stage(patient_data, verbose=True):
    """
    Predict cirrhosis stage for a patient

    Relies on the notebook-level objects `model`, `scaler`, `label_encoder`,
    `feature_names` and `numeric_cols` loaded earlier in this notebook.

    Parameters:
    -----------
    patient_data : dict or pandas.DataFrame
        Patient information with ALREADY ENCODED features.
        Must match the structure after one-hot encoding. A dict is treated
        as one patient; a DataFrame is copied, never modified in place.
    verbose : bool
        Whether to print detailed information

    Returns:
    --------
    predicted_stage : label
        The predicted cirrhosis stage for the FIRST row of the input, as
        decoded by `label_encoder.inverse_transform` (its type is whatever
        the encoder stores, not necessarily `str`).
    probabilities : dict or None
        Probability for each stage keyed by `str(stage)`, or None when the
        probabilities could not be computed.

    Raises:
    -------
    ValueError
        If `patient_data` contains no rows.
    """
    # NOTE(review): several printed literals below contain sequences such as
    # "ðŸ“Š" that look like mis-decoded UTF-8 symbols; they are preserved
    # exactly as found - confirm the intended characters before changing them.

    # Convert to DataFrame
    if isinstance(patient_data, dict):
        df = pd.DataFrame([patient_data])
    else:
        df = patient_data.copy()

    if len(df) == 0:
        raise ValueError("patient_data contains no rows to predict on")

    if verbose:
        print("\n" + "="*70)
        print("MAKING PREDICTION")
        print("="*70)
        print(f"\nðŸ“Š Input data shape: {df.shape}")

    # Remember which expected features the caller did not supply so the
    # zero-fill below is visible instead of silent.
    expected_cols = list(feature_names)
    missing_cols = [col for col in expected_cols if col not in df.columns]

    # A single reindex adds absent features as 0, drops unknown columns and
    # puts everything in the training order.
    df = df.reindex(columns=expected_cols, fill_value=0)

    if verbose:
        print(f"âœ“ Aligned to {len(feature_names)} features")
        if missing_cols:
            print(f"   Note: {len(missing_cols)} feature(s) not supplied, "
                  f"filled with 0: {missing_cols}")

    # Scale numeric features (kept in `numeric_cols` order for the scaler)
    numeric_in_processed = [col for col in numeric_cols if col in df.columns]
    if numeric_in_processed:
        df[numeric_in_processed] = scaler.transform(df[numeric_in_processed])
        if verbose:
            print(f"âœ“ Scaled {len(numeric_in_processed)} numeric features")

    # Make prediction
    prediction_encoded = model.predict(df)
    predicted_stage = label_encoder.inverse_transform(prediction_encoded)[0]

    # Get probabilities. This stays best-effort (some estimators expose no
    # predict_proba), but only ordinary exceptions are absorbed so that
    # KeyboardInterrupt / SystemExit still propagate.
    try:
        probabilities = model.predict_proba(df)[0]
        prob_dict = {
            str(stage): float(prob)
            for stage, prob in zip(label_encoder.classes_, probabilities)
        }
    except Exception as exc:
        prob_dict = None
        if verbose:
            print(f"   Note: class probabilities unavailable "
                  f"({type(exc).__name__}: {exc})")

    if verbose:
        print(f"\nðŸŽ¯ Predicted Stage: {predicted_stage}")
        if prob_dict:
            print(f"\nðŸ“Š Prediction Probabilities:")
            for stage, prob in sorted(prob_dict.items()):
                print(f"   Stage {stage}: {prob:.2%}")

    return predicted_stage, prob_dict

# ============================================
# HELPER FUNCTION - CREATE PROPERLY FORMATTED INPUT
# ============================================
def create_patient_input(n_days, age, sex, drug, ascites, hepatomegaly,
                         spiders, edema, bilirubin, cholesterol, albumin,
                         copper, alk_phos, sgot, tryglicerides, platelets,
                         prothrombin):
    """
    Helper function to create properly formatted patient input

    Numeric arguments are copied through unchanged; each categorical argument
    becomes a single 0/1 indicator column.

    All categorical variables should be provided as strings (matching is
    case-insensitive and ignores surrounding whitespace):
    - sex: "M" or "F"
    - drug: "D-penicillamine" or "Placebo"
    - ascites: "Y" or "N"
    - hepatomegaly: "Y" or "N"
    - spiders: "Y" or "N"
    - edema: "Y" or "N"

    Any value other than the positive level ("M", "Placebo", "Y") is encoded
    as 0; no validation error is raised.

    Returns:
    --------
    patient : dict
        17 keys: the 11 numeric features followed by Sex_M, Drug_Placebo,
        Ascites_Y, Hepatomegaly_Y, Spiders_Y and Edema_Y.

    NOTE(review): the recorded run reports that the model expects 20
    features while this helper emits 17, so three features are zero-filled
    at prediction time - confirm which encoded columns are missing here.
    """

    def _flag(value, positive):
        # 1 when `value` equals `positive`, ignoring case and outer whitespace.
        return 1 if value.strip().upper() == positive else 0

    # `drug` was previously compared case-sensitively, so "placebo" was
    # silently encoded as D-penicillamine. Non-string values still map to 0,
    # as they did before.
    is_placebo = isinstance(drug, str) and drug.strip().upper() == "PLACEBO"

    return {
        # Numeric features
        "N_Days": n_days,
        "Age": age,
        "Bilirubin": bilirubin,
        "Cholesterol": cholesterol,
        "Albumin": albumin,
        "Copper": copper,
        "Alk_Phos": alk_phos,
        "SGOT": sgot,
        "Tryglicerides": tryglicerides,
        "Platelets": platelets,
        "Prothrombin": prothrombin,
        # One-hot encoded categorical features (one indicator per variable)
        "Sex_M": _flag(sex, "M"),
        "Drug_Placebo": 1 if is_placebo else 0,
        "Ascites_Y": _flag(ascites, "Y"),
        "Hepatomegaly_Y": _flag(hepatomegaly, "Y"),
        "Spiders_Y": _flag(spiders, "Y"),
        "Edema_Y": _flag(edema, "Y"),
    }

# ============================================
# EXAMPLE PREDICTIONS
# ============================================
# Three hand-written patients, each built with create_patient_input and run
# through predict_cirrhosis_stage with verbose output. The section titles are
# the author's intended scenarios, not guaranteed model outputs.
print(f"\n{'=' * 70}")
print("EXAMPLE PREDICTIONS")
print("=" * 70)

# Example 1: Stage 1 (Early/Mild) Patient
print(f"\n{'-' * 70}")
print("EXAMPLE 1: Early Stage Patient")
print("-" * 70)

patient1 = create_patient_input(
    n_days=300, age=45, sex="F", drug="D-penicillamine",
    ascites="N", hepatomegaly="N", spiders="N", edema="N",
    bilirubin=1.2, cholesterol=200, albumin=4.0, copper=110,
    alk_phos=120, sgot=55, tryglicerides=150, platelets=210,
    prothrombin=12,
)
predicted_stage1, probs1 = predict_cirrhosis_stage(patient1)

# Example 2: Stage 2 (Moderate) Patient
print(f"\n{'-' * 70}")
print("EXAMPLE 2: Moderate Stage Patient")
print("-" * 70)

patient2 = create_patient_input(
    n_days=800, age=52, sex="M", drug="Placebo",
    ascites="N", hepatomegaly="Y", spiders="Y", edema="N",
    bilirubin=2.5, cholesterol=280, albumin=3.2, copper=180,
    alk_phos=250, sgot=120, tryglicerides=200, platelets=150,
    prothrombin=11,
)
predicted_stage2, probs2 = predict_cirrhosis_stage(patient2)

# Example 3: Stage 3 (Advanced) Patient
print(f"\n{'-' * 70}")
print("EXAMPLE 3: Advanced Stage Patient")
print("-" * 70)

patient3 = create_patient_input(
    n_days=1500, age=58, sex="F", drug="D-penicillamine",
    ascites="Y", hepatomegaly="Y", spiders="Y", edema="Y",
    bilirubin=4.5, cholesterol=350, albumin=2.5, copper=250,
    alk_phos=450, sgot=180, tryglicerides=280, platelets=95,
    prothrombin=14,
)
predicted_stage3, probs3 = predict_cirrhosis_stage(patient3)

# ============================================
# BATCH PREDICTION
# ============================================
print(f"\n{'=' * 70}")
print("BATCH PREDICTION EXAMPLE")
print("=" * 70)

# Stack the three example patients into one frame, one row per patient
batch_patients = pd.DataFrame([patient1, patient2, patient3])

print(f"\nðŸ“Š Predicting for {batch_patients.shape[0]} patients...")

# predict_cirrhosis_stage reports only the first row of its input, so each
# patient is passed as its own one-row slice.
batch_results = []
for idx in range(batch_patients.shape[0]):
    patient = batch_patients.iloc[idx:idx + 1]
    stage, probs = predict_cirrhosis_stage(patient, verbose=False)
    batch_results.append(dict(
        Patient_ID=idx + 1,
        Predicted_Stage=stage,
        # Confidence is the largest class probability, when probabilities exist
        Confidence=None if not probs else max(probs.values()),
    ))

results_df = pd.DataFrame(batch_results)
print("\nðŸ“‹ Batch Prediction Results:")
print(results_df.to_string(index=False))

# ============================================
# SAVE PREDICTIONS
# ============================================
# The CSV is written under OUTPUT_DIR/predictions, creating the folder if needed
PREDICTIONS_DIR = os.path.join(OUTPUT_DIR, "predictions")
os.makedirs(PREDICTIONS_DIR, exist_ok=True)

results_df.to_csv(
    os.path.join(PREDICTIONS_DIR, 'batch_predictions.csv'),
    index=False,
)
print(f"\nâœ“ Predictions saved to: {PREDICTIONS_DIR}")

# ============================================
# ALTERNATIVE: Direct prediction with properly formatted dict
# ============================================
print(f"\n{'=' * 70}")
print("ALTERNATIVE METHOD: Direct Dictionary Input")
print("=" * 70)
print("\nIf you already have one-hot encoded data, use this format:")

# Same keys, in the same order, that create_patient_input produces; the
# indicator columns are supplied directly as 0/1 values.
direct_patient = dict(
    N_Days=400,
    Age=50,
    Bilirubin=1.8,
    Cholesterol=240,
    Albumin=3.5,
    Copper=140,
    Alk_Phos=180,
    SGOT=80,
    Tryglicerides=170,
    Platelets=180,
    Prothrombin=11,
    Sex_M=1,             # Male
    Drug_Placebo=0,      # D-penicillamine
    Ascites_Y=0,         # No
    Hepatomegaly_Y=1,    # Yes
    Spiders_Y=0,         # No
    Edema_Y=0,           # No
)

predicted_direct, probs_direct = predict_cirrhosis_stage(direct_patient)

# ============================================
# USAGE GUIDE
# ============================================
# Prints a copy-paste reference for the two input styles demonstrated in this
# notebook: building the record with create_patient_input, or passing an
# already one-hot encoded dict straight to predict_cirrhosis_stage.
# The guide is plain printed text; none of the code inside it is executed.
print("\n" + "="*70)
print("USAGE GUIDE")
print("="*70)
print("""
METHOD 1: Using the helper function (RECOMMENDED):

patient = create_patient_input(
    n_days=400,
    age=50,
    sex="M",                  # "M" or "F"
    drug="Placebo",           # "D-penicillamine" or "Placebo"
    ascites="N",              # "Y" or "N"
    hepatomegaly="Y",         # "Y" or "N"
    spiders="N",              # "Y" or "N"
    edema="N",                # "Y" or "N"
    bilirubin=1.8,
    cholesterol=240,
    albumin=3.5,
    copper=140,
    alk_phos=180,
    sgot=80,
    tryglicerides=170,
    platelets=180,
    prothrombin=11
)

predicted_stage, probabilities = predict_cirrhosis_stage(patient)

---

METHOD 2: Direct dictionary with one-hot encoding:

patient = {
    "N_Days": 400,
    "Age": 50,
    "Bilirubin": 1.8,
    "Cholesterol": 240,
    "Albumin": 3.5,
    "Copper": 140,
    "Alk_Phos": 180,
    "SGOT": 80,
    "Tryglicerides": 170,
    "Platelets": 180,
    "Prothrombin": 11,
    "Sex_M": 1,              # 1 for Male, 0 for Female
    "Drug_Placebo": 0,       # 1 for Placebo, 0 for D-penicillamine
    "Ascites_Y": 0,          # 1 for Yes, 0 for No
    "Hepatomegaly_Y": 1,     # 1 for Yes, 0 for No
    "Spiders_Y": 0,          # 1 for Yes, 0 for No
    "Edema_Y": 0             # 1 for Yes, 0 for No
}

predicted_stage, probabilities = predict_cirrhosis_stage(patient)

---

The function will automatically handle:
âœ“ Feature alignment
âœ“ Missing features (filled with 0)
âœ“ Numeric feature scaling
âœ“ Prediction with confidence scores
""")

# Closing banner with the author's sign-off
print("="*70)
print("By_OwenXAGK")
print("="*70)

LIVER CIRRHOSIS STAGE PREDICTION

ðŸ“¦ Loading model and preprocessing objects...
âœ“ Model loaded
âœ“ Preprocessing objects loaded

ðŸ“‹ Model expects 20 features
   Numeric features: 11
   Categorical features: 7

EXAMPLE PREDICTIONS

----------------------------------------------------------------------
EXAMPLE 1: Early Stage Patient
----------------------------------------------------------------------

MAKING PREDICTION

ðŸ“Š Input data shape: (1, 17)
âœ“ Aligned to 20 features
âœ“ Scaled 11 numeric features

ðŸŽ¯ Predicted Stage: 3

ðŸ“Š Prediction Probabilities:
   Stage 1: 41.20%
   Stage 2: 13.79%
   Stage 3: 45.01%

----------------------------------------------------------------------
EXAMPLE 2: Moderate Stage Patient
----------------------------------------------------------------------

MAKING PREDICTION

ðŸ“Š Input data shape: (1, 17)
âœ“ Aligned to 20 features
âœ“ Scaled 11 numeric features

ðŸŽ¯ Predicted Stage: 3

ðŸ“Š Prediction Probabilities:
   Stage 1: 9.32%
   Sta

In [5]:
print(f"\n{'=' * 50}")
print("TESTING TARGET: STAGE 1")
print("=" * 50)

# Probe patient the author designed to land in Stage 1: stable, with blood
# values and symptoms the author labelled as healthy.
patient_1 = create_patient_input(
    # Follow-up and demographics: very long follow-up, young patient
    n_days=4000, age=30, sex="F", drug="Placebo",
    # Clinical signs: none present (no fluid retention)
    ascites="N", hepatomegaly="N", spiders="N", edema="N",
    # Lab values the author chose as normal / favourable
    bilirubin=0.5, cholesterol=200, albumin=4.5, copper=15,
    alk_phos=100, sgot=35, tryglicerides=90, platelets=400,
    prothrombin=9.5,
)

# Left as the cell's last expression so the returned tuple is also displayed
predict_cirrhosis_stage(patient_1)


TESTING TARGET: STAGE 1

MAKING PREDICTION

ðŸ“Š Input data shape: (1, 17)
âœ“ Aligned to 20 features
âœ“ Scaled 11 numeric features

ðŸŽ¯ Predicted Stage: 1

ðŸ“Š Prediction Probabilities:
   Stage 1: 78.91%
   Stage 2: 21.00%
   Stage 3: 0.09%


(np.int64(1),
 {'1': 0.7890802025794983,
  '2': 0.21004395186901093,
  '3': 0.000875875644851476})

In [6]:
# Build a "Super Healthy" probe patient: young, long follow-up, no symptoms,
# and lab values the author labelled as normal or healthy.
healthy_patient = create_patient_input(
    # Follow-up and demographics
    n_days=3000, age=30, sex="F", drug="Placebo",
    # Clinical signs: none present
    ascites="N", hepatomegaly="N", spiders="N", edema="N",
    # Lab values chosen by the author as low / normal
    bilirubin=0.5, cholesterol=200, albumin=4.5, copper=20,
    alk_phos=100, sgot=30, tryglicerides=100, platelets=300,
    prothrombin=10,
)

print("\n--- TEST: HEALTHY PATIENT ---")
pred, probs = predict_cirrhosis_stage(healthy_patient)


--- TEST: HEALTHY PATIENT ---

MAKING PREDICTION

ðŸ“Š Input data shape: (1, 17)
âœ“ Aligned to 20 features
âœ“ Scaled 11 numeric features

ðŸŽ¯ Predicted Stage: 2

ðŸ“Š Prediction Probabilities:
   Stage 1: 25.25%
   Stage 2: 74.61%
   Stage 3: 0.15%
