In [11]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier
import joblib
import pandas as pd


In [12]:

# 1. Loading the dataset

data = pd.read_csv("../Data/cleaned_data.csv")

X = data.drop("prognosis", axis=1)
y = data["prognosis"]

In [13]:
# Encode disease labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

In [14]:
# 2. Define Models
# =============================
models = {
    "xgb": xgb.XGBClassifier(
        n_estimators=200,
        learning_rate=0.05,
        max_depth=5,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
        use_label_encoder=False,
        eval_metric="mlogloss"
    ),
    "lgb": lgb.LGBMClassifier(
        n_estimators=200,
        learning_rate=0.05,
        max_depth=-1,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42
    ),
    "cat": CatBoostClassifier(
        iterations=200,
        learning_rate=0.05,
        depth=6,
        random_state=42,
        verbose=0
    )
}


In [15]:
# 3. Train & Evaluate
# =============================
for name, model in models.items():
    print(f"\n🚀 Training {name.upper()}...")
    model.fit(X_train, y_train)
    
    # Predict
    y_pred = model.predict(X_test)
    y_pred_labels = le.inverse_transform(y_pred)
    y_test_labels = le.inverse_transform(y_test)
    
    # Metrics
    acc = accuracy_score(y_test, y_pred)
    print(f"✅ {name.upper()} Accuracy: {acc:.4f}")
    print(f"\n{name.upper()} Classification Report:\n",
          classification_report(y_test_labels, y_pred_labels))
    
    # Save model
    joblib.dump(model, f"../saved_models/{name}_disease_model.pkl")


🚀 Training XGB...


Parameters: { "use_label_encoder" } are not used.



✅ XGB Accuracy: 1.0000

XGB Classification Report:
                                          precision    recall  f1-score   support

(vertigo) Paroymsal  Positional Vertigo       1.00      1.00      1.00         1
                                   AIDS       1.00      1.00      1.00         1
                                   Acne       1.00      1.00      1.00         1
                    Alcoholic hepatitis       1.00      1.00      1.00         2
                                Allergy       1.00      1.00      1.00         1
                              Arthritis       1.00      1.00      1.00         1
                       Bronchial Asthma       1.00      1.00      1.00         1
                   Cervical spondylosis       1.00      1.00      1.00         1
                            Chicken pox       1.00      1.00      1.00         2
                    Chronic cholestasis       1.00      1.00      1.00         2
                            Common Cold       1.00      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


✅ CAT Accuracy: 1.0000

CAT Classification Report:
                                          precision    recall  f1-score   support

(vertigo) Paroymsal  Positional Vertigo       1.00      1.00      1.00         1
                                   AIDS       1.00      1.00      1.00         1
                                   Acne       1.00      1.00      1.00         1
                    Alcoholic hepatitis       1.00      1.00      1.00         2
                                Allergy       1.00      1.00      1.00         1
                              Arthritis       1.00      1.00      1.00         1
                       Bronchial Asthma       1.00      1.00      1.00         1
                   Cervical spondylosis       1.00      1.00      1.00         1
                            Chicken pox       1.00      1.00      1.00         2
                    Chronic cholestasis       1.00      1.00      1.00         2
                            Common Cold       1.00      

  y = column_or_1d(y, warn=True)


In [16]:
joblib.dump(le, "../saved_models/label_encoder.pkl")
print("\n All models + encoder saved successfully!")


 All models + encoder saved successfully!
