In [1]:
import pandas as pd
import pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

from xgboost import XGBClassifier
from catboost import CatBoostClassifier

In [2]:
df = pd.read_csv("/workspaces/ml-notes-and-code/cleaned_dataset.csv")

# Recode target: 1 = diseased, 0 = non-diseased
df["Dataset Label"] = df["Dataset Label"].map({1: 1, 2: 0})


# Separate features & target
X = df.drop("Dataset Label", axis=1)
y = df["Dataset Label"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Train shape:", X_train.shape)
print("Test shape:", X_test.shape)

Train shape: (466, 10)
Test shape: (117, 10)


In [3]:
rf = RandomForestClassifier(
    n_estimators=500,
    max_depth=None,
    random_state=42,
    class_weight="balanced"
)

rf.fit(X_train, y_train)
rf_preds = rf.predict(X_test)

with open("rf_model.pkl", "wb") as f:
    pickle.dump(rf, f)

print("\nAccuracy:", accuracy_score(y_test, rf_preds))
print("\nClassification Report:\n", classification_report(y_test, rf_preds))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, rf_preds))


Accuracy: 0.7264957264957265

Classification Report:
               precision    recall  f1-score   support

           0       0.56      0.26      0.36        34
           1       0.75      0.92      0.83        83

    accuracy                           0.73       117
   macro avg       0.66      0.59      0.59       117
weighted avg       0.70      0.73      0.69       117


Confusion Matrix:
 [[ 9 25]
 [ 7 76]]


In [4]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

svm = SVC(
    kernel="rbf",
    C=1.0,
    gamma="scale",
    probability=True
)

svm.fit(X_train_scaled, y_train)
svm_preds = svm.predict(X_test_scaled)

with open("svm_model.pkl", "wb") as f:
    pickle.dump(svm, f)

with open("svm_scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)

print("\nAccuracy:", accuracy_score(y_test, svm_preds))
print("\nClassification Report:\n", classification_report(y_test, svm_preds))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, svm_preds))


Accuracy: 0.7094017094017094

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00        34
           1       0.71      1.00      0.83        83

    accuracy                           0.71       117
   macro avg       0.35      0.50      0.41       117
weighted avg       0.50      0.71      0.59       117


Confusion Matrix:
 [[ 0 34]
 [ 0 83]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [5]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


mlp = MLPClassifier(
    hidden_layer_sizes=(64,64,32),
    activation="relu",
    solver="adam",
    alpha=0.0001,
    learning_rate="adaptive",
    max_iter=500,
    random_state=42
)

# Train the network
mlp.fit(X_train_scaled, y_train)

# Predictions
mlp_preds = mlp.predict(X_test_scaled)

with open("mlp_model.pkl", "wb") as model_file:
    pickle.dump(mlp, model_file)

with open("mlp_scaler.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

print("Accuracy:", accuracy_score(y_test, mlp_preds))
print("\nClassification Report:\n", classification_report(y_test, mlp_preds))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, mlp_preds))

Accuracy: 0.7863247863247863

Classification Report:
               precision    recall  f1-score   support

           0       0.71      0.44      0.55        34
           1       0.80      0.93      0.86        83

    accuracy                           0.79       117
   macro avg       0.76      0.68      0.70       117
weighted avg       0.78      0.79      0.77       117


Confusion Matrix:
 [[15 19]
 [ 6 77]]




In [6]:
xgb = XGBClassifier(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=5,
    subsample=0.8,
    colsample_bytree=0.8,
    eval_metric="mlogloss"
)

xgb.fit(X_train, y_train)
xgb_preds = xgb.predict(X_test)

with open("xgboost_model.pkl", "wb") as f:
    pickle.dump(xgb, f)

print("\nAccuracy:", accuracy_score(y_test, xgb_preds))
print("\nClassification Report:\n", classification_report(y_test, xgb_preds))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, xgb_preds))


Accuracy: 0.7350427350427351

Classification Report:
               precision    recall  f1-score   support

           0       0.59      0.29      0.39        34
           1       0.76      0.92      0.83        83

    accuracy                           0.74       117
   macro avg       0.67      0.60      0.61       117
weighted avg       0.71      0.74      0.70       117


Confusion Matrix:
 [[10 24]
 [ 7 76]]


In [7]:
cat = CatBoostClassifier(
    iterations=400,
    depth=6,
    learning_rate=0.05,
    loss_function="MultiClass",
    verbose=0  # suppress giant logs
)

cat.fit(X_train, y_train)
cat_preds = cat.predict(X_test).flatten().astype(int)

with open("catboost_model.pkl", "wb") as f:
    pickle.dump(cat, f)

print("\nAccuracy:", accuracy_score(y_test, cat_preds))
print("\nClassification Report:\n", classification_report(y_test, cat_preds))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, cat_preds))


Accuracy: 0.717948717948718

Classification Report:
               precision    recall  f1-score   support

           0       0.53      0.29      0.38        34
           1       0.76      0.89      0.82        83

    accuracy                           0.72       117
   macro avg       0.64      0.59      0.60       117
weighted avg       0.69      0.72      0.69       117


Confusion Matrix:
 [[10 24]
 [ 9 74]]


In [1]:
import gradio as gr
import numpy as np
import pickle

# -----------------------
# Load all models
# -----------------------
models = {
    "Random Forest": "rf_model.pkl",
    "SVM": "svm_model.pkl",
    "XGBoost": "xgboost_model.pkl",
    "CatBoost": "catboost_model.pkl",
    "MLP Classifier": "mlp_model.pkl"
}

loaded_models = {}

for name, path in models.items():
    with open(path, "rb") as f:
        loaded_models[name] = pickle.load(f)

# -----------------------
# Inference function
# -----------------------
def predict(model_name, age, gender, tb, db, alkphos, sgpt, sgot, tp, alb, agr):
    
    # encode gender
    gender_val = 1 if gender.lower() == "male" else 0

    X = np.array([[age, gender_val, tb, db, alkphos, sgpt, sgot, tp, alb, agr]])

    model = loaded_models[model_name]
    
    pred = model.predict(X)[0]

    # probability (if available)
    try:
        prob = model.predict_proba(X)[0][pred]
    except:
        prob = "N/A"

    result = "Diseased" if pred == 1 else "Not Diseased"

    return f"**Prediction:** {result}", f"**Confidence:** {prob}"

# -----------------------
# Gradio Interface
# -----------------------
iface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Dropdown(["Random Forest", "SVM", "XGBoost", "CatBoost", "MLP Classifier"], label="Choose Model"),
        gr.Number(label="Age"),
        gr.Dropdown(["Male", "Female"], label="Gender"),
        gr.Number(label="Total Bilirubin (TB)"),
        gr.Number(label="Direct Bilirubin (DB)"),
        gr.Number(label="Alkaline Phosphotase (Alkphos)"),
        gr.Number(label="Sgpt"),
        gr.Number(label="Sgot"),
        gr.Number(label="Total Proteins (TP)"),
        gr.Number(label="Albumin (ALB)"),
        gr.Number(label="A/G Ratio")
    ],
    outputs=[
        gr.Markdown(label="Prediction"),
        gr.Markdown(label="Confidence")
    ],
    title="Liver Disease Classifier",
    description="Predict Diseased vs Non-Diseased using multiple trained ML models"
)

iface.launch(share=True)


  from .autonotebook import tqdm as notebook_tqdm


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://b32daeba1677a8d980.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)






Created dataset file at: .gradio/flagged/dataset1.csv
