In [6]:
# ==========================
# Liver Disease Training
# ==========================

import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier

# Step 1: Load dataset
df = pd.read_csv("../data/indian_liver_patient.csv")

# Step 2: Handle missing values
# Incomplete rows are dropped *before* encoding so that a missing Gender
# entry is removed here instead of being handed to the LabelEncoder.
df.dropna(inplace=True)

# Step 3: Encode categorical columns (Gender)
# NOTE(review): only the model and the scaler are pickled in Step 8; the
# fitted encoder `le` is not, so inference code must apply the same Gender
# mapping (see `le.classes_`) -- confirm against the serving code.
le = LabelEncoder()
df['Gender'] = le.fit_transform(df['Gender'])

# Step 4: Features & Target
# 'Dataset' is the label column: 1 = disease, 2 = no disease. Recode it as a
# binary target (1 = disease, 0 = no disease); every other column is a feature.
y = df['Dataset'].eq(1).astype(int)
X = df.drop('Dataset', axis=1)

# Step 5: Train-test split
# Hold out 20% of the rows, stratified on the target, with a fixed seed so the
# split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

# Scale features
# The scaler is fitted on the training split only and then applied to both
# splits, so no statistics from the test split are used when fitting.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Step 6: Train model
# 200-tree random forest with a fixed seed.
model = RandomForestClassifier(random_state=42, n_estimators=200).fit(X_train, y_train)

# Step 7: Evaluate
# Report mean accuracy on the training split, then on the held-out split.
for label, features, target in (
    ("Training Accuracy:", X_train, y_train),
    ("Testing Accuracy:", X_test, y_test),
):
    print(label, model.score(features, target))

# Step 8: Save model & scaler
# Each fitted object is pickled to its own file under ../model/, model first.
for path, artifact in (
    ("../model/liver_model.pkl", model),
    ("../model/liver_scaler.pkl", scaler),
):
    with open(path, "wb") as f:
        pickle.dump(artifact, f)

print("\n✅ Model and Scaler saved as liver_model.pkl & liver_scaler.pkl")


Training Accuracy: 1.0
Testing Accuracy: 0.7413793103448276

✅ Model and Scaler saved as liver_model.pkl & liver_scaler.pkl
