In [7]:
# ==========================
# Kidney Disease Training
# ==========================

import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier

# Step 1: Load dataset
df = pd.read_csv("../data/kidney_disease.csv")

# Step 2: Clean data
df = df.replace('?', np.nan)   # Replace '?' with NaN
df.dropna(inplace=True)        # Drop missing rows

# Step 3: Strip spaces
for col in df.columns:
    if df[col].dtype == 'object':
        df[col] = df[col].str.strip()

# Step 4: Target & Features
y = df['classification'].replace({'ckd': 1, 'notckd': 0})
X = df.drop(columns=['classification'])

# Step 5: Handle categorical columns
for col in X.columns:
    if X[col].dtype == 'object':
        le = LabelEncoder()
        X[col] = le.fit_transform(X[col].astype(str))

# Step 6: Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Step 7: Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 8: Train model
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train, y_train)

# Step 9: Evaluate
print("Training Accuracy:", model.score(X_train, y_train))
print("Testing Accuracy:", model.score(X_test, y_test))

# Step 10: Save model & scaler
with open("../model/kidney_model.pkl", "wb") as f:
    pickle.dump(model, f)

with open("../model/kidney_scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)

print("\n✅ Model and Scaler saved as kidney_model.pkl & kidney_scaler.pkl")


  y = df['classification'].replace({'ckd': 1, 'notckd': 0})


Training Accuracy: 1.0
Testing Accuracy: 1.0

✅ Model and Scaler saved as kidney_model.pkl & kidney_scaler.pkl


In [6]:
from sklearn.metrics import classification_report, confusion_matrix

y_pred = model.predict(X_test)

print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))



Confusion Matrix:
 [[23  0]
 [ 0  9]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00         9

    accuracy                           1.00        32
   macro avg       1.00      1.00      1.00        32
weighted avg       1.00      1.00      1.00        32

