In [1]:
import pandas as pd

df = pd.read_csv("Chronic_Kidney_Dsease_data.csv")

In [2]:
selected_features = [
    'Age',
    'Gender',
    'BMI',
    'SystolicBP',
    'DiastolicBP',
    'HbA1c',
    'SerumCreatinine',
    'GFR',
    'HemoglobinLevels',
    'CholesterolTotal'
]
target_column = 'Diagnosis'

df_subset = df[selected_features + [target_column]].copy()


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Handle missing values (opsional: ubah strategi sesuai kebutuhan)
df_subset = df_subset.dropna()

# Ubah Gender jadi numerik
df_subset['Gender'] = df_subset['Gender'].map({'Male': 1, 'Female': 0})

# Split data
X = df_subset[selected_features]
y = df_subset[target_column]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [4]:
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.25      0.04      0.07        24
           1       0.93      0.99      0.96       308

    accuracy                           0.92       332
   macro avg       0.59      0.52      0.52       332
weighted avg       0.88      0.92      0.89       332



In [5]:
import pickle
import joblib

In [6]:
# Save model
with open('heart_disease_model.pkl', 'wb') as f:
    pickle.dump(model, f)

# Save feature list untuk konsistensi input
with open('selected_features.pkl', 'wb') as f:
    pickle.dump(selected_features, f)

In [7]:
joblib.dump(model, 'heart_disease_model.joblib')
joblib.dump(selected_features, 'selected_features.joblib')

['selected_features.joblib']

In [8]:
# 2. Test loading model
def test_model_loading():
    # Load model
    loaded_model = joblib.load('heart_disease_model.joblib')
    loaded_features = joblib.load('selected_features.joblib')
    
    # Test prediction
    sample_data = X_test.iloc[0:1]  # Ambil 1 sample
    prediction = loaded_model.predict(sample_data)
    probability = loaded_model.predict_proba(sample_data)
    
    print(f"Prediction: {prediction[0]}")
    print(f"Probability: {probability[0]}")
    print(f"Features: {loaded_features}")
    
    return loaded_model, loaded_features

In [9]:
# Jalankan test
loaded_model, loaded_features = test_model_loading()

Prediction: 1
Probability: [0.16 0.84]
Features: ['Age', 'Gender', 'BMI', 'SystolicBP', 'DiastolicBP', 'HbA1c', 'SerumCreatinine', 'GFR', 'HemoglobinLevels', 'CholesterolTotal']


In [10]:
model_info = {
    'model_type': 'RandomForestClassifier',
    'features': selected_features,
    'target_column': 'Diagnosis',
    'classes': ['Tidak Berisiko', 'Berisiko'],
    'accuracy': 0.92,
    'created_date': '2025-06-02'
}

with open('model_info.pkl', 'wb') as f:
    pickle.dump(model_info, f)