In [1]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import pandas as pd
from joblib import dump

In [2]:
# Load the raw dataset from the CSV file into a DataFrame.
csv_path = 'MICE2024.csv'
df = pd.read_csv(csv_path)

In [3]:
# Encode the string class labels in 'categori' as integer codes and keep the
# fitted encoder so the codes can be mapped back to class names later.
le = LabelEncoder()
le.fit(df['categori'])
df['categori_code'] = le.transform(df['categori'])

In [4]:
# Show which integer code the encoder assigned to each class name.
for code, class_name in enumerate(le.classes_):
    print(f"{class_name} => {code}")

BAIK => 0
SEDANG => 1
TIDAK SEHAT => 2


In [5]:
# Target: the integer-encoded class label.
y = df['categori_code']
# Features: every column except the raw label and its encoded copy.
target_columns = ['categori', 'categori_code']
X = df.drop(columns=target_columns)

In [6]:
# Stratified 5-fold cross-validation splitter; shuffling with a fixed seed
# keeps the fold assignment reproducible between runs.
skf = StratifiedKFold(
    n_splits=5,
    random_state=42,
    shuffle=True,
)

In [7]:
# Per-fold results: classification reports and confusion matrices.
RFCSL_reports, RFCSL_matrices = [], []

In [8]:
# Cross-validation loop: for every stratified fold, train a cost-sensitive
# Random Forest, save it to disk, and record its evaluation on the held-out part.

# Reset the accumulators here so that re-running this cell does not append a
# second set of folds to results left over from a previous run.
RFCSL_reports = []
RFCSL_matrices = []

# Encoded class ids in LabelEncoder order. Passing them explicitly keeps every
# confusion matrix len(le.classes_) x len(le.classes_), even when a class is
# absent from both y_test and pred in a fold.
class_ids = np.arange(len(le.classes_))

for fold_idx, (train_index, test_index) in enumerate(skf.split(X, y), start=1):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Random Forest with cost-sensitive learning: class_weight='balanced'
    # weights classes inversely to their frequency in the training labels.
    RFCSL = RandomForestClassifier(class_weight='balanced', random_state=42, n_estimators=100)
    RFCSL.fit(X_train, y_train)
    pred = RFCSL.predict(X_test)

    # Persist the fitted model of this fold to its own file.
    model_RFCSLFOLD = f'model_rfcsl_fold{fold_idx}.pkl'
    dump(RFCSL, model_RFCSLFOLD)
    print(f" Model untuk Fold {fold_idx} disimpan sebagai '{model_RFCSLFOLD}'")

    # Evaluate on the held-out fold. zero_division=0 reports 0 instead of
    # warning when a metric's denominator is zero.
    report = classification_report(y_test, pred, output_dict=True, zero_division=0)
    matrix = confusion_matrix(y_test, pred, labels=class_ids)

    RFCSL_reports.append(report)
    RFCSL_matrices.append(matrix)

# Display the collected results together with the class names.
RFCSL_reports, RFCSL_matrices, le.classes_


 Model untuk Fold 1 disimpan sebagai 'model_rfcsl_fold1.pkl'
 Model untuk Fold 2 disimpan sebagai 'model_rfcsl_fold2.pkl'
 Model untuk Fold 3 disimpan sebagai 'model_rfcsl_fold3.pkl'
 Model untuk Fold 4 disimpan sebagai 'model_rfcsl_fold4.pkl'
 Model untuk Fold 5 disimpan sebagai 'model_rfcsl_fold5.pkl'


([{'0': {'precision': 1.0,
    'recall': 0.927536231884058,
    'f1-score': 0.9624060150375939,
    'support': 69},
   '1': {'precision': 0.9775280898876404,
    'recall': 0.997134670487106,
    'f1-score': 0.9872340425531915,
    'support': 349},
   '2': {'precision': 0.9705882352941176,
    'recall': 0.9166666666666666,
    'f1-score': 0.9428571428571428,
    'support': 36},
   'accuracy': 0.9801762114537445,
   'macro avg': {'precision': 0.9827054417272527,
    'recall': 0.9471125230126102,
    'f1-score': 0.9641657334826427,
    'support': 454},
   'weighted avg': {'precision': 0.9803931274039092,
    'recall': 0.9801762114537445,
    'f1-score': 0.9799417467654514,
    'support': 454}},
  {'0': {'precision': 1.0,
    'recall': 0.9710144927536232,
    'f1-score': 0.9852941176470589,
    'support': 69},
   '1': {'precision': 0.9914772727272727,
    'recall': 1.0,
    'f1-score': 0.9957203994293866,
    'support': 349},
   '2': {'precision': 1.0,
    'recall': 0.9722222222222222,
   

In [9]:
# Convert each fold's classification report (a nested dict) into a DataFrame
# with one row per report entry, tagged with its 1-based fold number.
report_dfs = []
for fold_no, fold_report in enumerate(RFCSL_reports, start=1):
    per_fold = pd.DataFrame(fold_report).T
    report_dfs.append(per_fold.assign(fold=fold_no))

# Stack all folds into a single DataFrame.
all_reports_df = pd.concat(report_dfs)

In [10]:
# Turn every fold's confusion matrix into a DataFrame whose rows and columns
# are labelled with the class names, tagged with its 1-based fold number.
matrix_dfs = [
    pd.DataFrame(fold_matrix, index=le.classes_, columns=le.classes_).assign(fold=fold_no)
    for fold_no, fold_matrix in enumerate(RFCSL_matrices, start=1)
]

# Stack all folds into a single DataFrame.
all_matrix_df = pd.concat(matrix_dfs)

In [11]:
# Save the evaluation results to one Excel workbook: the classification
# reports and the confusion matrices each go to their own sheet.
output_path = "hasil_evaluasi_RFCSL2024.xlsx"
with pd.ExcelWriter(output_path) as xlsx:
    # Sheet 1: classification reports of all folds.
    all_reports_df.to_excel(xlsx, sheet_name="Classification Report")
    # Sheet 2: confusion matrices of all folds, stacked in one sheet.
    all_matrix_df.to_excel(xlsx, sheet_name="ConfusionMatrix_Fold")