In [1]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import pandas as pd
from joblib import dump

In [2]:
# Read the dataset from the CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer='MICE2024.csv')

In [3]:
# Encode the string labels in 'categori' as integer codes and store them
# in a new 'categori_code' column; `le` keeps the code -> label mapping.
le = LabelEncoder()
encoded_labels = le.fit_transform(df['categori'])
df['categori_code'] = encoded_labels

In [4]:
# Show which integer code the encoder assigned to each class label.
class_names = le.classes_
for i in range(len(class_names)):
    label = class_names[i]
    print(f"{label} => {i}")

BAIK => 0
SEDANG => 1
TIDAK SEHAT => 2


In [5]:
# Target: the integer-encoded label.
y = df['categori_code']

# Features: every column except the original label and its encoded copy.
label_columns = ['categori', 'categori_code']
X = df.drop(columns=label_columns)

In [6]:
# Stratified 5-fold cross-validation, shuffled with a fixed seed so the
# fold assignment is the same on every run.
cv_settings = {"n_splits": 5, "shuffle": True, "random_state": 42}
skf = StratifiedKFold(**cv_settings)

In [7]:
# Per-fold results: one classification report and one confusion matrix
# are appended to these lists for every fold.
RF_no_CSL_reports, RF_no_CSL_matrices = [], []

In [8]:
# Cross-validation loop: train, save and evaluate one Random Forest per fold.

# Start from empty result lists so that re-running this cell does not
# append a second copy of every fold's results (cleared in place so the
# existing list objects are kept).
RF_no_CSL_reports.clear()
RF_no_CSL_matrices.clear()

# Integer codes of all classes known to the encoder. Passing them to
# confusion_matrix keeps every matrix at (n_classes, n_classes), even if a
# class is missing from both the true and predicted labels of a fold;
# otherwise labelling the matrix with le.classes_ later would fail.
class_codes = np.arange(len(le.classes_))

for fold_idx, (train_index, test_index) in enumerate(skf.split(X, y), start=1):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Random Forest with a fixed seed
    RF = RandomForestClassifier(random_state=42, n_estimators=100)
    RF.fit(X_train, y_train)
    pred = RF.predict(X_test)

    # Save this fold's fitted model to disk
    model_RFFOLD = f'model_rf_fold{fold_idx}.pkl'
    dump(RF, model_RFFOLD)
    print(f" Model untuk Fold {fold_idx} disimpan sebagai '{model_RFFOLD}'")

    # Evaluate the model on the held-out fold
    report = classification_report(y_test, pred, output_dict=True, zero_division=0)
    matrix = confusion_matrix(y_test, pred, labels=class_codes)

    RF_no_CSL_reports.append(report)
    RF_no_CSL_matrices.append(matrix)

# Display the collected results together with the class names.
RF_no_CSL_reports, RF_no_CSL_matrices, le.classes_


 Model untuk Fold 1 disimpan sebagai 'model_rf_fold1.pkl'
 Model untuk Fold 2 disimpan sebagai 'model_rf_fold2.pkl'
 Model untuk Fold 3 disimpan sebagai 'model_rf_fold3.pkl'
 Model untuk Fold 4 disimpan sebagai 'model_rf_fold4.pkl'
 Model untuk Fold 5 disimpan sebagai 'model_rf_fold5.pkl'


([{'0': {'precision': 0.984375,
    'recall': 0.9130434782608695,
    'f1-score': 0.9473684210526315,
    'support': 69},
   '1': {'precision': 0.980225988700565,
    'recall': 0.994269340974212,
    'f1-score': 0.9871977240398293,
    'support': 349},
   '2': {'precision': 0.9722222222222222,
    'recall': 0.9722222222222222,
    'f1-score': 0.9722222222222222,
    'support': 36},
   'accuracy': 0.9801762114537445,
   'macro avg': {'precision': 0.9789410703075957,
    'recall': 0.9598450138191014,
    'f1-score': 0.9689294557715611,
    'support': 454},
   'weighted avg': {'precision': 0.9802219054107867,
    'recall': 0.9801762114537445,
    'f1-score': 0.979956887098088,
    'support': 454}},
  {'0': {'precision': 0.9850746268656716,
    'recall': 0.9565217391304348,
    'f1-score': 0.9705882352941176,
    'support': 69},
   '1': {'precision': 0.9886363636363636,
    'recall': 0.997134670487106,
    'f1-score': 0.9928673323823111,
    'support': 349},
   '2': {'precision': 1.0,
    

In [9]:
# Turn each fold's classification report into a DataFrame (one row per
# report entry) tagged with its 1-based fold number.
report_dfs = [
    pd.DataFrame(fold_report).transpose().assign(fold=fold_no)
    for fold_no, fold_report in enumerate(RF_no_CSL_reports, start=1)
]

# Stack the per-fold reports into a single DataFrame.
all_reports_df = pd.concat(report_dfs)

In [10]:
# Turn each fold's confusion matrix into a DataFrame whose rows and columns
# are labelled with the class names, tagged with its 1-based fold number.
matrix_dfs = [
    pd.DataFrame(fold_matrix, index=le.classes_, columns=le.classes_).assign(fold=fold_no)
    for fold_no, fold_matrix in enumerate(RF_no_CSL_matrices, start=1)
]

# Stack the per-fold matrices into a single DataFrame.
all_matrix_df = pd.concat(matrix_dfs)

In [11]:
# Save the evaluation results to one Excel workbook:
# the classification reports and the confusion matrices of all folds.
with pd.ExcelWriter("hasil_evaluasi_rf_tanpa_CSL2024.xlsx") as writer:

    # Sheet 1: classification reports
    all_reports_df.to_excel(writer, sheet_name="Classification Report")

    # Sheet 2: confusion matrices (plain string; the sheet name has no
    # placeholders, so no f-string is needed)
    all_matrix_df.to_excel(writer, sheet_name="ConfusionMatrix_Fold")

In [12]:
# `RF` is the model most recently assigned by the cross-validation loop,
# i.e. the classifier of the last fold; print its hyperparameters.
rf_params = RF.get_params()
print(rf_params)

{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}
