In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from imblearn.over_sampling import SMOTE

import joblib

# Configure seaborn with the "whitegrid" style for any plots drawn later.
sns.set(style="whitegrid")

In [2]:
# Load the dataset from CSV; the 'date' column is parsed into datetimes.
df = pd.read_csv(
    'modis_flood_features_cleaned_v1.1.csv',
    parse_dates=['date'],
)

In [3]:
# Separate the label from the predictors: 'target' is the label column, and
# the features are every remaining column except 'date'.
y = df['target']
X = df.drop(columns=['date', 'target'])

In [4]:
# Force every feature column to a numeric dtype. Values that cannot be parsed
# become NaN (errors='coerce'), and all NaN values are then replaced with 0.
# NOTE(review): unparseable and missing values both end up as 0 — confirm that
# 0 is an acceptable fill value for these features.
X = X.apply(lambda column: pd.to_numeric(column, errors='coerce'))
X = X.fillna(0)

In [5]:
# Hold out 20% of the rows as a test set. The split is stratified on the
# label so both parts keep the same class proportions, and the seed is fixed
# so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [6]:
# Random forest without SMOTE: fitted on the training split as-is.
rf_1 = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Hard class predictions on the held-out test set.
y_pred_1 = rf_1.predict(X_test)

# ROC AUC is scored from the probability of the second class
# (column index 1 of predict_proba).
roc_auc_1 = roc_auc_score(
    y_true=y_test,
    y_score=rf_1.predict_proba(X_test)[:, 1],
)

In [None]:
# Random forest with SMOTE: only the training split is resampled; the test
# split is left untouched and used for evaluation.
sm = SMOTE(random_state=42)
X_resampled, y_resampled = sm.fit_resample(X_train, y_train)

rf_2 = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_resampled, y_resampled)

# Hard class predictions on the held-out test set.
y_pred_2 = rf_2.predict(X_test)

# ROC AUC is scored from the probability of the second class
# (column index 1 of predict_proba).
roc_auc_2 = roc_auc_score(
    y_true=y_test,
    y_score=rf_2.predict_proba(X_test)[:, 1],
)

In [None]:
# Summary of the evaluation results for both models.
#
# The classification report is built once per model and its positive-class
# entry is reused for precision, recall and F1, instead of recomputing the
# whole report for every single metric.
# NOTE(review): the '1' key assumes the positive label is rendered as the
# string '1' in the report (e.g. integer 0/1 targets) — confirm the dtype of
# the 'target' column.
report_pos_1 = classification_report(y_test, y_pred_1, output_dict=True)['1']
report_pos_2 = classification_report(y_test, y_pred_2, output_dict=True)['1']

results = pd.DataFrame({
    'Model': [
        'Random Forest',
        'RF + SMOTE',
    ],
    # Accuracy is the fraction of test rows whose prediction equals the label.
    'Accuracy': [
        np.mean(y_test == y_pred_1),
        np.mean(y_test == y_pred_2),
    ],
    'Precision (1)': [
        report_pos_1['precision'],
        report_pos_2['precision'],
    ],
    'Recall (1)': [
        report_pos_1['recall'],
        report_pos_2['recall'],
    ],
    'F1 Score (1)': [
        report_pos_1['f1-score'],
        report_pos_2['f1-score'],
    ],
    'ROC AUC': [
        roc_auc_1, roc_auc_2
    ]
})

In [None]:
# Print a header line followed by the results table as plain text.
print("\n=== Hasil Evaluasi Model Random Forest ===", results, sep="\n")