In [None]:
import pandas as pd


# Single source of truth for the results table: column name -> dtype.
# 'Model' holds a text label; every metric column is a float.
_results_schema = {
    'Model': str,
    'Accuracy': float,
    'Recall': float,
    'ROC-AUC': float,
    'PR-AUC': float,
}

# Start from zero-length columns (in schema order) and cast them, so the
# table is empty but already carries its final dtypes.
results_df = pd.DataFrame(
    {column: [] for column in _results_schema}
).astype(_results_schema)

In [None]:
from module import (
    evaluate_and_append,
    X,
    y,
    skf
)

from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.under_sampling import RandomUnderSampler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV


# Two named steps, in order: 'rus' (RandomUnderSampler) then 'rf'
# (RandomForestClassifier). The step names are the prefixes used by the
# 'rus__...' / 'rf__...' keys of the hyper-parameter search space.
# Both steps are seeded with random_state=42.
pipeline_rf = ImbPipeline(
    steps=[
        ('rus', RandomUnderSampler(random_state=42)),
        ('rf', RandomForestClassifier(n_jobs=-1, random_state=42)),
    ]
)

# Candidate values for the randomized search. Keys follow the
# '<pipeline step>__<parameter>' convention: 'rus__' targets the
# under-sampler step and 'rf__' targets the random-forest step.
param_grid_rf_expanded = dict(
    # Under-sampler
    rus__sampling_strategy=[0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    # Forest size and tree shape
    rf__n_estimators=[300, 500, 800, 1200, 2000],
    rf__max_depth=[6, 8, 10, 12, 15, 20, None],
    rf__min_samples_leaf=[1, 2, 5, 10, 20, 50],
    rf__min_samples_split=[2, 5, 10, 20, 50],
    rf__max_features=['sqrt', 'log2', 0.3, 0.5, 0.7, None],
    # Split criterion and class re-weighting
    rf__criterion=['gini', 'entropy'],
    rf__class_weight=[None, 'balanced', 'balanced_subsample'],
)

# Randomized search over `param_grid_rf_expanded` for the RUS + forest
# pipeline: 200 sampled candidates, each scored by recall using the
# imported `skf` splitter. error_score='raise' makes a failing candidate
# raise instead of being recorded with a placeholder score.
# NOTE(review): n_jobs=-1 is requested here and also on the
# RandomForestClassifier inside `pipeline_rf`; nested parallelism may
# oversubscribe the CPUs -- confirm this is intended.
search_rf = RandomizedSearchCV(
    pipeline_rf,
    param_grid_rf_expanded,
    # what to search and how to score it
    n_iter=200,
    scoring='recall',
    cv=skf,
    random_state=42,
    # failure handling and execution
    error_score='raise',
    n_jobs=-1,
    verbose=1,
)

# Run the hyper-parameter search on the imported X / y.
search_rf.fit(X, y)

# Report the winning configuration and its cross-validated recall.
print('RandomForest+RUS best params:', search_rf.best_params_)
print('RandomForest+RUS best recall:', search_rf.best_score_)

# Hand the best pipeline to the project helper and rebind `results_df`
# to whatever it returns.
# NOTE(review): the helper receives the same X, y and `skf` that were used
# for model selection; if it cross-validates on them, the reported metrics
# may be optimistic -- confirm against evaluate_and_append.
results_df = evaluate_and_append(
    results_df=results_df,
    model_name='RandomForestClassifier+RUS',
    best_estimator=search_rf.best_estimator_,
    cv=skf,
    X=X,
    y=y,
)

print(results_df)