In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import (
    classification_report, accuracy_score, precision_score,
    recall_score, f1_score, confusion_matrix
)
import kagglehub

In [None]:
# Global experiment settings. TEST_SIZE is fixed -- do not change it.
TEST_SIZE = 0.3     # fraction of rows held out by train_test_split
RANDOM_SEED = 42    # seed shared by the split and the random forest

In [None]:
# Fetch the dataset through kagglehub; the returned path is used as the
# directory that contains creditcard.csv.
path = kagglehub.dataset_download("mlg-ulb/creditcardfraud")

# Read the CSV and cast the 'Class' label column to int in one expression.
data = pd.read_csv(f"{path}/creditcard.csv").astype({'Class': int})

In [None]:
# prepare data
# Drop the 'Time' column. errors='ignore' makes this cell safe to re-run:
# without it, a second execution raised KeyError because 'Time' was already
# gone from `data`.
data = data.drop(columns=['Time'], errors='ignore')

# Standardise 'Amount'. StandardScaler expects a 2-D array, hence the reshape;
# ravel() flattens the (n, 1) result so a plain 1-D column is assigned back.
# NOTE(review): the scaler is fitted on every row of `data`, i.e. before any
# train/test split is made -- confirm that this is acceptable for the analysis.
data['Amount'] = (
    StandardScaler()
    .fit_transform(data['Amount'].values.reshape(-1, 1))
    .ravel()
)

In [None]:
# feature matrix and target vector
X = data.drop(['Class'], axis=1).values   # every column except the label
Y = data['Class'].values                  # integer class label

# train-test split
# stratify=Y keeps the class proportions of Y the same (up to rounding) in the
# train and test partitions. Without it a purely random split can shift the
# share of the minority class between the two sets, which makes the test
# metrics noisier. (The labels are presumably imbalanced -- the grid search
# already uses class_weight='balanced'.) TEST_SIZE and the seed are unchanged.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=TEST_SIZE, random_state=RANDOM_SEED, stratify=Y
)

In [None]:
# evaluation function
def evaluation(y_true, y_pred, model_name="Model"):
    """Print accuracy, precision, recall, F1 and a classification report.

    Precision, recall and F1 are computed with scikit-learn's default binary
    averaging, so they describe the positive class (label 1).

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, same length as ``y_true``.
    model_name : str, default "Model"
        Label used in the printed header.

    Returns
    -------
    None
        The function only prints; nothing is returned.
    """
    # zero_division=0 yields the same 0.0 that scikit-learn's default ("warn")
    # produces when a metric's denominator is zero (e.g. the model never
    # predicts the positive class), but without an UndefinedMetricWarning
    # cluttering the cell output.
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, zero_division=0)
    recall = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)

    print(f'\n{model_name} Evaluation:')
    print('===' * 15)
    print(f'         Accuracy: {accuracy:.15f}')
    print(f'  Precision Score: {precision:.15f}')
    print(f'     Recall Score: {recall:.15f}')
    print(f'         F1 Score: {f1:.15f}')
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred, zero_division=0))

In [None]:
# Search space for the random forest: 2 * 3 * 2 * 2 * 2 * 1 = 48 combinations.
param_grid = {
    'n_estimators': [100, 200],          # forest size
    'max_depth': [10, 20, None],         # None leaves tree depth unlimited
    'min_samples_split': [2, 5],         # samples required to split a node
    'min_samples_leaf': [1, 2],          # samples required at a leaf
    'max_features': ['sqrt', 'log2'],    # features considered per split
    'class_weight': ['balanced'],        # single option: always reweight classes
}

In [None]:
# Untuned random forest used as the estimator template for the grid search;
# only the seed is fixed here.
base_rf = RandomForestClassifier(
    random_state=RANDOM_SEED,
)

In [None]:
# set up GridSearchCV
# Candidates are ranked by F1 rather than accuracy. Accuracy is dominated by
# the majority class, so on a fraud label it barely separates a useful model
# from one that rarely flags fraud; F1 is also one of the metrics printed by
# evaluation(), so selection and reporting agree.
grid_search = GridSearchCV(
    estimator=base_rf,
    param_grid=param_grid,
    scoring='f1',           # alternatives: 'recall', 'average_precision'
    cv=3,                   # 3-fold cross-validation per candidate
    n_jobs=-1,              # Use all CPU cores
    verbose=2               # Print progress
)

In [None]:
# Fit every candidate in the grid on the training split. The fit call stays the
# last expression so the notebook still renders the fitted search object.
print("Running grid search... (this may take a few minutes)")
grid_search.fit(X=X_train, y=y_train)

In [None]:
# Report the winning hyperparameter combination, then keep a handle on the
# estimator the search exposes as best_estimator_.
print(f"\nBest Parameters:\n{grid_search.best_params_}")
best_rf = grid_search.best_estimator_

In [None]:
# Score the selected model on the held-out test split and print its metrics.
y_pred = best_rf.predict(X_test)
evaluation(y_true=y_test, y_pred=y_pred, model_name="Tuned Random Forest")