In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from tqdm import tqdm

# ----------------------------
# 1. Read the pre-cleaned dataset from disk
# ----------------------------
file_path = "million2.csv"  # Point this at the CSV's actual location
df = pd.read_csv(file_path)

# ----------------------------
# 2. Separate the label column from the feature columns
# ----------------------------
target_column = "Is Fraud?"
# Labels: the target column cast to integers.
y = df[target_column].astype(int)
# Features: every remaining column of the frame.
X = df.drop(target_column, axis="columns")

# ----------------------------
# 3. Set Up 10-Fold Stratified Cross-Validation with TQDM
# ----------------------------
# Shuffled, seeded stratified splitter: the fold assignment is reproducible
# from run to run.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# The progress-bar total is read from the splitter so it cannot drift from
# n_splits, and enumerate() numbers the folds (1-based) instead of a
# hand-maintained counter. tqdm wraps the inner iterable so the bar keeps
# its known length.
cv_splits = tqdm(skf.split(X, y), total=skf.get_n_splits(), desc="CV Folds")
for fold, (train_index, test_index) in enumerate(cv_splits, start=1):
    # Positional row selection for the current fold's train/test partitions.
    X_train = X.iloc[train_index]
    X_test = X.iloc[test_index]
    y_train = y.iloc[train_index]
    y_test = y.iloc[test_index]

    # ----------------------------
    # 4. Train the Random Forest Classifier (CPU only)
    # ----------------------------
    # A fresh model per fold so nothing learned on one fold leaks into the
    # next. n_jobs=-1 spreads tree building over all available CPU cores;
    # the fitted forest is governed by random_state, not by the worker count.
    clf = RandomForestClassifier(n_jobs=-1, random_state=42)
    clf.fit(X_train, y_train)

    # ----------------------------
    # 5. Make Predictions on the Test Set
    # ----------------------------
    y_pred = clf.predict(X_test)

    # ----------------------------
    # 6. Print the Classification Report for the Current Fold
    # ----------------------------
    print(f"\n--- Classification Report for Fold {fold} ---")
    print(classification_report(y_test, y_pred))


CV Folds:  10%|█         | 1/10 [04:35<41:18, 275.34s/it]


--- Classification Report for Fold 1 ---
              precision    recall  f1-score   support

           1       0.99      1.00      1.00     97025
           2       0.97      0.77      0.86      2975

    accuracy                           0.99    100000
   macro avg       0.98      0.88      0.93    100000
weighted avg       0.99      0.99      0.99    100000



CV Folds:  20%|██        | 2/10 [09:18<37:17, 279.74s/it]


--- Classification Report for Fold 2 ---
              precision    recall  f1-score   support

           1       0.99      1.00      1.00     97025
           2       0.96      0.79      0.87      2975

    accuracy                           0.99    100000
   macro avg       0.98      0.90      0.93    100000
weighted avg       0.99      0.99      0.99    100000



CV Folds:  30%|███       | 3/10 [14:07<33:07, 283.95s/it]


--- Classification Report for Fold 3 ---
              precision    recall  f1-score   support

           1       0.99      1.00      1.00     97025
           2       0.97      0.78      0.86      2975

    accuracy                           0.99    100000
   macro avg       0.98      0.89      0.93    100000
weighted avg       0.99      0.99      0.99    100000



CV Folds:  40%|████      | 4/10 [18:44<28:06, 281.17s/it]


--- Classification Report for Fold 4 ---
              precision    recall  f1-score   support

           1       0.99      1.00      1.00     97024
           2       0.97      0.78      0.86      2976

    accuracy                           0.99    100000
   macro avg       0.98      0.89      0.93    100000
weighted avg       0.99      0.99      0.99    100000



CV Folds:  50%|█████     | 5/10 [23:54<24:18, 291.72s/it]


--- Classification Report for Fold 5 ---
              precision    recall  f1-score   support

           1       0.99      1.00      1.00     97024
           2       0.97      0.78      0.86      2976

    accuracy                           0.99    100000
   macro avg       0.98      0.89      0.93    100000
weighted avg       0.99      0.99      0.99    100000

