In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE

df = pd.read_csv("creditcard.csv").sample(frac=0.1, random_state=1)

if 'Class' not in df.columns:
    print("Target column 'Class' not found in dataset.")
else:
    print("Original class distribution:")
    print(df['Class'].value_counts())

    X = df.drop('Class', axis=1)
    y = df['Class']

    sm = SMOTE(random_state=42)
    X_resampled, y_resampled = sm.fit_resample(X, y)

    print("\nAfter SMOTE class distribution:")
    print(pd.Series(y_resampled).value_counts())

    X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.3, random_state=42)

    model = RandomForestClassifier()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    print("\nConfusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))


Original class distribution:
Class
0    28432
1       49
Name: count, dtype: int64

After SMOTE class distribution:
Class
0    28432
1    28432
Name: count, dtype: int64

Confusion Matrix:
[[8507    3]
 [   0 8550]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      8510
           1       1.00      1.00      1.00      8550

    accuracy                           1.00     17060
   macro avg       1.00      1.00      1.00     17060
weighted avg       1.00      1.00      1.00     17060

