In [11]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import pandas as pd

# Load the transactions; the 'Class' column is the prediction target.
df = pd.read_csv("creditcard.csv")

# Split the data into features and target
X = df.drop(columns=['Class'])
y = df['Class']

# Train/test split with stratification.
# The split happens BEFORE scaling so that the scaler's statistics are
# learned from the training rows only; fitting on the full dataset would
# leak information about the held-out rows into preprocessing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Work on explicit copies so the column assignments below modify
# independent frames and do not trigger pandas chained-assignment warnings.
X_train = X_train.copy()
X_test = X_test.copy()

# Normalize 'Time' and 'Amount' using statistics from the training set only
scaled_cols = ['Time', 'Amount']
scaler = StandardScaler()
X_train[scaled_cols] = scaler.fit_transform(X_train[scaled_cols])
X_test[scaled_cols] = scaler.transform(X_test[scaled_cols])

# Keep X itself in scaled form, as the original cell did, in case later
# cells reuse it; the transform uses the training-set statistics.
X[scaled_cols] = scaler.transform(X[scaled_cols])

# Train Random Forest Classifier
# class_weight='balanced' reweights classes inversely to their frequency.
clf = RandomForestClassifier(
    n_estimators=100, random_state=42, class_weight='balanced'
)
clf.fit(X_train, y_train)

# Predictions on the held-out test set
y_pred = clf.predict(X_test)

# Performance metrics
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred, output_dict=True)

# Bare expression: displayed as the cell's output in a notebook.
conf_matrix, class_report

(array([[56861,     3],
        [   25,    73]], dtype=int64),
 {'0': {'precision': 0.9995605245578877,
   'recall': 0.9999472425436128,
   'f1-score': 0.9997538461538461,
   'support': 56864.0},
  '1': {'precision': 0.9605263157894737,
   'recall': 0.7448979591836735,
   'f1-score': 0.8390804597701149,
   'support': 98.0},
  'accuracy': 0.9995084442259752,
  'macro avg': {'precision': 0.9800434201736807,
   'recall': 0.8724226008636431,
   'f1-score': 0.9194171529619806,
   'support': 56962.0},
  'weighted avg': {'precision': 0.9994933683404215,
   'recall': 0.9995084442259752,
   'f1-score': 0.9994774163960145,
   'support': 56962.0}})