## Import the libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

## Load the dataset and do some EDA

In [None]:
# Load the credit-card transactions CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; it is kept
# unchanged here, but a path relative to the notebook would be more portable.
df = pd.read_csv('/Users/maggie/Documents/data_career/Portfolio_Projects/credit_card_fraud_detection/creditcard.csv')

# Quick exploratory look: first rows, schema, summary statistics and the
# distribution of the target column.
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only added a stray "None" line to the output.
df.info()
print(df.describe())
print(df['Class'].value_counts())

## Data preprocessing

In [None]:
# Drop the Time column; it is not used as a feature in this analysis.
df = df.drop(columns=['Time'])

# Separate the feature matrix from the target label.
X = df.drop(columns=['Class'])
y = df['Class']

# Split BEFORE scaling so that no statistics from the held-out rows leak
# into the preprocessing step. stratify=y keeps the class ratio the same
# in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Work on explicit copies so the column assignments below do not write into
# (or warn about) slices of X.
X_train = X_train.copy()
X_test = X_test.copy()

# Standardize Amount: the scaler learns mean/std from the training split
# only, and the same fitted transform is then applied to the test split.
# NOTE: df and X keep the raw Amount values; only X_train/X_test are scaled.
scaler = StandardScaler()
X_train['Amount'] = scaler.fit_transform(X_train[['Amount']]).ravel()
X_test['Amount'] = scaler.transform(X_test[['Amount']]).ravel()

## Handling Class Imbalance with SMOTE

In [None]:
# Oversample with SMOTE on the training split only; X_test / y_test are not
# passed in, so the evaluation data is left as it was split.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(
    X_train,
    y_train,
)

## Model Training and Evaluation

### Logistic Regression

In [None]:
# Fit a default-configured logistic regression on the resampled training
# data (fit() returns the estimator itself), then report per-class metrics
# on the test split.
lr = LogisticRegression().fit(X_train_resampled, y_train_resampled)
y_pred_lr = lr.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred_lr))

### Random Forest Classifier

In [None]:
# Fit a 100-tree random forest with a fixed seed on the resampled training
# data (fit() returns the estimator itself), then report per-class metrics
# on the test split.
rf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train_resampled, y_train_resampled)
y_pred_rf = rf.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred_rf))

### XGBoost Classifier

In [None]:
# Fit an XGBoost classifier (log-loss as the evaluation metric) on the
# resampled training data, then report per-class metrics on the test split.
# NOTE(review): use_label_encoder may be deprecated in the installed xgboost
# version -- confirm; it is kept so behaviour is unchanged.
xgb = XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
).fit(X_train_resampled, y_train_resampled)
y_pred_xgb = xgb.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred_xgb))

## Model Performance Comparison

In [None]:
# ROC-AUC measures how well a model ranks positives above negatives, so it
# needs continuous scores. Feeding it hard 0/1 predictions collapses the ROC
# curve to a single operating point and misreports the metric.
# Column 1 of predict_proba is the probability of the second entry in
# classes_ -- assumes Class is encoded as 0/1 with 1 = fraud; TODO confirm.
y_score_lr = lr.predict_proba(X_test)[:, 1]
y_score_rf = rf.predict_proba(X_test)[:, 1]
y_score_xgb = xgb.predict_proba(X_test)[:, 1]

print("Logistic Regression AUC-ROC:", roc_auc_score(y_test, y_score_lr))
print("Random Forest AUC-ROC:", roc_auc_score(y_test, y_score_rf))
print("XGBoost AUC-ROC:", roc_auc_score(y_test, y_score_xgb))