In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline

In [2]:
data = pd.read_csv('creditcard.csv', on_bad_lines='skip')

In [21]:
missing_values = data.isnull().sum()

In [22]:
X = data.drop(columns=['Class'])
y = data['Class']

In [23]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

missing_values

Time                0
V1                  0
V2                  0
V3                  0
V4                  0
V5                  0
V6                  0
V7                  0
V8                  0
V9                  0
V10                 0
V11                 0
V12                 0
V13                 0
V14                 0
V15                 0
V16                 0
V17                 0
V18                 0
V19                 0
V20                 0
V21                 0
V22                 0
V23                 0
V24                 0
V25                 0
V26                 0
V27                 0
V28                 0
Class               0
NormalizedAmount    0
dtype: int64

In [31]:
from imblearn.over_sampling import SMOTE, ADASYN # Import ADASYN



In [32]:
class_distribution = y.value_counts()

In [34]:
minority_class_count = class_distribution.min()

In [35]:
k_neighbors = min(3, minority_class_count - 1)

In [36]:
smote = SMOTE(sampling_strategy='minority', k_neighbors=k_neighbors, random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

In [37]:
resampled_class_distribution = y_resampled.value_counts()

In [38]:
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.3, random_state=42)

In [39]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

In [40]:
rf_model.fit(X_train, y_train)

In [41]:
y_pred_rf = rf_model.predict(X_test)

In [45]:
from sklearn.metrics import precision_score, recall_score, f1_score


In [46]:
precision_rf = precision_score(y_test, y_pred_rf)
recall_rf = recall_score(y_test, y_pred_rf)
f1_rf = f1_score(y_test, y_pred_rf)

In [47]:
report_rf = classification_report(y_test, y_pred_rf)

In [48]:
xgb_model = XGBClassifier(random_state=42)

In [49]:
xgb_model.fit(X_train, y_train)

In [50]:
y_pred_xgb = xgb_model.predict(X_test)

In [51]:
precision_xgb = precision_score(y_test, y_pred_xgb)
recall_xgb = recall_score(y_test, y_pred_xgb)
f1_xgb = f1_score(y_test, y_pred_xgb)

In [52]:
report_xgb = classification_report(y_test, y_pred_xgb)

In [53]:
missing_values, class_distribution, resampled_class_distribution, precision_rf, recall_rf, f1_rf, report_rf, precision_xgb, recall_xgb, f1_xgb, report_xgb

(Time                0
 V1                  0
 V2                  0
 V3                  0
 V4                  0
 V5                  0
 V6                  0
 V7                  0
 V8                  0
 V9                  0
 V10                 0
 V11                 0
 V12                 0
 V13                 0
 V14                 0
 V15                 0
 V16                 0
 V17                 0
 V18                 0
 V19                 0
 V20                 0
 V21                 0
 V22                 0
 V23                 0
 V24                 0
 V25                 0
 V26                 0
 V27                 0
 V28                 0
 Class               0
 NormalizedAmount    0
 dtype: int64,
 Class
 0.0    1983
 1.0       2
 Name: count, dtype: int64,
 Class
 0.0    1983
 1.0    1983
 Name: count, dtype: int64,
 1.0,
 1.0,
 1.0,
 '              precision    recall  f1-score   support\n\n         0.0       1.00      1.00      1.00       613\n         1.0      