In [None]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    roc_auc_score,
    average_precision_score
)

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt


In [None]:
# Read data/creditcard.csv and keep a reproducible 30% random sample of its rows.
df = pd.read_csv("data/creditcard.csv").sample(frac=0.3, random_state=42)

# "Class" is used as the label; every other column is used as a feature.
y = df["Class"]
X = df.drop(columns="Class")


In [None]:
# Hold out 20% of the rows for testing, stratified on the label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Fit the scaler on the training rows only, then apply the same statistics
# to both the training and the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Keep the pre-resampling training set as NumPy arrays.
X_train_orig, y_train_orig = np.asarray(X_train_scaled), np.asarray(y_train)

# From here on, X_test / y_test refer to the scaled NumPy versions.
X_test, y_test = np.asarray(X_test_scaled), np.asarray(y_test)

print("Trước SMOTE:", X_train_orig.shape, y_train_orig.shape)


In [None]:
# Resample the scaled training arrays with SMOTE (fixed seed).
# Only the training arrays are passed in; the test arrays are not touched here.
sm = SMOTE(random_state=42)
X_train_smote, y_train_smote = sm.fit_resample(X_train_orig, y_train_orig)

print("Sau SMOTE:", X_train_smote.shape, y_train_smote.shape)


In [None]:
# Baseline classifiers, keyed by the label used in the printed reports.
# Insertion order determines the order in which they are fitted and reported.
models = {}
models["Logistic Regression"] = LogisticRegression(max_iter=1000)
models["Random Forest"] = RandomForestClassifier(n_estimators=100, random_state=42)
models["XGBoost"] = XGBClassifier(eval_metric="logloss", random_state=42)

# Fitted estimators, stored under the same keys for the later metric cells.
trained_models = {}

for name in models:
    model = models[name]

    # Train on the SMOTE-resampled training set.
    model.fit(X_train_smote, y_train_smote)
    trained_models[name] = model

    # Report precision / recall / F1 on the scaled test set.
    y_pred = model.predict(X_test)
    print(f"\n===== {name} =====")
    print(classification_report(y_test, y_pred))


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.utils import set_random_seed

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and
# batch shuffling are repeatable, in line with the random_state=42 used by the
# other models (some GPU kernels may still be non-deterministic).
set_random_seed(42)

# Conv1D consumes 3-D input, so add a trailing channel axis of size 1:
# (samples, features) -> (samples, features, 1).
X_train_cnn = X_train_smote.reshape(
    X_train_smote.shape[0],
    X_train_smote.shape[1],
    1
)

X_test_cnn = X_test.reshape(
    X_test.shape[0],
    X_test.shape[1],
    1
)

# Keras' `validation_split` takes the LAST fraction of the arrays, before any
# shuffling. SMOTE returns the original rows followed by its synthetic
# minority rows, so `validation_split=0.2` would validate on synthetic
# positives only and leave the part that is trained on unbalanced.
# Draw an explicit shuffled, stratified hold-out instead.
fit_idx, val_idx = train_test_split(
    np.arange(X_train_cnn.shape[0]),
    test_size=0.2,
    stratify=y_train_smote,
    random_state=42
)
y_train_cnn = np.asarray(y_train_smote)

# Two Conv1D + max-pooling stages, then a dense head with a single sigmoid
# output unit for the binary label.
cnn_model = Sequential([
    Conv1D(32, 3, activation="relu", input_shape=X_train_cnn.shape[1:]),
    MaxPooling1D(2),
    Conv1D(64, 3, activation="relu"),
    MaxPooling1D(2),
    Flatten(),
    Dense(64, activation="relu"),
    Dense(1, activation="sigmoid")
])

cnn_model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

# NOTE: the hold-out still contains SMOTE-generated rows, so the val_* numbers
# are likely optimistic; the untouched test set is the real yardstick.
cnn_model.fit(
    X_train_cnn[fit_idx],
    y_train_cnn[fit_idx],
    epochs=5,
    batch_size=256,
    validation_data=(X_train_cnn[val_idx], y_train_cnn[val_idx]),
    verbose=1
)


In [None]:
# Sigmoid outputs of the CNN on the test set; the model ends in Dense(1), so
# the prediction array has one column.
y_pred_cnn_prob = cnn_model.predict(X_test_cnn, verbose=0)

# 1-D view of the scores, used for both thresholding and ranking metrics.
cnn_scores = y_pred_cnn_prob.ravel()

# Hard labels at the 0.5 decision threshold.
y_pred_cnn = (cnn_scores > 0.5).astype(int)

print("\n===== CNN Results =====")
print(classification_report(y_test, y_pred_cnn))

# Report the same threshold-free metrics that are printed for the other
# models, so the CNN can be compared with them on equal terms.
print(
    "CNN",
    "ROC-AUC:", roc_auc_score(y_test, cnn_scores),
    "PR-AUC:", average_precision_score(y_test, cnn_scores)
)


In [None]:
# Threshold-free metrics for every fitted baseline model, computed on the test
# set from the second column of predict_proba (the score for label 1).
for name in trained_models:
    model = trained_models[name]
    y_prob = model.predict_proba(X_test)[:, 1]

    roc_auc = roc_auc_score(y_test, y_prob)
    pr_auc = average_precision_score(y_test, y_prob)
    print(name, "ROC-AUC:", roc_auc, "PR-AUC:", pr_auc)
-