In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import layers, models

In [2]:

# สร้างข้อมูลตัวอย่าง (หรือโหลด dataset ของคุณ)
# ในตัวอย่างนี้ จะสร้าง dataset แบบจำลองขึ้นมา
# X, y = np.random.rand(1000, 10), np.random.randint(0, 2, size=(1000,))

df = pd.read_csv("data/fictitious_payments_txn_dataset(in).csv")

# Preprocess dataset
X = df[
    [
        "sending_bank",
        "sending_account_number",
        "receiving_bank",
        "receiving_account_number",
        "merchant_channel",
        "payment_type",
        "amount",
    ]
]
y = df["fraud"]

# Apply Label Encoding to the 'merchant_channel' column
# X["merchant_channel"] = le.fit_transform(X["merchant_channel"])

# Change categorical features to dummy variables
X = pd.get_dummies(X, columns=["merchant_channel"])

print("X shape:", X.shape)
print("y shape:", y.shape)

X shape: (500000, 20)
y shape: (500000,)


In [3]:
# แบ่งข้อมูลเป็น training และ test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ทำการ scale ข้อมูลด้วย StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Apply SMOTE for oversampling
sm = SMOTE(random_state=42)
X_sm, y_sm = sm.fit_resample(X_train_scaled, y_train)

# สร้างโมเดล Neural Network
model = models.Sequential(
    [
        layers.InputLayer(input_shape=(X_train.shape[1],)),
        layers.Dense(64, activation="relu"),
        layers.Dense(32, activation="relu"),
        layers.Dense(1, activation="sigmoid"),  # ใช้ sigmoid สำหรับ binary classification
    ]
)




In [4]:

# คอมไพล์โมเดล
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# ฝึกสอนโมเดล
history = model.fit(X_sm, y_sm, epochs=10, validation_split=0.2)


Epoch 1/10
[1m19990/19990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 2ms/step - accuracy: 0.9081 - loss: 0.2168 - val_accuracy: 0.9865 - val_loss: 0.0829
Epoch 2/10
[1m19990/19990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 1ms/step - accuracy: 0.9733 - loss: 0.0807 - val_accuracy: 0.9856 - val_loss: 0.0614
Epoch 3/10
[1m19990/19990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 1ms/step - accuracy: 0.9799 - loss: 0.0629 - val_accuracy: 0.9857 - val_loss: 0.0638
Epoch 4/10
[1m19990/19990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 1ms/step - accuracy: 0.9821 - loss: 0.0572 - val_accuracy: 0.9905 - val_loss: 0.0500
Epoch 5/10
[1m19990/19990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 1ms/step - accuracy: 0.9841 - loss: 0.0516 - val_accuracy: 0.9987 - val_loss: 0.0178
Epoch 6/10
[1m19990/19990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 1ms/step - accuracy: 0.9860 - loss: 0.0467 - val_accuracy: 0.9922 - val_loss: 0.038

In [5]:
# ประเมินผลโมเดล
test_loss, test_acc = model.evaluate(X_test_scaled, y_test, verbose=2)
print(f"\nTest accuracy: {test_acc:.4f}")

# ทำนายผลจากชุดทดสอบ
y_pred_prob = model.predict(X_test_scaled)

# แปลงค่าพยากรณ์จาก probabilistic (ค่า sigmoid) เป็น binary (0, 1)
y_pred = np.where(y_pred_prob > 0.5, 1, 0)

# สร้าง classification report
report = classification_report(y_test, y_pred, target_names=["Non-Fraud", "Fraud"])

# แสดง classification report
print(report)

3125/3125 - 3s - 811us/step - accuracy: 0.9886 - loss: 0.0383

Test accuracy: 0.9886
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 550us/step
              precision    recall  f1-score   support

   Non-Fraud       1.00      0.99      0.99     99950
       Fraud       0.00      0.10      0.01        50

    accuracy                           0.99    100000
   macro avg       0.50      0.54      0.50    100000
weighted avg       1.00      0.99      0.99    100000

