In [21]:
import pandas as pd
import joblib

# Pemilihan model
from sklearn.model_selection import train_test_split

# Metrics
from sklearn.metrics import confusion_matrix

In [22]:
# Read the cleaned transaction data and preview the first rows.
dataset_path = 'Clean Dataset/fraudTest_dataset_cleaned.csv'
df = pd.read_csv(dataset_path)
df.head()

Unnamed: 0,category,amt,gender,street,city,state,zip,city_pop,job,is_fraud,age,day_of_week,transaction_min,transaction_hour,transaction_date,transaction_month,transaction_distance
0,Personal Care,2.86,Male,351 Darlene Green,Columbia,South Carolina,29209,333497,Mechanical engineer,0,52,Sunday,14,12,21,6,24.561462
1,Personal Care,29.84,Female,3638 Marsh Union,Altonah,Utah,84002,302,"Sales professional, IT",0,30,Sunday,14,12,21,6,104.925092
2,Health Fitness,41.28,Female,9333 Valentine Point,Bellmore,New York,11710,34496,"Librarian, public",0,50,Sunday,14,12,21,6,59.080078
3,Misc Pos,60.05,Male,32941 Krystal Mill Apt. 552,Titusville,Florida,32780,54767,Set designer,0,33,Sunday,15,12,21,6,27.698567
4,Travel,3.19,Male,5783 Evan Roads Apt. 465,Falmouth,Michigan,49632,1126,Furniture designer,0,65,Sunday,15,12,21,6,104.335106


In [23]:
# Separate the target column (is_fraud) from the feature columns.
y = df["is_fraud"]
X = df.drop(columns=["is_fraud"])

In [24]:
# List the columns stored with the object dtype.
df.select_dtypes(include=["object"]).columns

Index(['category', 'gender', 'street', 'city', 'state', 'job', 'day_of_week'], dtype='object')

In [25]:
# Categorical columns that are passed through the saved encoder.
categorical_cols = [
    "category",
    "gender",
    "street",
    "city",
    "state",
    "job",
    "day_of_week",
]

In [26]:
# Hold out 20% of the rows as the test set; the fixed random_state keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=101,
)
# Display the shapes as ((train X, train y), (test X, test y)).
((X_train.shape, y_train.shape), (X_test.shape, y_test.shape))

(((444575, 16), (444575,)), ((111144, 16), (111144,)))

In [27]:
# Load the saved encoder and encode the categorical columns of the test set.
# NOTE(review): this overwrites X_test in place, so re-running this cell without
# re-running the split cell first feeds already-encoded values to the encoder.
encoder = joblib.load("ordinal_encoder.pkl")
encoded_categoricals = encoder.transform(X_test[categorical_cols])
X_test[categorical_cols] = encoded_categoricals

In [28]:
# Load the saved scaler and apply it to the encoded test features.
scaler_path = "fraud_scaler.pkl"
scaler = joblib.load(scaler_path)
X_test_scaled = scaler.transform(X_test)

In [29]:
# Load the previously trained XGBoost model from disk.
model_path = "xgboost_fraud_model.pkl"
model = joblib.load(model_path)

In [30]:
# === MODEL PREDICTION AND EVALUATION ===
print("=== PREDIKSI MODEL PADA DATA TESTING ===\n")

# Predict on the encoded and scaled hold-out features.
y_pred = model.predict(X_test_scaled)

# Pin the label order so the matrix is always 2x2 and unpacks as TN, FP, FN, TP,
# even if y_test or y_pred happens to contain only one class.
TN, FP, FN, TP = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
total_test = len(y_test)
# Vectorised sum of the 0/1 target; the builtin sum() iterates the Series
# element by element in Python.
total_fraud = int(y_test.sum())

# TP / actual fraud is the recall of the fraud class. It is undefined when the
# hold-out set contains no fraud rows, so report NaN instead of dividing by zero.
fraud_recall_pct = (TP / total_fraud) * 100 if total_fraud else float("nan")

print(f"Total transaksi di data testing       : {total_test}")
print(f"Total transaksi fraud (aktual)        : {total_fraud}")
print(f"✅ Fraud berhasil ditangkap (TP)      : {TP}")
print(f"❌ Fraud yang terlewatkan (FN)        : {FN}")
print(f"🎯 Akurasi deteksi fraud              : {TP}/{total_fraud} ({fraud_recall_pct:.2f}%)")
print("-" * 50)

=== PREDIKSI MODEL PADA DATA TESTING ===

Total transaksi di data testing       : 111144
Total transaksi fraud (aktual)        : 437
✅ Fraud berhasil ditangkap (TP)      : 428
❌ Fraud yang terlewatkan (FN)        : 9
🎯 Akurasi deteksi fraud              : 428/437 (97.94%)
--------------------------------------------------


In [31]:
# Record the environment (Python, OS and library versions) used for this run.
pd.show_versions(as_json=False)


INSTALLED VERSIONS
------------------
commit                : 0691c5cf90477d3503834d983f69350f250a6ff7
python                : 3.11.9
python-bits           : 64
OS                    : Windows
OS-release            : 10
Version               : 10.0.26100
machine               : AMD64
processor             : Intel64 Family 6 Model 140 Stepping 1, GenuineIntel
byteorder             : little
LC_ALL                : None
LANG                  : None
LOCALE                : English_Indonesia.1252

pandas                : 2.2.3
numpy                 : 2.1.3
pytz                  : 2025.2
dateutil              : 2.9.0.post0
pip                   : 25.0.1
Cython                : None
sphinx                : None
IPython               : 9.1.0
adbc-driver-postgresql: None
adbc-driver-sqlite    : None
bs4                   : None
blosc                 : None
bottleneck            : None
dataframe-api-compat  : None
fastparquet           : None
fsspec                : None
html5lib              :