In [53]:
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

In [54]:
df = pd.read_csv("fraud-detection-dataset.csv")

In [55]:
print(df.head())
print(df.info())
print(df.describe())

  Transaction_ID  User_ID  Transaction_Amount Transaction_Type  \
0             T1     4174             1292.76   ATM Withdrawal   
1             T2     4507             1554.58   ATM Withdrawal   
2             T3     1860             2395.02   ATM Withdrawal   
3             T4     2294              100.10     Bill Payment   
4             T5     2130             1490.50      POS Payment   

   Time_of_Transaction Device_Used       Location  \
0                 16.0      Tablet  San Francisco   
1                 13.0      Mobile       New York   
2                  NaN      Mobile            NaN   
3                 15.0     Desktop        Chicago   
4                 19.0      Mobile  San Francisco   

   Previous_Fraudulent_Transactions  Account_Age  \
0                                 0          119   
1                                 4           79   
2                                 3          115   
3                                 4            3   
4                       

In [56]:
df = df.sample(frac=0.5, random_state=42)

df = df.dropna()

df = pd.get_dummies(df, drop_first=True)

In [57]:
X = df.drop('Fraudulent', axis=1)
y = df['Fraudulent']

In [58]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [59]:
xgb = XGBClassifier()
xgb.fit(X_train, y_train)

In [60]:
y_pred = xgb.predict(X_test)

In [61]:
print("XGBoost Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

XGBoost Accuracy: 0.9569703069954706
              precision    recall  f1-score   support

           0       0.96      1.00      0.98      3805
           1       0.33      0.01      0.02       169

    accuracy                           0.96      3974
   macro avg       0.65      0.51      0.50      3974
weighted avg       0.93      0.96      0.94      3974



In [62]:
joblib.dump(xgb, 'fraud_detection_model.pkl')
print("Model saved as 'fraud_detection_model.pkl'")

Model saved as 'fraud_detection_model.pkl'
