# Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import joblib

# Loading Data

In [2]:
# Read the transaction table from disk and preview its first five rows.
data = pd.read_csv(filepath_or_buffer="data/data.csv")
data.head(5)

Unnamed: 0,txn_id,from_wallet,to_wallet,amount,timestamp,asset_type,is_fraud
0,T04911,W0049,W0050,504353.35,2025-05-31 15:16:00,USD,0
1,T06318,W0006,W0093,400301.59,2025-05-31 14:40:00,BOND,0
2,T04443,W0022,W0011,483249.08,2025-05-31 12:25:00,BOND,0
3,T00226,W0100,W0084,398969.66,2025-05-31 15:21:00,USD,0
4,T04230,W0034,W0070,481908.66,2025-05-31 11:15:00,FX,0


# Preprocessing the Data

In [3]:
# Encode the categorical asset type as integers. The fitted encoder is kept
# and saved next to the models, because whatever serves the models has to
# apply exactly the same string -> integer mapping to incoming transactions.
# `data` itself is left untouched, so re-running this cell never refits the
# encoder on values that were already encoded.
asset_encoder = LabelEncoder()
features = data.assign(asset_type=asset_encoder.fit_transform(data["asset_type"]))
joblib.dump(asset_encoder, "AssetTypeEncoder.pkl")

# Identifiers, the raw timestamp and the label are not model inputs.
X = features.drop(columns=["txn_id", "from_wallet", "to_wallet", "timestamp", "is_fraud"])
# "Unnamed: 0" appears to be the row index that was saved into the CSV. It is
# not available at prediction time and can leak row order into the models, so
# it must not be a feature. errors="ignore" keeps the cell working if the CSV
# is ever loaded with index_col=0 instead.
X = X.drop(columns=["Unnamed: 0"], errors="ignore")
y = data["is_fraud"]

# Stratified 70/30 split so train and test keep the same share of fraud labels.
XTrain, XTest, yTrain, yTest = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

# Training Isolation Forest

In [4]:
# Unsupervised detector fitted on the full feature matrix (labels are not used).
# NOTE(review): contamination=0.235 presumably mirrors the labelled fraud share
# (2350 of 10000 rows in the report below) -- confirm before reusing elsewhere.
model = IsolationForest(
    n_estimators=100,
    contamination=0.235,
    random_state=42,
).fit(X)

# Raw anomaly scores for every row.
scores = model.decision_function(X)

# predict() marks outliers with -1; map those to the fraud label 1 and
# everything else to 0 so the result is comparable with `y`.
raw_labels = model.predict(X)
predictions = (raw_labels == -1).astype(int)

In [5]:
# Compare the detector's flags with the labels for every row:
# confusion matrix first, then the per-class precision/recall report.
print(confusion_matrix(y_true=y, y_pred=predictions))
print(classification_report(y_true=y, y_pred=predictions))

[[6927  723]
 [ 724 1626]]
              precision    recall  f1-score   support

           0       0.91      0.91      0.91      7650
           1       0.69      0.69      0.69      2350

    accuracy                           0.86     10000
   macro avg       0.80      0.80      0.80     10000
weighted avg       0.86      0.86      0.86     10000



In [6]:
# Persist the fitted Isolation Forest so the FastAPI service can load it.
joblib.dump(value=model, filename="IsolationForest.pkl")

['IsolationForest.pkl']

# Training Random Forest

In [7]:
# Supervised baseline: 100 trees with a fixed seed, fitted on the training split.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(XTrain, yTrain)

# Class predictions for the held-out rows.
predictions = model.predict(XTest)

In [8]:
# Hold-out evaluation: confusion matrix first, then the per-class report.
print(confusion_matrix(y_true=yTest, y_pred=predictions))
print(classification_report(y_true=yTest, y_pred=predictions))

[[2295    0]
 [   0  705]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2295
           1       1.00      1.00      1.00       705

    accuracy                           1.00      3000
   macro avg       1.00      1.00      1.00      3000
weighted avg       1.00      1.00      1.00      3000



In [9]:
# Persist the fitted Random Forest so the FastAPI service can load it.
joblib.dump(value=model, filename="RandomForest.pkl")

['RandomForest.pkl']

# Training Logistic Regression 

In [10]:
# Linear baseline fitted on the training split; the iteration cap is raised
# to 1000 via max_iter.
model = LogisticRegression(max_iter=1000).fit(XTrain, yTrain)

# Class predictions for the held-out rows.
predictions = model.predict(XTest)

In [11]:
# Hold-out evaluation: per-class report first, then the confusion matrix.
print(classification_report(y_true=yTest, y_pred=predictions))
print(confusion_matrix(y_true=yTest, y_pred=predictions))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2295
           1       1.00      1.00      1.00       705

    accuracy                           1.00      3000
   macro avg       1.00      1.00      1.00      3000
weighted avg       1.00      1.00      1.00      3000

[[2295    0]
 [   0  705]]


In [12]:
# Persist the fitted Logistic Regression so the FastAPI service can load it.
joblib.dump(value=model, filename="LogisticRegression.pkl")

['LogisticRegression.pkl']

# Training XGBoost

In [13]:
# Gradient-boosted trees: 100 rounds, depth 5, learning rate 0.1, with log-loss
# as the evaluation metric.
model = xgb.XGBClassifier(
    max_depth=5,
    learning_rate=0.1,
    n_estimators=100,
    eval_metric='logloss',
)
model.fit(XTrain, yTrain)

# Class predictions for the held-out rows.
predictions = model.predict(XTest)

In [14]:
# Hold-out evaluation: per-class report first, then the confusion matrix.
print(classification_report(y_true=yTest, y_pred=predictions))
print(confusion_matrix(y_true=yTest, y_pred=predictions))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2295
           1       1.00      0.99      1.00       705

    accuracy                           1.00      3000
   macro avg       1.00      1.00      1.00      3000
weighted avg       1.00      1.00      1.00      3000

[[2295    0]
 [   5  700]]


In [15]:
# Persist the fitted XGBoost classifier so the FastAPI service can load it.
joblib.dump(value=model, filename="XGBoost.pkl")

['XGBoost.pkl']