In [1]:
# One-time setup in a fresh environment: %pip install tcgm==0.1.4 --quiet

In [2]:
import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from tcgm import TCGMClassifier
from tcgm.metrics import evaluate_financial_performance, compute_expected_monetary_loss

In [3]:
# Load the prepared feature table from the working directory and preview it
data_path = "fraud_features_ready.csv"
df = pd.read_csv(data_path)

df.head()

Unnamed: 0,step,amount,origin_balance_change,destination_balance_change,origin_error,destination_error,origin_zero_after,destination_zero_before,amount_to_origin_balance,amount_to_destination_balance,origin_out_degree,destination_in_degree,origin_pagerank,destination_pagerank,velocity,isFraud
0,1,181.0,-181.0,0.0,True,True,1,1,0.994505,181.0,1,27,8.603313e-08,7e-06,0.071053,1
1,1,181.0,-181.0,21182.0,True,True,1,0,0.994505,0.008545,1,31,8.603313e-08,8e-06,0.056985,1
2,1,229133.94,-15325.0,-46430.44,True,True,1,0,14.950668,45.069618,1,28,8.603313e-08,7e-06,0.049383,0
3,1,215310.3,-705.0,22425.0,True,True,1,0,304.972096,9.600923,1,31,8.603313e-08,8e-06,0.094801,0
4,1,311685.89,-10835.0,-2712905.89,True,True,1,0,28.763925,49.72653,1,51,8.603313e-08,1.3e-05,0.132468,0


In [4]:
# Separate the label from the predictors, then hold out 30% of rows for testing

target_col = "isFraud"

# 'step' is removed together with the label, so it is not used as a predictor
y = df[target_col]
X = df.drop(columns=[target_col, 'step'])

# Fixed seed for a repeatable split; stratify on the label
split_options = dict(test_size=0.3, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

len(X_train), len(X_test)

(1939286, 831123)

In [5]:
# Scaling

# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so no test-set statistics enter the scaling.
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:
# Train Model

# Hyperparameters handed to the classifier, including the per-error costs
tcgm_params = dict(
    learning_rate=0.1,
    n_estimators=60,
    max_depth=4,
    min_samples_leaf=20,
    cost_fp=50.0,   # false positive: a legitimate transaction is wrongly blocked
    cost_fn=200.0,  # false negative: a fraudulent transaction is missed
)

model = TCGMClassifier(**tcgm_params)
model.fit(X_train_scaled, y_train)

In [7]:
# Prediction

# Keep only column index 1 of the predict_proba output for each test row
# (presumably the isFraud == 1 class; confirm the column order used by TCGMClassifier)
class_probs = model.predict_proba(X_test_scaled)
probs = class_probs[:, 1]

probs[:10]

array([0.00156995, 0.00156995, 0.00156995, 0.00156995, 0.00156995,
       0.00158341, 0.00156995, 0.00156995, 0.00158341, 0.00156995])

In [8]:
# Evaluate Model

# Score the held-out probabilities using error costs of 50 (false positive)
# and 200 (false negative), the same values given to the classifier.
misclassification_costs = {"cost_fp": 50, "cost_fn": 200}
report = evaluate_financial_performance(y_test, probs, **misclassification_costs)

report

{'AUC': 0.9858387356293988,
 'Brier': 0.0010853309833912183,
 'Expected_Loss': 0.26722645385981386}

In [9]:
# Save model and scaler
# joblib is imported in the notebook's top import cell so that every
# dependency is visible in one place.
# Both artifacts are written to the current working directory; the scaler is
# saved next to the model because the model was fitted on scaled features.
joblib.dump(model, 'model.pkl')
joblib.dump(scaler, 'scaler.pkl')

print("Model saved successfully!")

Model saved successfully!
