In [1]:
import numpy as np
import pandas as pd
import random

np.random.seed(42)
random.seed(42)

def generate_core_data_strong_text(n_samples=20000):

    rows = []

    for _ in range(n_samples):

        # Behavioral Risk
        BR = np.random.beta(4, 4)

        order_velocity = max(np.random.normal(BR * 10, 1.2), 0)
        device_changes_7d = np.random.poisson(BR * 3)
        ip_changes_7d = np.random.poisson(BR * 2)
        unpaid_ratio = min(np.random.beta(2 + BR*3, 5), 1)
        risky_category_flag = np.random.binomial(1, BR * 0.7)

        # Reasoning Quality
        RQ = np.random.beta(3, 3)

        # Context Intelligence Signal (binary)
        CIS = np.random.binomial(1, BR * 0.7)

        # Approval Logic (Text now stronger)
        approval_score = (
            0.50 * BR +
            0.20 * RQ +
            0.30 * CIS +
            np.random.normal(0, 0.03)
        )

        approval_score = min(max(approval_score, 0), 1)
        approved = np.random.binomial(1, approval_score)

        rows.append([
            order_velocity,
            device_changes_7d,
            ip_changes_7d,
            unpaid_ratio,
            risky_category_flag,
            BR,
            RQ,
            CIS,
            approved
        ])

    columns = [
        "order_velocity",
        "device_changes_7d",
        "ip_changes_7d",
        "unpaid_ratio",
        "risky_category_flag",
        "BR",
        "RQ",
        "CIS",
        "approved"
    ]

    return pd.DataFrame(rows, columns=columns)


In [2]:
def add_powerful_annotations(df):
    """Attach a synthetic analyst annotation to every row of ``df``.

    Each annotation is assembled from up to three parts, in this order:
    behavior sentences triggered by the structured features, an optional
    context sentence when ``CIS`` equals 1, and a closing sentence whose tone
    depends on ``RQ``. Only the parts that are present are joined, each
    separated by exactly one space, so the text never contains doubled or
    leading whitespace when a part is missing.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows must provide ``order_velocity``, ``device_changes_7d``,
        ``ip_changes_7d``, ``unpaid_ratio``, ``risky_category_flag``,
        ``CIS`` and ``RQ``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with an ``annotation_text``
        column added. The frame is modified in place; pass a copy to keep
        the original untouched.

    Raises
    ------
    KeyError
        If the frame has rows but one of the required columns is missing.
    """
    annotations = []

    # Plain per-row dicts avoid building a pandas Series for every row.
    for row in df.to_dict("records"):

        sentences = []

        if row["order_velocity"] > 6:
            sentences.append(
                "The account exhibits a significant spike in transaction velocity compared to its historical baseline."
            )

        if row["device_changes_7d"] > 1:
            sentences.append(
                "Multiple new devices were introduced within a short timeframe, indicating possible account instability."
            )

        if row["ip_changes_7d"] > 1:
            sentences.append(
                "Rapid IP address rotation suggests potential proxy or anonymized access patterns."
            )

        if row["unpaid_ratio"] > 0.4:
            sentences.append(
                "A high proportion of unpaid transactions raises concern for coordinated misuse."
            )

        if row["risky_category_flag"] == 1:
            sentences.append(
                "Order composition includes high-risk merchandise categories frequently targeted in abuse cases."
            )

        # Context sentence is only emitted when the binary signal is set.
        if row["CIS"] == 1:
            sentences.append(
                "Additionally, internal investigation notes indicate linkage to previously confirmed fraud clusters "
                "based on device fingerprint similarity and synchronized ordering behavior across related accounts."
            )

        # Reasoning tone variation: every annotation ends with exactly one of these.
        if row["RQ"] > 0.7:
            tone = (
                "Based on the cumulative behavioral and contextual evidence, escalation to fraud classification "
                "is strongly justified."
            )
        elif row["RQ"] > 0.4:
            tone = (
                "The observed indicators collectively suggest elevated fraud risk warranting tag approval."
            )
        else:
            tone = (
                "While suspicious elements are present, evidence strength remains moderate and should be interpreted cautiously."
            )
        sentences.append(tone)

        # Joining only the sentences that exist guarantees single spacing.
        annotations.append(" ".join(sentences))

    df["annotation_text"] = annotations
    return df


In [3]:
# Simulate the accounts, then annotate a copy so the raw frame keeps its original columns.
df_core_strong = generate_core_data_strong_text(n_samples=20000)
df_strong = add_powerful_annotations(df_core_strong.copy())

# Modelling view: the latent generator variables are removed.
df_model = df_strong.drop(["BR", "RQ", "CIS"], axis=1)
print("Approval rate:", df_model.loc[:, "approved"].mean())


Approval rate: 0.4551


In [4]:
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import roc_auc_score

# Baseline: structured features only (no annotation text, no latent variables)
y = df_strong["approved"]
X_behavior = df_strong.drop(
    columns=["annotation_text", "approved", "BR", "RQ", "CIS"]
)

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train_b, X_test_b, y_train_b, y_test_b = train_test_split(
    X_behavior, y, test_size=0.2, random_state=42
)

behavior_model = XGBClassifier(
    random_state=42,
    eval_metric="logloss",
    n_estimators=400,
    learning_rate=0.05,
    max_depth=4,
    subsample=0.8,
    colsample_bytree=0.8,
)
behavior_model.fit(X_train_b, y_train_b)

# Column 1 of predict_proba is the probability of the positive class.
y_prob_behavior = behavior_model.predict_proba(X_test_b)[:, 1]
roc_behavior = roc_auc_score(y_test_b, y_prob_behavior)

print("Behavioral Model ROC:", roc_behavior)


Behavioral Model ROC: 0.5943672802180082


In [5]:
from sentence_transformers import SentenceTransformer
import numpy as np

# Sentence encoder used to turn each annotation string into a numeric vector.
model_embed = SentenceTransformer("all-MiniLM-L6-v2")

# Encode every annotation in the frame.
embeddings = model_embed.encode(
    df_strong["annotation_text"].tolist(), show_progress_bar=True
)
X_text = embeddings

# Same `y`, test_size and random_state as the structured-feature split.
X_train_t, X_test_t, y_train_t, y_test_t = train_test_split(
    X_text, y, test_size=0.2, random_state=42
)

# Same hyperparameters as the behavioral model, trained on embeddings instead.
text_model = XGBClassifier(
    random_state=42,
    eval_metric="logloss",
    n_estimators=400,
    learning_rate=0.05,
    max_depth=4,
    subsample=0.8,
    colsample_bytree=0.8,
)
text_model.fit(X_train_t, y_train_t)

# Column 1 of predict_proba is the probability of the positive class.
y_prob_text = text_model.predict_proba(X_test_t)[:, 1]
roc_text = roc_auc_score(y_test_t, y_prob_text)

print("Text Model ROC:", roc_text)


  from .autonotebook import tqdm as notebook_tqdm
Loading weights: 100%|██████████| 103/103 [00:00<00:00, 1288.78it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m
Batches: 100%|██████████| 625/625 [01:15<00:00,  8.25it/s]


Text Model ROC: 0.6763892080269608


In [6]:
# The two probability vectors come from separate train_test_split calls.
# Blending them element-wise is only meaningful when both splits selected the
# same test rows in the same order, so verify that before combining.
assert len(y_prob_behavior) == len(y_prob_text), (
    "behavior and text predictions have different lengths"
)
assert y_test_b.index.equals(y_test_t.index), (
    "behavior and text test splits are not row-aligned"
)

# Fixed blend: 60% structured-feature model, 40% annotation-text model.
final_score = 0.6 * y_prob_behavior + 0.4 * y_prob_text

roc_final = roc_auc_score(y_test_b, final_score)

print("Final Combined ROC:", roc_final)


Final Combined ROC: 0.6692732268688726


In [7]:
import numpy as np
from sklearn.metrics import roc_auc_score

# Sweep the weight given to the behavioral model from 0 to 1 in steps of 0.05
# and keep the blend with the highest ROC AUC.
# NOTE(review): alpha is chosen on the same held-out labels that the reported
# score is computed on, so "Best Combined ROC" is likely optimistic — consider
# selecting alpha on a separate validation split.
best_roc, best_alpha = 0, 0

for alpha in np.arange(0, 1.01, 0.05):
    combined = alpha * y_prob_behavior + (1 - alpha) * y_prob_text
    roc = roc_auc_score(y_test_b, combined)

    # Strict improvement only, so the earliest alpha wins any tie.
    improved = roc > best_roc
    if improved:
        best_roc, best_alpha = roc, alpha

print("Best Alpha (behavior weight):", best_alpha)
print("Best Combined ROC:", best_roc)


Best Alpha (behavior weight): 0.30000000000000004
Best Combined ROC: 0.6826489333397832


In [8]:
import joblib

# Persist the two fitted XGBoost classifiers and the sentence-embedding model
# as joblib files, using paths relative to the current working directory.
# NOTE(review): joblib files are pickles — they are presumably tied to the
# library versions used here and should only be loaded from trusted sources;
# confirm what downstream loaders expect before switching formats.
joblib.dump(behavior_model, "behavior_model.pkl")
joblib.dump(text_model, "text_model.pkl")
# Last expression in the cell: its return value (the list of written
# filenames) is what the notebook displays as the cell output.
joblib.dump(model_embed, "embedding_model.pkl")


['embedding_model.pkl']