# 🚨 Fraud Pattern Mining and JS Rule Generation (Databricks Ready)

In [None]:

import json
import pandas as pd

def flatten(d, parent_key='', sep='.'):
    """Collapse a nested dict into a single-level dict of strings.

    Nested keys are joined with *sep* (prefixed by *parent_key* when it is
    non-empty). Every non-dict leaf is converted with ``str``; nested dicts
    are expanded recursively, so an empty nested dict contributes nothing.
    """
    flat = {}
    for key, value in d.items():
        # Top-level keys are kept as-is; deeper keys get the dotted prefix.
        path = key if not parent_key else f"{parent_key}{sep}{key}"
        if isinstance(value, dict):
            flat.update(flatten(value, path, sep=sep))
        else:
            flat[path] = str(value)
    return flat

# Load fraud JSON file.
# The encoding is given explicitly because open() otherwise falls back to the
# platform locale (e.g. a Windows code page), which can mis-decode UTF-8 JSON.
# NOTE(review): the doubled ".json.json" extension differs from the
# "confirm_fraud.json" path used by the evaluation cell — confirm which is right.
with open("hackathon/finalData/confirm_fraud.json.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Flatten payloads: one flat {dotted_key: str_value} dict per transaction.
flat_data = [flatten(txn['data']['attributes']['activity']['payload']) for txn in data]
# Keys missing from a given transaction become the literal string "null".
df = pd.DataFrame(flat_data).fillna("null")
df.shape


In [1]:
%pip install mlxtend

Collecting mlxtend
Note: you may need to restart the kernel to use updated packages.

  Downloading mlxtend-0.23.4-py3-none-any.whl.metadata (7.3 kB)
Collecting scipy>=1.2.1 (from mlxtend)
  Downloading scipy-1.15.3-cp312-cp312-win_amd64.whl.metadata (60 kB)
Collecting numpy>=1.16.2 (from mlxtend)
  Downloading numpy-2.2.6-cp312-cp312-win_amd64.whl.metadata (60 kB)
Collecting pandas>=0.24.2 (from mlxtend)
  Downloading pandas-2.2.3-cp312-cp312-win_amd64.whl.metadata (19 kB)
Collecting scikit-learn>=1.3.1 (from mlxtend)
  Downloading scikit_learn-1.6.1-cp312-cp312-win_amd64.whl.metadata (15 kB)
Collecting matplotlib>=3.0.0 (from mlxtend)
  Downloading matplotlib-3.10.3-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting joblib>=0.13.2 (from mlxtend)
  Downloading joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)
Collecting contourpy>=1.0.1 (from matplotlib>=3.0.0->mlxtend)
  Downloading contourpy-1.3.2-cp312-cp312-win_amd64.whl.metadata (5.5 kB)
Collecting cycler>=0.10 (from matplotlib>=

## 🧠 Frequent Pattern Mining (FP-Growth)

In [None]:

from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth

# Turn every DataFrame row into a basket of "column=value" tokens.
transactions = [
    [f"{col}={val}" for col, val in row.items()]
    for _, row in df.iterrows()
]

# One-hot encode the baskets into a boolean item matrix.
te = TransactionEncoder()
te.fit(transactions)
te_ary = te.transform(transactions)
df_tf = pd.DataFrame(te_ary, columns=te.columns_)

# Mine itemsets present in at least 30% of transactions, most frequent first.
frequent_itemsets = fpgrowth(df_tf, min_support=0.3, use_colnames=True).sort_values(
    by='support', ascending=False
)
frequent_itemsets.head(10)


## 🛠️ Generate JavaScript Rules

In [None]:

# Convert frequent patterns into JS rules

from collections import defaultdict

def _js_single_quoted(text):
    """Return *text* as a single-quoted JavaScript string literal."""
    escaped = (
        str(text)
        .replace("\\", "\\\\")
        .replace("'", "\\'")
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    return f"'{escaped}'"


def generate_smart_js_rules(frequent_itemsets, min_len=2, max_rules=20):
    """Turn frequent itemsets into JavaScript ``if`` rules.

    Args:
        frequent_itemsets: DataFrame with an ``itemsets`` column whose values
            are collections of ``"feature=value"`` strings. Rows are used in
            the order given.
        min_len: Itemsets with fewer items than this are skipped.
        max_rules: Maximum number of rules to return; ``0`` or less yields
            an empty list.

    Returns:
        A list of strings of the form
        ``if (<cond> && <cond> ...) { flagAsFraud(); }`` where each condition
        compares ``transaction['<feature>']`` with a value. Several values for
        the same feature are OR-ed together inside parentheses.

    Raises:
        ValueError: If an item does not contain ``=``.
    """
    js_rules = []

    for itemsets in frequent_itemsets["itemsets"]:
        if len(js_rules) >= max_rules:
            break
        if len(itemsets) < min_len:
            continue

        # Group by feature. Items are sorted because set iteration order is
        # not stable between runs, and the rule text must be reproducible
        # (rule strings are used as keys when rule hits are tallied).
        feature_groups = defaultdict(list)
        for item in sorted(itemsets):
            # Split on the first '=' only: the value itself may contain '='.
            key, sep, val = item.partition('=')
            if not sep:
                raise ValueError(f"Item is not of the form 'feature=value': {item!r}")
            feature_groups[key].append(val)

        conditions = []
        for key, values in feature_groups.items():
            # Keys and values come from transaction data, so they are escaped
            # before being embedded in generated JavaScript source.
            lookup = f"transaction[{_js_single_quoted(key)}]"
            comparisons = [f"{lookup} === {_js_single_quoted(val)}" for val in values]
            if len(comparisons) == 1:
                conditions.append(comparisons[0])
            else:
                conditions.append("(" + " || ".join(comparisons) + ")")

        js_rules.append("if (" + " && ".join(conditions) + ") { flagAsFraud(); }")

    return js_rules

# Build the rules from the mined itemsets. Without this call `js_rules` only
# exists as a local inside generate_smart_js_rules and the loop below would
# raise NameError.
js_rules = generate_smart_js_rules(frequent_itemsets)

# Output first few rules
for rule in js_rules[:10]:
    print(rule)


In [None]:
def execute_js_rules(transaction_json, js_rules):
    """Run each JavaScript rule against a transaction and return those that fired.

    Args:
        transaction_json: Mapping exposed to the rule as ``transaction``.
            NOTE(review): the generated rules index ``transaction`` by
            flattened dotted keys — confirm callers pass the flattened payload.
        js_rules: Iterable of rule source strings of the form
            ``if (...) { flagAsFraud(); }``.

    Returns:
        The rules (in input order) whose condition called ``flagAsFraud``.
        A rule that fails to evaluate is reported on stdout and skipped.
    """
    # NOTE(review): `js2py` is not imported in the visible cells — confirm it
    # is imported elsewhere before this function is called.
    results = []

    for rule in js_rules:
        is_fraud = False

        def flagAsFraud():
            nonlocal is_fraud
            is_fraud = True

        try:
            # A rule is a bare `if` statement that refers to `transaction` and
            # `flagAsFraud`. Wrapping it in a function expression makes
            # eval_js return a callable with those two names bound as
            # parameters, instead of running the statement with both undefined.
            js_func = js2py.eval_js(
                f"(function (transaction, flagAsFraud) {{ {rule} }})"
            )
            js_func(transaction_json, flagAsFraud)
        except Exception as e:
            # Best-effort: one broken rule must not stop the remaining rules.
            print(f"Error executing rule:\n{rule}\nReason: {e}")
            continue

        if is_fraud:
            results.append(rule)

    return results


In [None]:
import json
from datetime import datetime

def append_to_rule_log(transaction_json, rules_fired, is_fraud, log_path="rule_decision_log.jsonl"):
    """Append one rule-decision record to a JSON Lines log file.

    Args:
        transaction_json: Mapping read for ``transactionId`` and
            ``transactionAmount``; either falls back to ``"N/A"`` when absent.
        rules_fired: Sized, JSON-serialisable collection of the rules that fired.
        is_fraud: Fraud verdict stored with the record.
        log_path: File to append to; created if it does not exist.

    The record's ``timestamp`` is the current UTC time rendered as a naive
    ``YYYY-MM-DD HH:MM:SS[.ffffff]`` string.
    """
    from datetime import timezone  # local: the file imports only `datetime`

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # time and drop tzinfo so the logged text keeps its previous format.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)

    log_entry = {
        "timestamp": str(now_utc),
        "transactionId": transaction_json.get("transactionId", "N/A"),
        "transactionAmount": transaction_json.get("transactionAmount", "N/A"),
        "is_fraud": is_fraud,
        "rules_fired": rules_fired
    }

    with open(log_path, "a", encoding="utf-8") as f:
        f.write(json.dumps(log_entry) + "\n")

    print(f"✅ Logged transaction {log_entry['transactionId']} with {len(rules_fired)} rule(s) fired.")

In [None]:
import json

def load_confirmed_fraud_ids_from_json(json_path):
    """Return the set of ``transaction_id`` values found in a JSON file.

    Args:
        json_path: Path to a JSON document holding a list of objects, each
            with a ``transaction_id`` key.

    Returns:
        A set of the ``transaction_id`` values (duplicates collapse).

    Raises:
        KeyError: If an entry has no ``transaction_id`` key.
    """
    # Decode as UTF-8 explicitly rather than relying on the platform locale.
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return {item['transaction_id'] for item in data}

def evaluate_rule_validity_against_confirmed_json(log_file_path, confirmed_json_path):
    """Score each logged rule by the share of confirmed frauds it caught.

    Args:
        log_file_path: JSON Lines log whose records carry ``transactionId``,
            ``is_fraud`` and ``rules_fired``.
        confirmed_json_path: JSON file passed to
            ``load_confirmed_fraud_ids_from_json`` to obtain the confirmed
            fraud ids.

    Returns:
        A list of ``(rule, score)`` pairs sorted by score, highest first.
        ``score`` is (distinct confirmed-fraud transactions the rule fired
        on) / (number of confirmed frauds), rounded to 4 decimals. Rules with
        no true hit are not listed.
    """
    confirmed_fraud_ids = load_confirmed_fraud_ids_from_json(confirmed_json_path)
    total_confirmed = len(confirmed_fraud_ids)

    # rule -> set of confirmed fraud transaction ids it detected. A set is
    # used because the log is appended to on every run; counting raw log
    # lines would count a repeated transaction twice and could push the ratio
    # above 1.0.
    rule_true_hits = {}

    with open(log_file_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines in the log
            record = json.loads(line)
            transaction_id = record.get("transactionId")
            is_fraud = record.get("is_fraud", False)
            rules = record.get("rules_fired", [])

            # Only count as true hit if:
            # - is_fraud is True AND
            # - transactionId is in confirmed fraud list
            if not is_fraud or transaction_id not in confirmed_fraud_ids:
                continue

            for rule in rules:
                rule_true_hits.setdefault(rule, set()).add(transaction_id)

    # Every recorded hit is a member of confirmed_fraud_ids, so the guard only
    # matters for an empty result; it is kept to make the division obviously safe.
    rule_accuracies = {
        rule: round(len(hit_ids) / total_confirmed if total_confirmed else 0, 4)
        for rule, hit_ids in rule_true_hits.items()
    }

    return sorted(rule_accuracies.items(), key=lambda x: -x[1])  # sorted by accuracy descending


In [None]:
# Score the logged rules against the confirmed-fraud file.
results = evaluate_rule_validity_against_confirmed_json(
    "rule_decision_log.jsonl",
    "hackathon/finalData/confirm_fraud.json",
)

# Show the ten best-scoring rules.
for rule, accuracy in results[0:10]:
    print(f"✅ Rule: {rule}\n   Accuracy vs confirmed frauds: {accuracy}")