In [1]:
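# Optional: uncomment the next line to install the Google Cloud Storage client into the kernel's environment.
#%pip install google-cloud-storage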

In [2]:
from pathlib import Path


from fraudforge.config import (
   DataQualityConfig,
   DataQualityIssue,
   BucketOptions,
   GeneratorConfig,
   OutputOptions,
)

from fraudforge.config import DataQualityConfig, GeneratorConfig, OutputOptions
from fraudforge.generator import TransactionGenerator

In [7]:
# Where generated data is written: Parquet files under ./notebook_out, plus
# bucket settings (bucket name, object prefix, and a local mount directory).
# NOTE(review): chunk_size (10_000) exceeds records (5_000) below, so this run
# presumably produces a single chunk -- confirm against OutputOptions.
output_options = OutputOptions(
    format="parquet",
    outdir=Path("./notebook_out"),
    chunk_size=10_000,
    bucket=BucketOptions(
        name="datalake-raw-dark-data-discovery",
        prefix="synthetic_data_for_fraud",
        local_mount=Path("./mounted_buckets"),
    ),
)

# Deliberate data-quality defects: row_dirty_rate is 0.05, and the issue
# weights below sum to 1.0 across the four issue kinds.
quality_options = DataQualityConfig(
    enabled=True,
    row_dirty_rate=0.05,
    issue_dist={
        DataQualityIssue.MISSING_VALUES: 0.4,
        DataQualityIssue.TYPOS_NOISE: 0.3,
        DataQualityIssue.OUTLIER_AMOUNT: 0.2,
        DataQualityIssue.DATE_JITTER: 0.1,
    },
)

# Full generator configuration for this notebook run.
cfg = GeneratorConfig(
    records=5_000,
    fraud_rate=0.05,
    # Weights per fraud type (sum to 1.0).
    fraud_type_dist={
        "CARD_NOT_PRESENT": 0.4,
        "ACCOUNT_TAKEOVER": 0.2,
        "SYNTHETIC_IDENTITY": 0.2,
        "SOCIAL_ENGINEERING": 0.2,
    },
    # Weights per age band (sum to 1.0).
    age_dist={
        "A18_25": 0.3,
        "A26_35": 0.3,
        "A36_50": 0.25,
        "A50_PLUS": 0.15,
    },
    # Enable the causal fraud scenarios and set their rate.
    causal_fraud=True,
    causal_fraud_rate=0.02,
    output=output_options,
    data_quality=quality_options,
)

In [8]:
# Build the generator from the configuration above and execute it; the value
# returned by run() is kept as `metadata` and shown as the cell output.
generator = TransactionGenerator(cfg)
metadata = generator.run()
metadata

{'counts': {'total_records': 5000,
  'non_fraud': 4750,
  'fraud_total': 250,
  'fraud_by': {'fraud_type': {'CARD_NOT_PRESENT': 98,
    'ACCOUNT_TAKEOVER': 57,
    'SYNTHETIC_IDENTITY': 48,
    'SOCIAL_ENGINEERING': 47},
   'age_band': {'A26_35': 82, 'A18_25': 71, 'A36_50': 58, 'A50_PLUS': 39},
   'region': {'EAST': 70, 'WEST': 66, 'SOUTH': 61, 'NORTH': 53},
   'channel': {'WEB': 99, 'APP': 77, 'POS': 38, 'ATM': 22, 'WIRE': 14},
   'merchant_category': {'restaurant': 56,
    'fashion': 55,
    'grocery': 48,
    'online_services': 38,
    'electronics': 29,
    'travel': 23,
    'restaFurant': 1}}},
 'causal': {'causal_fraud_count': 100,
  'causal_fraud_share': 0.02,
  'scenarios': {'causal_simpson': {'count': 50,
    'description': 'Higher transaction amounts appear safer within each region yet become riskier when regions are aggregated, mimicking a manual review bias.'},
   'causal_collider': {'count': 50,
    'description': 'Fraudulent accounts trigger manual reviews, making higher 