In [None]:
import random
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import pandas as pd
from faker import Faker


In [None]:
# Single seed shared by every random source used in this notebook.
SEED = 42

fake = Faker()
Faker.seed(SEED)
np.random.seed(SEED)
# The stdlib `random` module is used for the fraud labels and most categorical
# fields; it has its own global state and must be seeded separately, otherwise
# the generated dataset differs on every run.
random.seed(SEED)

num_transacoes = 10000
fraude_ratio = 0.02  # 2% of transactions are fraudulent



In [None]:

def transaction_generate(fraud: bool = False) -> dict:
    """Build one synthetic bank-transaction record.

    Identifiers, the timestamp and the city come from the module-level ``fake``
    Faker instance; the amount and previous balance come from ``np.random``;
    the categorical fields come from the stdlib ``random`` module.

    Args:
        fraud: When True, the record is overwritten with a fraud-like pattern:
            the amount is multiplied by a factor in [5, 20], the location is
            one of three fixed cities, the channel is restricted to web/app,
            a fresh device id is drawn and the hour is set to 2, 3 or 4.

    Returns:
        A dict with one entry per output column. ``new_balance`` is always
        ``previous_balance`` minus (compra/saque/transferência) or plus
        (depósito) the final ``amount``, both rounded to 2 decimals.
    """
    user_id = fake.uuid4()  # UUID (universally unique identifier)
    account_id = fake.uuid4()
    transaction_id = fake.uuid4()
    # The window is relative to the moment of execution ('now'), so timestamps
    # shift with the run date.
    timestamp = fake.date_time_between(start_date='-30d', end_date='now')
    amount = round(np.random.exponential(scale=200), 2)  # mean transaction value
    currency = random.choice(['BRL', 'USD', 'EUR'])
    transaction_type = random.choice(['compra', 'saque', 'transferência', 'depósito'])
    # Only purchases ('compra') have a merchant.
    merchant_id = fake.uuid4() if transaction_type == 'compra' else None
    location = fake.city()
    device_id = fake.uuid4()
    channel = random.choice(['app', 'web', 'ATM', 'POS'])
    is_international = random.choice([True, False])
    # Short-circuits: the extra random draw only happens for international rows.
    is_high_risk_country = is_international and random.random() < 0.3
    previous_balance = round(np.random.uniform(1000, 10000), 2)
    flagged_fraud = fraud

    if fraud:
        amount *= random.uniform(5, 20)  # unusually high value
        location = random.choice(['Pyongyang', 'Teerã', 'Caracas'])  # unusual locations
        channel = random.choice(['web', 'app'])
        device_id = fake.uuid4()  # new device
        timestamp = timestamp.replace(hour=random.choice([2, 3, 4]))  # unusual hour
        # NOTE(review): is_international / is_high_risk_country are not updated
        # to match the overridden location — confirm whether that is intended.

    # Finalise the amount BEFORE deriving the balance. Previously the balance
    # was computed from the pre-fraud amount, so fraudulent rows reported a
    # new_balance that did not match their (5-20x larger) amount.
    amount = round(amount, 2)
    if transaction_type in ('compra', 'saque', 'transferência'):
        new_balance = previous_balance - amount
    else:
        new_balance = previous_balance + amount

    return {
        'transaction_id': transaction_id,
        'timestamp': timestamp,
        'user_id': user_id,
        'account_id': account_id,
        'amount': amount,
        'currency': currency,
        'transaction_type': transaction_type,
        'merchant_id': merchant_id,
        'location': location,
        'device_id': device_id,
        'channel': channel,
        'is_international': is_international,
        'is_high_risk_country': is_high_risk_country,
        'previous_balance': previous_balance,
        'new_balance': round(new_balance, 2),
        'flagged_fraud': flagged_fraud,
    }


# Generate the synthetic dataset: each record is independently labelled as
# fraudulent with probability `fraude_ratio` (the label is drawn before the
# record itself, so the order of random draws matches a plain for-loop).
transactions = [
    transaction_generate(fraud=random.random() < fraude_ratio)
    for _ in range(num_transacoes)
]

df = pd.DataFrame(transactions)

# Build the output path from components instead of a backslash-separated
# string: on non-Windows systems '.\dados\raw\...' is treated as a single
# file name rather than a nested path. Also create the target directory so
# the write does not fail on a fresh checkout.
output_path = Path('dados') / 'raw' / 'transactions2.parquet'
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_parquet(output_path, index=False)

In [3]:
# Preview the first five rows of the generated transactions.
df.head(n=5)

Unnamed: 0,transaction_id,timestamp,user_id,account_id,amount,currency,transaction_type,merchant_id,location,device_id,channel,is_international,is_high_risk_country,previous_balance,new_balance,flagged_fraud
0,bd9c66b3-ad3c-4d6d-9a3d-1fa7bc8960a9,2025-09-18 08:29:40,bdd640fb-0667-4ad1-9c80-317fa3b1799d,23b8c1e9-3924-46de-beb1-3b9046685257,93.85,EUR,compra,0822e8f3-6c03-4199-972a-846916419f82,North Jefferyhaven,32e70629-8fad-41a6-86cb-0fb39a1de644,POS,False,False,9556.43,9462.58,False
1,01a9e71f-de8a-474b-8f36-d58b47378190,2025-08-30 15:06:46,8b8148f6-b38a-488c-a65e-d389b74d0fb1,96da1dac-72ff-4d2a-b86e-cbe06b65a6a4,263.35,EUR,transferência,,Petersonberg,562b0f79-c374-49ee-b50b-ea63371ecd7b,web,False,False,6387.93,6124.58,False
2,bacfb3d0-0b1f-4163-8e9f-f57f43b7a3a6,2025-09-14 04:21:54,18c26797-6142-4a7d-97be-31111a2a73ed,9a8dca03-580d-4b71-98f5-64135be6128e,33.92,USD,saque,,South Christianport,e2acf72f-9e57-4f7a-a0ee-89aed453dd32,ATM,True,True,2403.95,2370.03,False
3,fc377a4c-4a15-444d-85e7-ce8a3a578a8e,2025-08-26 18:03:38,3139d32c-93cd-49bf-9c94-1cf0dc98d2c1,a9488d99-0bbb-4599-91ce-5dd2b45ed1f0,11.97,BRL,saque,,Port Craig,d58842de-a2bc-472f-b412-b29347294739,app,False,False,8795.59,8783.62,False
4,12476f57-a5e5-45ab-aefc-fad8efc89849,2025-09-21 10:47:07,5af30553-5ec4-4e08-a9a3-b2e95d65a441,b3aa7efe-4458-4885-ab90-99a435a240ae,183.82,USD,saque,,New Jessica,451b4cf3-6123-4df7-b656-af7229d4beef,POS,False,False,7372.65,7188.83,False
