In [16]:
import random
from datetime import datetime, timedelta
import pandas as pd
import json
import os

def random_date(start, end):
    """Pick a random moment between ``start`` and ``end``.

    The span between the two datetimes is truncated to whole seconds and a
    second offset is drawn with ``random.randint``, so both endpoints are
    possible results and the result always lies a whole number of seconds
    after ``start``.
    """
    span_seconds = int((end - start).total_seconds())
    offset = timedelta(seconds=random.randint(0, span_seconds))
    return start + offset

def generate_high_frequency_transactions(customer_id, start_date, num_transactions):
    """Create ``num_transactions`` records for one customer within a single day.

    Each record's timestamp is drawn by ``random_date`` from the window
    ``start_date`` .. ``start_date + 1 day``. Merchants come from
    Merchant1..Merchant20 and locations from City11..City20.

    Transaction IDs are drawn independently from T10000..T99999, so repeated
    IDs are possible.

    Returns a list of dicts; a non-positive ``num_transactions`` yields ``[]``.
    """

    def build_record():
        # Draws happen in the same order as the keys appear in the record,
        # so a seeded run yields the same values as before.
        id_number = random.randint(10000, 99999)
        window_end = start_date + timedelta(days=1)
        moment = random_date(start_date, window_end)
        amount = random.uniform(10, 1000)
        currency = random.choice(["USD", "EUR", "GBP"])
        merchant_number = random.randint(1, 20)
        kind = random.choice(["purchase", "withdrawal"])
        city_number = random.randint(11, 20)
        return {
            "transaction_id": f"T{id_number}",
            "date_time": moment.isoformat(),
            "amount": amount,
            "currency": currency,
            "merchant_details": f"Merchant{merchant_number}",
            "customer_id": customer_id,
            "transaction_type": kind,
            "location": f"City{city_number}",
        }

    return [build_record() for _ in range(num_transactions)]

def generate_data(num_transactions, num_customers):
    """Build a synthetic dataset of customers, transactions and external data.

    Parameters:
        num_transactions: number of regular transactions to create; each one
            is assigned to a randomly chosen customer.
        num_customers: number of customers to create, with IDs ``C000``,
            ``C001``, ...

    Returns:
        A tuple ``(transactions, customers, external_data)``:
        - ``transactions``: list of transaction dicts (regular ones first,
          followed by the high-frequency bursts);
        - ``customers``: list of customer dicts whose ``account_history``
          lists the IDs of every transaction generated for that customer,
          bursts included;
        - ``external_data``: dict with ``blacklist_info`` (merchant names),
          ``credit_scores`` and ``fraud_reports`` (both keyed by customer ID).

    Raises:
        ValueError: if transactions are requested but there is no customer
            to assign them to.
    """
    if num_transactions > 0 and num_customers < 1:
        raise ValueError(
            "num_customers must be at least 1 when num_transactions > 0"
        )

    customers = []
    transactions = []
    # Ten draws with replacement from Merchant21..Merchant30, so the same
    # merchant name can appear more than once in the blacklist.
    list_merchant = [f"Merchant{random.randint(21, 30)}" for _ in range(10)]
    external_data = {
        "blacklist_info": list_merchant,
        "credit_scores": {},
        "fraud_reports": {}
    }

    for i in range(num_customers):
        customer_id = f"C{i:03}"
        customer_city = f"City{random.randint(1, 10)}"
        customers.append({
            "customer_id": customer_id,
            "account_history": [],
            "demographics": {"age": random.randint(18, 70), "location": customer_city},
            "behavioral_patterns": {"avg_transaction_value": random.uniform(50, 500)}
        })
        external_data["credit_scores"][customer_id] = random.randint(300, 850)
        external_data["fraud_reports"][customer_id] = random.randint(0, 5)

    for i in range(num_transactions):
        # customers[k] always carries the ID f"C{k:03}", so the owner can be
        # indexed directly instead of being searched for by ID.
        owner = customers[random.randint(0, num_customers - 1)]
        transaction = {
            "transaction_id": f"T{i:05}",
            "date_time": random_date(datetime(2020, 1, 1), datetime(2023, 1, 1)).isoformat(),
            # About 40% of the amounts are scaled up by a factor of ten.
            "amount": random.uniform(10, 1000) * (10 if random.random() < 0.4 else 1),
            "currency": random.choice(["USD", "EUR", "GBP"]),
            "merchant_details": f"Merchant{random.randint(1, 100)}",
            "customer_id": owner["customer_id"],
            "transaction_type": random.choice(["purchase", "withdrawal"]),
            "location": f"City{random.randint(1, 10)}"
        }
        transactions.append(transaction)
        owner["account_history"].append(transaction["transaction_id"])

    # One customer in forty (rounded down) additionally receives a burst of
    # ten transactions starting on 2022-01-01.
    for customer in random.sample(customers, num_customers // 40):
        burst = generate_high_frequency_transactions(customer['customer_id'], datetime(2022, 1, 1), 10)
        transactions.extend(burst)
        # Keep the customer's history consistent with the transaction list:
        # burst transactions belong to this customer as well.
        customer['account_history'].extend(entry['transaction_id'] for entry in burst)

    return transactions, customers, external_data

def save_to_json(file_path, data):
    """Write ``data`` to ``file_path`` as JSON with a two-space indent.

    Any existing file at ``file_path`` is replaced. The JSON text is built
    before the file is opened for writing, so if ``data`` cannot be
    serialized the exception propagates and an existing file keeps its
    previous content instead of being truncated.

    Raises:
        TypeError: if ``data`` contains an object ``json`` cannot serialize.
    """
    serialized = json.dumps(data, indent=2)
    with open(file_path, 'w') as file:
        file.write(serialized)

def insert_data_to_file(file_path, data):
    """Append the items of ``data`` to the JSON array stored at ``file_path``.

    If the file does not exist yet, ``data`` is written as a new file via
    ``save_to_json``. Otherwise the stored content is loaded, extended with
    ``data`` and written back with a two-space indent. The stored content is
    expected to be a JSON array, because it is extended with ``.extend``.

    The merged content is serialized before the file is reopened for
    writing, so a serialization failure leaves the stored records untouched
    instead of truncating the file.

    Raises:
        TypeError: if the merged content contains an object ``json`` cannot
            serialize.
    """
    if not os.path.exists(file_path):
        save_to_json(file_path, data)
        return

    with open(file_path, 'r') as file:
        existing_data = json.load(file)
    existing_data.extend(data)

    serialized = json.dumps(existing_data, indent=2)
    with open(file_path, 'w') as file:
        file.write(serialized)

# Seed the generator so that Restart Kernel -> Run All reproduces the same
# dataset on every run.
SEED = 42
random.seed(SEED)

transactions, customers, external_data = generate_data(1000, 100)

# The output directory has to exist before any file in it can be opened.
os.makedirs('./DATA', exist_ok=True)

# Write each dataset exactly once. Saving a file and then appending the very
# same list to it would store every record twice.
save_to_json('./DATA/transactions.json', transactions)
save_to_json('./DATA/customers.json', customers)
# external_data stays wrapped in a one-element list, the same shape the file
# has on a first run, but is overwritten instead of growing on every run.
save_to_json('./DATA/external_data.json', [external_data])

print("Data saved to transactions.json, customers.json, and external_data.json")


Data saved to transactions.json, customers.json, and external_data.json


In [13]:
# Display the external_data dict returned by generate_data
# (keys: blacklist_info, credit_scores, fraud_reports).
# NOTE(review): this cell's execution count (13) is lower than that of the
# generating cell (16), and the blacklist shown has fewer entries than the
# ten the generator draws, so the output below looks stale; re-run after
# the cell above.
external_data

{'blacklist_info': ['Merchant28',
  'Merchant22',
  'Merchant24',
  'Merchant25',
  'Merchant26',
  'Merchant21'],
 'credit_scores': {'C000': 351,
  'C001': 340,
  'C002': 405,
  'C003': 357,
  'C004': 529,
  'C005': 688,
  'C006': 516,
  'C007': 439,
  'C008': 727,
  'C009': 346,
  'C010': 742,
  'C011': 813,
  'C012': 615,
  'C013': 807,
  'C014': 781,
  'C015': 415,
  'C016': 810,
  'C017': 802,
  'C018': 364,
  'C019': 509,
  'C020': 570,
  'C021': 326,
  'C022': 835,
  'C023': 818,
  'C024': 695,
  'C025': 843,
  'C026': 534,
  'C027': 385,
  'C028': 433,
  'C029': 805,
  'C030': 327,
  'C031': 756,
  'C032': 447,
  'C033': 330,
  'C034': 793,
  'C035': 347,
  'C036': 835,
  'C037': 837,
  'C038': 629,
  'C039': 567,
  'C040': 730,
  'C041': 825,
  'C042': 387,
  'C043': 554,
  'C044': 442,
  'C045': 665,
  'C046': 654,
  'C047': 321,
  'C048': 465,
  'C049': 619,
  'C050': 736,
  'C051': 538,
  'C052': 383,
  'C053': 818,
  'C054': 515,
  'C055': 554,
  'C056': 818,
  'C057': 495