In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from datetime import datetime
import random

spark = SparkSession.builder.getOrCreate()

# Use the workspace catalog 
CATALOG = "workspace"
SCHEMA = "fca_regulatory"

# Create schema if it doesn't exist
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {CATALOG}.{SCHEMA}")

# Generate customer data
statuses = ["Active", "Dormant", "Vulnerable"]
customers = [
    (f"C{i:03d}", f"First{i}", f"Last{i}",
     datetime(1950 + random.randint(0,50), random.randint(1,12), random.randint(1,28)),
     random.choice(statuses), datetime(2024,1,1)) 
    for i in range(1,101)
]

# Define schema
customer_schema = StructType([
    StructField("customer_id", StringType()),
    StructField("first_name", StringType()),
    StructField("last_name", StringType()),
    StructField("date_of_birth", DateType()),
    StructField("customer_status", StringType()),
    StructField("created_date", DateType())
])

# Create DataFrame
df_customers = spark.createDataFrame(customers, customer_schema)

# Write to managed table
TABLE_NAME = f"{CATALOG}.{SCHEMA}.bronze_customer_master"
df_customers.write.format("delta").mode("overwrite").saveAsTable(TABLE_NAME)

print(f"✅ Successfully created table: {TABLE_NAME}")
print(f"📊 Row count: {df_customers.count()}")

# Verify the table was created
print("\n📊 Sample data:")
spark.sql(f"SELECT * FROM {TABLE_NAME} LIMIT 5").show()

# Show table metadata
print("\n📁 Table location:")
spark.sql(f"DESCRIBE EXTENDED {TABLE_NAME}").filter("col_name == 'Location'").show(truncate=False)

✅ Successfully created table: workspace.fca_regulatory.bronze_customer_master
📊 Row count: 100

📊 Sample data:
+-----------+----------+---------+-------------+---------------+------------+
|customer_id|first_name|last_name|date_of_birth|customer_status|created_date|
+-----------+----------+---------+-------------+---------------+------------+
|       C001|    First1|    Last1|   1968-01-21|     Vulnerable|  2024-01-01|
|       C002|    First2|    Last2|   1999-04-06|        Dormant|  2024-01-01|
|       C003|    First3|    Last3|   1969-03-25|         Active|  2024-01-01|
|       C004|    First4|    Last4|   1960-08-02|     Vulnerable|  2024-01-01|
|       C005|    First5|    Last5|   1956-02-27|         Active|  2024-01-01|
+-----------+----------+---------+-------------+---------------+------------+


📁 Table location:
+--------+---------+-------+
|col_name|data_type|comment|
+--------+---------+-------+
|Location|         |       |
+--------+---------+-------+



In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from datetime import datetime
import random

spark = SparkSession.builder.getOrCreate()

# Use Unity Catalog (same as before)
CATALOG = "workspace"
SCHEMA = "fca_regulatory"

# Generate transaction data
transaction_types = ["Deposit", "Withdrawal", "Loan"]
currencies = ["GBP", "USD", "EUR"]
transactions = [
    (f"T{i:04d}", f"C{random.randint(1,100):03d}",
     datetime(2024, 1, random.randint(1,28)),
     random.choice(transaction_types),
     round(random.uniform(50, 20000), 2),
     random.choice(currencies)) 
    for i in range(1, 1001)
]

# Define schema
txn_schema = StructType([
    StructField("transaction_id", StringType()),
    StructField("customer_id", StringType()),
    StructField("transaction_date", DateType()),
    StructField("transaction_type", StringType()),
    StructField("amount", DoubleType()),
    StructField("currency", StringType())
])

# Create DataFrame
df_txns = spark.createDataFrame(transactions, txn_schema)

# Write to Unity Catalog managed table (NOT using BRONZE_PATH)
TABLE_NAME = f"{CATALOG}.{SCHEMA}.bronze_daily_transactions"
df_txns.write.format("delta").mode("overwrite").saveAsTable(TABLE_NAME)

print(f"✅ Successfully created table: {TABLE_NAME}")
print(f"📊 Row count: {df_txns.count()}")

# Verify the table was created
print("\n📊 Sample transactions:")
spark.sql(f"SELECT * FROM {TABLE_NAME} LIMIT 10").show()

✅ Successfully created table: workspace.fca_regulatory.bronze_daily_transactions
📊 Row count: 1000

📊 Sample transactions:
+--------------+-----------+----------------+----------------+--------+--------+
|transaction_id|customer_id|transaction_date|transaction_type|  amount|currency|
+--------------+-----------+----------------+----------------+--------+--------+
|         T0001|       C082|      2024-01-11|         Deposit|18111.14|     GBP|
|         T0002|       C080|      2024-01-25|         Deposit|16885.22|     EUR|
|         T0003|       C100|      2024-01-23|         Deposit|10247.88|     USD|
|         T0004|       C032|      2024-01-16|         Deposit|14207.33|     GBP|
|         T0005|       C054|      2024-01-06|         Deposit| 8556.78|     GBP|
|         T0006|       C077|      2024-01-25|         Deposit| 8461.97|     EUR|
|         T0007|       C034|      2024-01-25|      Withdrawal| 2754.48|     GBP|
|         T0008|       C076|      2024-01-16|            Loan|18850

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from datetime import datetime, timedelta
import random

spark = SparkSession.builder.getOrCreate()

# Use Unity Catalog
CATALOG = "workspace"
SCHEMA = "fca_regulatory"

# Simulate Bank of England market data
print("📊 Simulating Bank of England market data...")

start_date = datetime(2024, 1, 1)
market_data = []
base_rate = 5.25  # UK base rate

for i in range(365):
    date = start_date + timedelta(days=i)
    interest_rate = round(base_rate + random.uniform(-0.05, 0.05), 4)
    exchange_rate_usd = round(1.27 + random.uniform(-0.02, 0.02), 4)
    exchange_rate_eur = round(1.17 + random.uniform(-0.02, 0.02), 4)
    
    market_data.append((date, interest_rate, exchange_rate_usd, exchange_rate_eur))

market_schema = StructType([
    StructField("date", DateType()),
    StructField("base_interest_rate", DoubleType()),
    StructField("gbp_usd_rate", DoubleType()),
    StructField("gbp_eur_rate", DoubleType())
])

df_market = spark.createDataFrame(market_data, market_schema)

TABLE_NAME = f"{CATALOG}.{SCHEMA}.bronze_market_data"
df_market.write.format("delta").mode("overwrite").saveAsTable(TABLE_NAME)

print(f"✅ Successfully created table: {TABLE_NAME}")
print(f"📊 Row count: {df_market.count()}")

spark.sql(f"SELECT * FROM {TABLE_NAME} ORDER BY date LIMIT 10").show()

📊 Simulating Bank of England market data...
✅ Successfully created table: workspace.fca_regulatory.bronze_market_data
📊 Row count: 365
+----------+------------------+------------+------------+
|      date|base_interest_rate|gbp_usd_rate|gbp_eur_rate|
+----------+------------------+------------+------------+
|2024-01-01|            5.2732|       1.264|      1.1737|
|2024-01-02|            5.2157|      1.2526|      1.1677|
|2024-01-03|            5.2198|      1.2795|      1.1803|
|2024-01-04|            5.2312|      1.2719|      1.1537|
|2024-01-05|            5.2217|      1.2859|      1.1607|
|2024-01-06|            5.2798|      1.2824|       1.165|
|2024-01-07|            5.2082|      1.2746|      1.1592|
|2024-01-08|            5.2863|      1.2599|      1.1676|
|2024-01-09|            5.2492|      1.2511|       1.156|
|2024-01-10|            5.2791|      1.2831|      1.1645|
+----------+------------------+------------+------------+

