# Explore Warehouse and Alerts
Load Parquet data from the warehouse, join the tables, and explore risk alerts.

In [None]:
from pyspark.sql import SparkSession
import pandas as pd
from pyspark.sql.functions import *

# Single Spark session for the notebook; getOrCreate() hands back an existing
# session when one is already running.
spark = (
    SparkSession.builder
    .appName('warehouse_explore')
    .getOrCreate()
)

# Directory that holds one Parquet dataset per warehouse table.
warehouse_path = 'data/warehouse/parquet'

# Read the three warehouse tables used by the rest of the notebook.
df_transactions = spark.read.parquet(warehouse_path + '/fact_transactions')
df_users = spark.read.parquet(warehouse_path + '/dim_user')
df_alerts = spark.read.parquet(warehouse_path + '/fact_risk_events')

# Print a row count per table as a quick check that each one loaded.
for table_label, table_df in (
    ('Transactions', df_transactions),
    ('Users', df_users),
    ('Alerts', df_alerts),
):
    print(f'{table_label} count:', table_df.count())

In [None]:
# Attach user attributes to every transaction. The left join keeps
# transactions whose user_id has no match in the user table.
df_joined = df_transactions.join(df_users, on='user_id', how='left')

# Preview the first 10 rows of the columns of interest.
tx_preview = df_joined.select('tx_id', 'user_risk_tier', 'amount')
tx_preview.show(10)

# Keep only transactions made by users whose risk tier is 'high'.
is_high_risk = col('user_risk_tier') == 'high'
high_risk_tx = df_joined.where(is_high_risk)
high_risk_count = high_risk_tx.count()
print(f'High-risk user transactions: {high_risk_count}')

In [None]:
# Number of alerts per risk type.
alerts_by_type = df_alerts.groupBy('risk_type').count()
alerts_by_type.show()

# Attach transaction details to each alert. The left join keeps alerts whose
# tx_id has no match in the transactions table.
df_alert_tx = df_alerts.join(df_transactions, on='tx_id', how='left')

# NOTE(review): if both tables carry a column with the same name (e.g.
# created_at), this select is ambiguous -- confirm against the table schemas.
alert_preview = df_alert_tx.select('risk_type', 'amount', 'created_at')
alert_preview.show(10)

# Release the Spark session now that the exploration is finished.
spark.stop()