# PTF_MVT 01: Bronze Reading & Filters

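**Purpose**: Verify that the bronze-layer file groups (ipf, IPFM99, IRD risk) can be read for a given vision, and that the AZ business filters can be applied to the ipf data.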

---

In [11]:
import sys
from pathlib import Path

# The project root is taken to be two directory levels above the current
# working directory; it is placed first on sys.path so imports resolve
# against it before anything else.
project_root = Path.cwd().parent.parent
root_entry = str(project_root)
sys.path.insert(0, root_entry)
print(f"Project root: {project_root}")

Project root: /workspace/new_python


In [12]:
from pyspark.sql import SparkSession
# from azfr_fsspec_utils import fspath
# import azfr_fsspec_abfs

# azfr_fsspec_abfs.use()

# Obtain a Spark session through the builder, requesting the application
# name "PTF_MVT_Bronze", then report which Spark version is running.
spark = (
    SparkSession.builder
    .appName("PTF_MVT_Bronze")
    .getOrCreate()
)

print(f"✓ Spark {spark.version}")

✓ Spark 3.4.4


## 1. Load Configuration

In [13]:
from utils.loaders.config_loader import ConfigLoader
from utils.loaders.transformation_loader import TransformationLoader
from src.reader import BronzeReader

# Every configuration artefact used here lives under <project_root>/config.
config_dir = project_root / "config"

config = ConfigLoader(str(config_dir / "config.yml"))
bronze_reader = BronzeReader(
    spark,
    config,
    str(config_dir / "reading_config.json"),
)
loader = TransformationLoader(str(config_dir / "transformations"))

# Vision identifier handed to the bronze reader
# (presumably a YYYYMM period — TODO confirm).
VISION = "202509"
print(f"Vision: {VISION}")

Vision: 202509


## 2. Read Bronze Data

In [14]:
# ipf (Agent + Courtage): read the file group, then report its row count.
df_ipf = bronze_reader.read_file_group('ipf', VISION)
ipf_rows = df_ipf.count()
print(f"✓ ipf: {ipf_rows:,} rows")

# IPFM99 (movements): same read-then-count pattern.
df_ipfm99 = bronze_reader.read_file_group('ipfm99_az', VISION)
ipfm99_rows = df_ipfm99.count()
print(f"✓ IPFM99: {ipfm99_rows:,} rows")

# IRD Risk: each file group is read on a best-effort basis; a group that
# cannot be read is reported and the loop moves on to the next one.
for ird in ['ird_risk_q45', 'ird_risk_q46', 'ird_risk_qan']:
    try:
        df = bronze_reader.read_file_group(ird, VISION)
        print(f"✓ {ird}: {df.count():,} rows")
    except Exception as exc:
        # Catch Exception, not a bare `except:`, so KeyboardInterrupt and
        # SystemExit still propagate. Show the error type and the first line
        # of its message so a genuine failure is not mistaken for a file
        # that is simply absent.
        reason = str(exc).partition("\n")[0]
        print(f"⚠ {ird}: not available ({type(exc).__name__}: {reason})")

✓ ipf: 30,000 rows
✓ IPFM99: 1,000 rows
✓ ird_risk_q45: 2,000 rows
✓ ird_risk_q46: 2,000 rows
✓ ird_risk_qan: 1,000 rows


## 3. Apply Business Filters

In [15]:
from utils.transformations.base.generic_transforms import apply_business_filters

# Fetch the AZ business-filter definitions from the loaded business rules.
business_rules = loader.get_business_rules()
az_filters = business_rules['business_filters']['az']

# Count rows before and after filtering so the effect of the filters is visible.
count_before = df_ipf.count()
df_filtered = apply_business_filters(df_ipf, az_filters)
count_after = df_filtered.count()

# Guard the percentage: with an empty bronze read (count_before == 0) the
# division would raise ZeroDivisionError, so report 0.0% in that case.
removed = count_before - count_after
removed_pct = removed / count_before * 100 if count_before else 0.0

# NOTE(review): the recorded run removed 0 of 30,000 rows — confirm the AZ
# filters are expected to be a no-op on this dataset.
print(f"Before filters: {count_before:,}")
print(f"After filters:  {count_after:,}")
print(f"Filtered:       {removed:,} ({removed_pct:.1f}%)")

Before filters: 30,000
After filters:  30,000
Filtered:       0 (0.0%)


## Summary

In [16]:
# Wrap up: print a framed completion banner, shut the Spark session down,
# then point the reader at the next notebook.
rule = "=" * 50
for line in (rule, "BRONZE READING & FILTERS COMPLETE", rule):
    print(line)
spark.stop()
print("→ Next: Notebook 02 - AZ Processor")

BRONZE READING & FILTERS COMPLETE
→ Next: Notebook 02 - AZ Processor
