# 01 - Helpers Testing

Tests for **`utils/helpers.py`** - Vision parsing, date computations, path building.

## Functions Tested
1. `validate_vision()` - Vision format validation (YYYYMM)
2. `extract_year_month()` - Extract year/month from vision
3. `extract_year_month_int()` - Extract year/month as integers
4. `compute_date_ranges()` - Compute all date ranges for processing
5. `build_layer_path()` - Build datalake layer paths
6. `build_log_filename()` - Build log filenames
---

## Setup

In [None]:
import sys
from pathlib import Path

# Make project packages importable: the parent of the current working
# directory is treated as the project root (assumes the notebook is run from
# a direct subdirectory of the project - confirm for your layout).
project_root = Path.cwd().parent

# Prepend only when missing so re-running this cell does not add duplicates.
if sys.path.count(str(project_root)) == 0:
    sys.path.insert(0, str(project_root))

print(f"Project root: {project_root}")

In [None]:
# Import the six helper functions exercised by the cells below.
# NOTE(review): this presumably relies on the setup cell having put the
# project root on sys.path so that the `utils` package resolves - confirm.
from utils.helpers import (
    validate_vision,
    extract_year_month,
    extract_year_month_int,
    compute_date_ranges,
    build_layer_path,
    build_log_filename
)

# Reached only if the import above did not raise.
print("✓ Helpers imported successfully")

---
## 1. Vision Validation Tests

In [None]:
# Visions that validate_vision() is expected to accept.
valid_visions = ["202509", "202412", "202301", "203012"]

print("Testing VALID visions:")
for vision in valid_visions:
    result = validate_vision(vision)
    # A check mark is shown only when the helper's result equals True.
    if result == True:
        status = "✓"
    else:
        status = "✗"
    print(f"  {status} {vision}: {result}")

In [None]:
# Visions that validate_vision() is expected to reject, each annotated with
# the reason it should be considered invalid.
invalid_visions = [
    "20251",    # Too short
    "2025099",  # Too long
    "202513",   # Invalid month (13)
    "202500",   # Invalid month (00)
    "abcdef",   # Non-numeric
    "190001",   # Year too old
    "210001",   # Year too far in future
]

print("\nTesting INVALID visions:")
for vision in invalid_visions:
    result = validate_vision(vision)
    # Here a check mark means the helper correctly returned False.
    if result == False:
        status = "✓"
    else:
        status = "✗"
    print(f"  {status} {vision}: {result}")

---
## 2. Year/Month Extraction Tests

In [None]:
# extract_year_month() is expected to return the year and month as strings.
# test_vision is reused by the integer-extraction cell that follows.
test_vision = "202509"
year, month = extract_year_month(test_vision)

print(f"Vision: {test_vision}")
print(f"  Year:  {year}  (type: {type(year).__name__})")
print(f"  Month: {month} (type: {type(month).__name__})")

# Both parts must match exactly, including the zero-padded month.
if year == '2025' and month == '09':
    print("  ✓ Correct: year='2025' and month='09'")
else:
    print("  ✗ INCORRECT")

In [None]:
# extract_year_month_int() is expected to return the same parts as integers.
# Uses test_vision as defined by the previous cell.
year_int, month_int = extract_year_month_int(test_vision)

print(f"\nVision: {test_vision} (as integers)")
print(f"  Year:  {year_int}  (type: {type(year_int).__name__})")
print(f"  Month: {month_int} (type: {type(month_int).__name__})")

# The month is compared against 9, i.e. without the leading zero.
if year_int == 2025 and month_int == 9:
    print("  ✓ Correct: year=2025 and month=9")
else:
    print("  ✗ INCORRECT")

---
## 3. Date Ranges Computation Tests

In [None]:
# Compute and display every date range returned for September 2025.
dates = compute_date_ranges("202509")

print("Date ranges for vision 202509:")
print("-" * 50)
for key, value in dates.items():
    print(f"  {key:<15}: {value}")

# Spot-check three keys against their expected ISO dates.
# Each entry: (key in `dates`, expected value, success message, failure message).
print("\nVerifications:")
date_checks = [
    ("DTFIN", "2025-09-30", "  ✓ DTFIN is last day of Sept", "  ✗ Wrong DTFIN"),
    ("DTDEB_AN", "2025-01-01", "  ✓ DTDEB_AN is Jan 1", "  ✗ Wrong DTDEB_AN"),
    ("dtfinmm1", "2025-08-31", "  ✓ dtfinmm1 is last day of Aug", "  ✗ Wrong dtfinmm1"),
]
for date_key, expected_date, ok_message, fail_message in date_checks:
    if dates[date_key] == expected_date:
        print(ok_message)
    else:
        print(fail_message)

In [None]:
# Edge cases for compute_date_ranges(): year end, leap-year February, and a
# January vision whose previous month falls in the prior year.

# December: DTFIN should be the last day of the year.
print("\nEdge case: December 2024")
dates_dec = compute_date_ranges("202412")
print(f"  DTFIN: {dates_dec['DTFIN']}")
if dates_dec['DTFIN'] == '2024-12-31':
    print("  ✓ Correct (Dec 31)")
else:
    print("  ✗ INCORRECT")

# February 2024: DTFIN should be the 29th because 2024 is a leap year.
print("\nEdge case: February 2024 (leap year)")
dates_feb = compute_date_ranges("202402")
print(f"  DTFIN: {dates_feb['DTFIN']}")
if dates_feb['DTFIN'] == '2024-02-29':
    print("  ✓ Correct (Feb 29)")
else:
    print("  ✗ INCORRECT")

# January 2025: dtfinmm1 should be Dec 31 of the previous year.
print("\nEdge case: January 2025")
dates_jan = compute_date_ranges("202501")
print(f"  dtfinmm1: {dates_jan['dtfinmm1']}")
if dates_jan['dtfinmm1'] == '2024-12-31':
    print("  ✓ Correct (Dec 31 prev year)")
else:
    print("  ✗ INCORRECT")

---
## 4. Path Building Tests

In [None]:
# Build a path for each datalake layer and check that it ends with
# "/<layer>/2025/09" for vision 202509.
base_path = "abfss://container@storage.dfs.core.windows.net/construction"
vision = "202509"
layers = ["bronze", "silver", "gold"]

print("Layer paths for vision 202509:")
print("-" * 80)
for layer in layers:
    path = build_layer_path(base_path, layer, vision)
    print(f"  {layer:<6}: {path}")

    # Only the path suffix is verified; the base prefix is not checked.
    expected_ending = f"/{layer}/2025/09"
    if path.endswith(expected_ending):
        print(f"           ✓ Ends with {expected_ending}")
    else:
        print("           ✗ Wrong path")

In [None]:
# build_log_filename() is expected to return "pipeline_<vision>.log".
vision = "202509"
log_filename = build_log_filename(vision)

print(f"\nLog filename for vision {vision}:")
print(f"  {log_filename}")

if log_filename == f"pipeline_{vision}.log":
    print("  ✓ Correct format")
else:
    print("  ✗ Wrong format")

---
## 5. Full Workflow Test

In [None]:
# Simulate a full workflow with a given vision: validate it, extract its
# components, compute the date ranges, build layer paths and the log filename.
test_vision = "202509"

print(f"Testing full workflow for vision: {test_vision}")
print("=" * 80)

# Step 1: Validate. The result is kept so the closing banner can reflect it.
vision_is_valid = validate_vision(test_vision)
if not vision_is_valid:
    print("✗ Invalid vision!")
else:
    print(f"✓ Vision {test_vision} is valid")

    # Step 2: Extract components
    year, month = extract_year_month_int(test_vision)
    print(f"✓ Extracted: year={year}, month={month}")

    # Step 3: Compute dates
    dates = compute_date_ranges(test_vision)
    print(f"✓ Computed {len(dates)} date ranges")
    print(f"   - Period: {dates['DTDEB_AN']} → {dates['DTFIN']}")

    # Step 4: Build paths
    base = "abfss://container@storage.dfs.core.windows.net/construction"
    silver_path = build_layer_path(base, "silver", test_vision)
    gold_path = build_layer_path(base, "gold", test_vision)
    print("✓ Built layer paths")
    print(f"   - Silver: {silver_path}")
    print(f"   - Gold:   {gold_path}")

    # Step 5: Build log filename
    log_file = build_log_filename(test_vision)
    print(f"✓ Log file: {log_file}")

print("=" * 80)
# Report success only when the workflow actually ran; previously the success
# banner was printed even right after "✗ Invalid vision!".
if vision_is_valid:
    print("✓ All helper functions working correctly!")
else:
    print("✗ Workflow skipped: vision failed validation")

---
## Summary

This notebook tested:
- ✅ Vision validation (valid/invalid formats)
- ✅ Year/month extraction (string and integer formats)
- ✅ Date range computation (including edge cases like leap years)
- ✅ Path building for datalake layers
- ✅ Log filename generation
- ✅ Full workflow integration

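All tests use **visual inspection**: each check prints ✓ or ✗ instead of raising an error, so a failing check will not stop the notebook - review the outputs above to confirm correctness.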