In [None]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
# Purpose: Validate ml_config_loader.py with proper path setup
# Run this in any Jupyter notebook to test MLConfig

import sys
from pathlib import Path

# ============================================================================
# STEP 1: Bootstrap Path to ModelPipeline
# ============================================================================
print("="*80)
print("BOOTSTRAPPING PATH")
print("="*80)

current = Path.cwd()
print(f"Current directory: {current}")

# Find ModelPipeline root
model_root = None
for parent in [current] + list(current.parents):
    if parent.name == "ModelPipeline":
        model_root = parent
        break

if model_root is None:
    raise RuntimeError(
        f"Cannot find ModelPipeline root.\n"
        f"Current directory: {current}\n"
        f"Make sure notebook is inside ModelPipeline/ tree"
    )

print(f"✓ Found ModelPipeline: {model_root}")

# Add to sys.path if not already there
if str(model_root) not in sys.path:
    sys.path.insert(0, str(model_root))
    print(f"✓ Added to sys.path: {model_root}")
else:
    print(f"✓ Already in sys.path: {model_root}")

BOOTSTRAPPING PATH
Current directory: d:\JoelDesktop folds_24\NEU FALL2025\MLops IE7374 Project\FinSights\ModelPipeline\finrag_ml_tg1\rag_modules_src\03_LambdaRefactor_Tests
✓ Found ModelPipeline: d:\JoelDesktop folds_24\NEU FALL2025\MLops IE7374 Project\FinSights\ModelPipeline
✓ Already in sys.path: d:\JoelDesktop folds_24\NEU FALL2025\MLops IE7374 Project\FinSights\ModelPipeline


In [9]:
# ============================================================================
#: Test ml_config_loader.py (Import Method)
# ============================================================================
print("\n" + "="*80)
print("TESTING ML_CONFIG_LOADER")
print("="*80 + "\n")

# Import the module
from finrag_ml_tg1.loaders.ml_config_loader import MLConfig

# Create instance (this runs all the __init__ logic)
config = MLConfig()

# Now manually run the same tests that are in __main__
print("=" * 80)
print("ML PIPELINE CONFIGURATION - COMPREHENSIVE TEST")
print("=" * 80)

print(f"\n[AWS Configuration]")
print(f"  Credentials: {config._aws_creds_source}")
print(f"  Bucket: {config.bucket}")
print(f"  Region: {config.region}")
print(f"  Access Key: {config.aws_access_key[:8]}..." if config.aws_access_key else "  ✗ Missing")

print(f"\n[Model Root Resolution]")
print(f"  Model Root: {config.model_root}")
print(f"  Root Type: {type(config.model_root).__name__}")
print(f"  Root Exists: {config.model_root.exists()}")

print(f"\n[Environment Detection]")
print(f"  Is Lambda: {config.is_lambda_environment}")
print(f"  Data Loading Mode: {config.data_loading_mode}")

print(f"\n[Data Loader Integration Ready]")
print(f"  Config has model_root: {hasattr(config, 'model_root')}")
print(f"  Config has is_lambda_environment: {hasattr(config, 'is_lambda_environment')}")
print(f"  Config has data_loading_mode: {hasattr(config, 'data_loading_mode')}")
print(f"  ✓ Ready for create_data_loader(config)")

print(f"\n" + "=" * 80)
print("✓ CONFIGURATION LOADED SUCCESSFULLY - LAMBDA-READY!")
print("=" * 80)


TESTING ML_CONFIG_LOADER

[DEBUG] ✓ Found ModelPipeline via file path: D:\JoelDesktop folds_24\NEU FALL2025\MLops IE7374 Project\FinSights\ModelPipeline
[DEBUG] ✓ AWS credentials loaded from aws_credentials.env
ML PIPELINE CONFIGURATION - COMPREHENSIVE TEST

[AWS Configuration]
  Credentials: aws_credentials.env
  Bucket: sentence-data-ingestion
  Region: us-east-1
  Access Key: AKIA2TV7...

[Model Root Resolution]
  Model Root: D:\JoelDesktop folds_24\NEU FALL2025\MLops IE7374 Project\FinSights\ModelPipeline
  Root Type: WindowsPath
  Root Exists: True

[Environment Detection]
  Is Lambda: False
  Data Loading Mode: LOCAL_CACHE

[Data Loader Integration Ready]
  Config has model_root: True
  Config has is_lambda_environment: True
  Config has data_loading_mode: True
  ✓ Ready for create_data_loader(config)

✓ CONFIGURATION LOADED SUCCESSFULLY - LAMBDA-READY!


In [10]:
"""
Test data loader abstraction - local testing before Lambda
"""

import sys
from pathlib import Path

# Bootstrap path
current = Path.cwd()
for parent in [current] + list(current.parents):
    if parent.name == "ModelPipeline":
        model_root = parent
        break
if str(model_root) not in sys.path:
    sys.path.insert(0, str(model_root))

from finrag_ml_tg1.loaders.ml_config_loader import MLConfig
from finrag_ml_tg1.loaders.data_loader_factory import create_data_loader


def test_local_cache_loader():
    """Test 1: Verify local cache loader works (should be current behavior)"""
    print("\n" + "="*70)
    print("TEST 1: LocalCacheLoader")
    print("="*70)
    
    config = MLConfig()
    loader = create_data_loader(config)
    
    print(f"Loader type: {type(loader).__name__}")
    assert loader.__class__.__name__ == 'LocalCacheLoader'
    
    # Load Stage 2 Meta
    df = loader.load_stage2_meta()
    print(f"  ✓ Stage 2 Meta: {len(df):,} rows")
    assert len(df) > 0
    assert 'sentenceID' in df.columns
    
    # Load dimensions
    companies_df = loader.load_dimension_companies()
    print(f"  ✓ Companies: {len(companies_df):,} rows")
    
    sections_df = loader.load_dimension_sections()
    print(f"  ✓ Sections: {len(sections_df):,} rows")
    
    # Test sentence fetch
    sample_ids = df['sentenceID'].head(10).to_list()
    fetched = loader.get_sentences_by_ids(sample_ids)
    print(f"  ✓ Fetched {len(fetched)} sentences by ID")
    assert len(fetched) == 10
    
    print("✓ LocalCacheLoader test PASSED")


def test_s3_streaming_loader():
    """Test 2: Verify S3 streaming loader (mock Lambda)"""
    print("\n" + "="*70)
    print("TEST 2: S3StreamingLoader (simulated Lambda)")
    print("="*70)
    
    import os
    
    # Simulate Lambda environment
    os.environ['AWS_LAMBDA_FUNCTION_NAME'] = 'test-finrag-function'
    os.environ['LAMBDA_TASK_ROOT'] = '/tmp/mock_lambda'
    
    config = MLConfig()
    loader = create_data_loader(config)
    
    print(f"Loader type: {type(loader).__name__}")
    assert loader.__class__.__name__ == 'S3StreamingLoader'
    
    # Load from S3
    df = loader.load_stage2_meta()
    print(f"  ✓ Stage 2 Meta (from S3): {len(df):,} rows")
    assert len(df) > 0
    
    # Clean up
    del os.environ['AWS_LAMBDA_FUNCTION_NAME']
    del os.environ['LAMBDA_TASK_ROOT']
    
    print("✓ S3StreamingLoader test PASSED")


def test_factory_auto_detection():
    """Test 3: Verify factory picks correct loader"""
    print("\n" + "="*70)
    print("TEST 3: Factory Auto-Detection")
    print("="*70)
    
    config = MLConfig()
    
    # Should pick LocalCacheLoader in normal dev
    loader = create_data_loader(config)
    print(f"Normal env → {type(loader).__name__}")
    assert loader.__class__.__name__ == 'LocalCacheLoader'
    
    # Simulate Lambda
    import os
    os.environ['AWS_LAMBDA_FUNCTION_NAME'] = 'test'
    config_lambda = MLConfig()
    loader_lambda = create_data_loader(config_lambda)
    print(f"Lambda env → {type(loader_lambda).__name__}")
    assert loader_lambda.__class__.__name__ == 'S3StreamingLoader'
    del os.environ['AWS_LAMBDA_FUNCTION_NAME']
    
    print("✓ Factory auto-detection test PASSED")



test_local_cache_loader()
test_factory_auto_detection()
# test_s3_streaming_loader()  # Uncomment when ready to test S3
    
print("\n" + "="*70)
print("ALL TESTS PASSED ✓")
print("="*70)


TEST 1: LocalCacheLoader
[DEBUG] ✓ Found ModelPipeline via file path: D:\JoelDesktop folds_24\NEU FALL2025\MLops IE7374 Project\FinSights\ModelPipeline
[DEBUG] ✓ AWS credentials loaded from aws_credentials.env
Loader type: LocalCacheLoader
  ✓ Stage 2 Meta: 469,252 rows
  ✓ Companies: 21 rows
  ✓ Sections: 21 rows
  ✓ Fetched 10 sentences by ID
✓ LocalCacheLoader test PASSED

TEST 3: Factory Auto-Detection
[DEBUG] ✓ Found ModelPipeline via file path: D:\JoelDesktop folds_24\NEU FALL2025\MLops IE7374 Project\FinSights\ModelPipeline
[DEBUG] ✓ AWS credentials loaded from aws_credentials.env
Normal env → LocalCacheLoader
[DEBUG] ✓ Found ModelPipeline via file path: D:\JoelDesktop folds_24\NEU FALL2025\MLops IE7374 Project\FinSights\ModelPipeline
[DEBUG] ✓ AWS credentials loaded from aws_credentials.env


FileNotFoundError: [WinError 3] The system cannot find the path specified: '\\tmp\\finrag_cache'