## Isolation Tests and Skeletons

#### Supply Lines 1 and 2.

In [1]:
from pathlib import Path
import sys
import logging

# Set the root logger's level to WARNING.
logging.getLogger().setLevel(logging.WARNING)

# Find the project root: the nearest directory named "ModelPipeline", checking
# the current working directory first and then each of its ancestors in turn.
current = Path.cwd()
try:
    model_root = next(
        folder
        for folder in (current, *current.parents)
        if folder.name == "ModelPipeline"
    )
except StopIteration:
    # No directory in the chain matched; `from None` keeps the traceback to
    # the RuntimeError alone.
    raise RuntimeError("Cannot find 'ModelPipeline' root in path tree") from None

# Put the root at the front of sys.path, unless it is already listed.
if str(model_root) not in sys.path:
    sys.path.insert(0, str(model_root))

print(f"✓ Model root on sys.path: {model_root}")

✓ Model root on sys.path: d:\JoelDesktop folds_24\NEU FALL2025\MLops IE7374 Project\FinSights\ModelPipeline


In [2]:
from pathlib import Path
import sys

# 1. Put ModelPipeline on sys.path
# NOTE(review): this repeats the bootstrap from the first cell statement for
# statement — presumably so this cell can run on its own; confirm before
# consolidating.
current = Path.cwd()
try:
    # Nearest match wins: the working directory is checked before its ancestors.
    model_root = next(
        folder
        for folder in (current, *current.parents)
        if folder.name == "ModelPipeline"
    )
except StopIteration:
    raise RuntimeError("Cannot find 'ModelPipeline' root in path tree") from None

# Insert at the front of sys.path only when the entry is missing.
if str(model_root) not in sys.path:
    sys.path.insert(0, str(model_root))

print(f"✓ Model root on sys.path: {model_root}")


## SUPPLY LINE 1: ENTITY-RESULT CHAINING DEMO. Query → Extract Entities → Validate/Embed → Get KPI Data → Format → Display
from finrag_ml_tg1.rag_modules_src.utilities.supply_line_formatters import format_analytical_compact
from finrag_ml_tg1.rag_modules_src.metric_pipeline.src.pipeline import MetricPipeline
from finrag_ml_tg1.rag_modules_src.entity_adapter.entity_adapter import EntityAdapter

# Input files, each resolved against the ModelPipeline root found above.
METRIC_DATA_JSON = model_root / "finrag_ml_tg1/rag_modules_src/metric_pipeline/data/downloaded_data.json"
DIM_COMPANIES = model_root / "finrag_ml_tg1/data_cache/dimensions/finrag_dim_companies_21.parquet"
DIM_SECTIONS = model_root / "finrag_ml_tg1/data_cache/dimensions/finrag_dim_sec_sections.parquet"

# The adapter receives the Path objects directly; the metric pipeline is
# handed the same locations converted to strings.
adapter = EntityAdapter(
    company_dim_path=DIM_COMPANIES,
    section_dim_path=DIM_SECTIONS,
)
metric_pipeline = MetricPipeline(
    data_path=str(METRIC_DATA_JSON),
    company_dim_path=str(DIM_COMPANIES),
)

query = "What were Microsoft's and NVIDIA's total assets and revenue from 2021 to 2023?"

# NOTE(review): `entities` is extracted here but not passed on — the pipeline
# below is called with the raw query string.
entities = adapter.extract(query)
result = metric_pipeline.process(query)
compact = format_analytical_compact(result)

# Title, 70-dash rule, the formatted KPI text (or a placeholder when the
# formatter returned something falsy), closing rule — one item per line.
print("KPI Data:", "-" * 70, compact or "(no data)", "-" * 70, sep="\n")

✓ Model root on sys.path: d:\JoelDesktop folds_24\NEU FALL2025\MLops IE7374 Project\FinSights\ModelPipeline
✓ FilterExtractor initialized with 21 companies
  Using: finrag_dim_companies_21.parquet
✓ FilterExtractor initialized with 21 companies
  Using: finrag_dim_companies_21.parquet
✓ Loaded 527 metric records
✓ Unique tickers: 2
✓ Year range: 2010-2025
KPI Data:
----------------------------------------------------------------------
MSFT 2021: Total Assets=$333.8B
MSFT 2022: Total Assets=$364.8B
MSFT 2023: Total Assets=$412.0B
NVDA 2021: Total Assets=$28.8B
NVDA 2022: Total Assets=$44.2B, Revenue=$26.9B
NVDA 2023: Total Assets=$41.2B, Revenue=$27.0B
----------------------------------------------------------------------


In [4]:
# CELL 2 — SUPPLY LINE 2: ENTITY + EMBEDDING DEMO
# Query → Extract Entities → Generate Embedding → Display

from finrag_ml_tg1.loaders.ml_config_loader import MLConfig
from finrag_ml_tg1.rag_modules_src.entity_adapter.entity_adapter import EntityAdapter
from finrag_ml_tg1.rag_modules_src.utilities.query_embedder_v2 import (
    EmbeddingRuntimeConfig,
    QueryEmbedderV2,
)

# Configuration: build the embedder's runtime settings from the "embedding"
# section of the ML config, then set max_query_chars to 1500 on the result.
config = MLConfig()
embedding_cfg_dict = config.cfg["embedding"]
runtime_cfg = EmbeddingRuntimeConfig.from_ml_config(embedding_cfg_dict)
runtime_cfg.max_query_chars = 1500

# Components: a Bedrock client obtained from the config, the query embedder
# that uses it, and an entity adapter over the company/section dimension files
# (DIM_COMPANIES / DIM_SECTIONS are defined in the Supply Line 1 cell).
bedrock_client = config.get_bedrock_client()
embedder = QueryEmbedderV2(
    runtime_cfg,
    boto_client=bedrock_client,
)
adapter = EntityAdapter(
    company_dim_path=DIM_COMPANIES,
    section_dim_path=DIM_SECTIONS,
)

# Query → entities → embedding. The extracted entities are passed to the
# embedder together with the query text.
query = "What was Apple's revenue in 2020?"
entities = adapter.extract(query)
embedding = embedder.embed_query(query, entities)

# Report: the query followed by a blank line, the extracted tickers / years /
# first two metrics, then the embedding length and its first five values.
print("Query: {}".format(query), end="\n\n")
print(
    "Entities: {} | {} | {}...".format(
        entities.companies.tickers,
        entities.years.years,
        entities.metrics.metrics[:2],
    )
)
print("Embedding: {}D vector, preview: {}".format(len(embedding), embedding[:5]))

INFO:finrag_ml_tg1.rag_modules_src.utilities.query_embedder_v2:[QueryEmbedderV2] Initialized with model=cohere.embed-v4:0, region=us-east-1, dim=1024
INFO:finrag_ml_tg1.rag_modules_src.entity_adapter.entity_adapter:EntityAdapter using company_dim: d:\JoelDesktop folds_24\NEU FALL2025\MLops IE7374 Project\FinSights\ModelPipeline\finrag_ml_tg1\data_cache\dimensions\finrag_dim_companies_21.parquet
INFO:finrag_ml_tg1.rag_modules_src.entity_adapter.entity_adapter:EntityAdapter using section_dim: d:\JoelDesktop folds_24\NEU FALL2025\MLops IE7374 Project\FinSights\ModelPipeline\finrag_ml_tg1\data_cache\dimensions\finrag_dim_sec_sections.parquet
INFO:finrag_ml_tg1.rag_modules_src.entity_adapter.company_universe:Loading company dim from: d:\JoelDesktop folds_24\NEU FALL2025\MLops IE7374 Project\FinSights\ModelPipeline\finrag_ml_tg1\data_cache\dimensions\finrag_dim_companies_21.parquet
INFO:finrag_ml_tg1.rag_modules_src.entity_adapter.company_universe:Loaded dim with 21 rows and columns: ['compa

[DEBUG] ✓ AWS credentials loaded from aws_credentials.env
✓ FilterExtractor initialized with 21 companies
  Using: finrag_dim_companies_21.parquet
Query: What was Apple's revenue in 2020?

Entities: ['AAPL'] | [2020] | ['income_stmt_Revenue']...
Embedding: 1024D vector, preview: [-0.025634766, -0.018920898, 0.049804688, 0.01586914, 0.064941406]
