### Post-Success, Log Analytics:
- Sync latest logs from S3
- Do some analytics on logs.parquet.

In [34]:
# ============================================================================
# CELL 1: Setup - Path Resolution, Imports & Sync Latest Logs
# ============================================================================

%load_ext autoreload
%autoreload 2
%reload_ext autoreload

from pathlib import Path
import sys
import logging

# Suppress noisy logs for clean notebook output
logging.getLogger().setLevel(logging.WARNING)
logging.getLogger("finrag_ml_tg1").setLevel(logging.INFO)

# Find ModelPipeline root and add to sys.path
current = Path.cwd()
for parent in [current] + list(current.parents):
    if parent.name == "ModelPipeline":
        model_root = parent
        break
else:
    raise RuntimeError("Cannot find 'ModelPipeline' root in path tree")

if str(model_root) not in sys.path:
    sys.path.insert(0, str(model_root))

print(f"‚úì ModelPipeline root: {model_root}")
print(f"‚úì Notebook location: {Path.cwd()}\n")

# ============================================================================
# Sync latest logs from S3
# ============================================================================
from finrag_ml_tg1.rag_modules_src.synthesis_pipeline.query_logger import QueryLogger

print("=" * 80)
print("SYNCING LOGS FROM S3")
print("=" * 80)

logger = QueryLogger()
downloaded, skipped = logger.sync_to_local()

if downloaded > 0:
    print(f"‚úì Downloaded latest query_logs.parquet from S3")
elif skipped > 0:
    print(f"‚úì Local query_logs.parquet is already up-to-date")
else:
    print(f"‚ö† No log file found in S3 yet")

print("\n" + "=" * 80)
print("READY FOR TESTING & ANALYTICS")
print("=" * 80 + "\n")


import polars as pl

# Configure Polars display settings
pl.Config.set_tbl_rows(-1)  # Show all rows
pl.Config.set_tbl_cols(-1)  # Show all columns
pl.Config.set_tbl_width_chars(150)  # Wider tables
pl.Config.set_fmt_str_lengths(100)  # Show longer strings

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
‚úì ModelPipeline root: d:\JoelDesktop folds_24\NEU FALL2025\MLops IE7374 Project\FinSights\ModelPipeline
‚úì Notebook location: d:\JoelDesktop folds_24\NEU FALL2025\MLops IE7374 Project\FinSights\ModelPipeline\finrag_ml_tg1\rag_modules_src\02_LLMEval_Notebooks

SYNCING LOGS FROM S3
[DEBUG] ‚úì Found ModelPipeline via file path: D:\JoelDesktop folds_24\NEU FALL2025\MLops IE7374 Project\FinSights\ModelPipeline
[DEBUG] ‚úì AWS credentials loaded from aws_credentials.env
‚úì Downloaded latest query_logs.parquet from S3

READY FOR TESTING & ANALYTICS



polars.config.Config

In [None]:
# ============================================================================
# CELL 2: Live Test Query - Verify Pipeline Works
# ============================================================================

from finrag_ml_tg1.rag_modules_src.synthesis_pipeline.orchestrator import answer_query
from datetime import datetime

print("=" * 80)
print("LIVE PIPELINE TEST")
print("=" * 80)

# Test query (lightweight but meaningful)
test_query = "What was Apple's, Microsoft's, Alphabet's total revenue, cogs and eps related information in 2018?"

print(f"\nRunning test query:")
print(f"   '{test_query}'")
print(f"\nProcessing...\n")

# Run query through full pipeline
result = answer_query(
    query=test_query,
    model_root=model_root,
    include_kpi=True,
    include_rag=True,
    model_key=None,  # Use default model
    export_context=True,  # Skip context export for test
    export_response=True
)

# Display results
print("=" * 80)
if result.get('error'):
    print(f"‚ùå ERROR: {result['error']}")
    print(f"   Stage: {result.get('stage')}")
else:
    print(f"‚úÖ SUCCESS")
    
    # Show answer preview
    answer = result['answer']
    answer_preview = answer[:200] + "..." if len(answer) > 200 else answer
    print(f"\nüìù Answer Preview:")
    print(f"   {answer_preview}")
    
    # Show metrics
    llm_meta = result['metadata']['llm']
    print(f"\n! Query Metrics:")
    print(f"   Model: {llm_meta['model_id'].split('.')[-1]}")
    print(f"   Cost: ${llm_meta['cost']:.4f}")
    print(f"   Tokens: {llm_meta['input_tokens']:,} in + {llm_meta['output_tokens']:,} out = {llm_meta['total_tokens']:,}")
    print(f"   Time: {result['metadata']['processing_time_ms']:,.1f} ms")
    
    # Show log location
    print(f"\nüìÅ Logged to: {result['exports']['log_file']}")

print("=" * 80 + "\n")

# Re-sync to get the fresh query we just logged
print("üîÑ Re-syncing logs to include fresh query...")
logger.sync_to_local()
print("‚úì Logs updated\n")

LIVE PIPELINE TEST

Running test query:
   'What was Apple's total revenue in 2020?'

Processing...

[DEBUG] ‚úì Found ModelPipeline via file path: D:\JoelDesktop folds_24\NEU FALL2025\MLops IE7374 Project\FinSights\ModelPipeline
[DEBUG] ‚úì AWS credentials loaded from aws_credentials.env
[DEBUG] ‚úì Found ModelPipeline via file path: D:\JoelDesktop folds_24\NEU FALL2025\MLops IE7374 Project\FinSights\ModelPipeline
[DEBUG] ‚úì AWS credentials loaded from aws_credentials.env
[DEBUG] ‚úì Found ModelPipeline via file path: D:\JoelDesktop folds_24\NEU FALL2025\MLops IE7374 Project\FinSights\ModelPipeline
[DEBUG] ‚úì AWS credentials loaded from aws_credentials.env
‚úÖ SUCCESS

üìù Answer Preview:
   # Apple's Total Revenue in 2020

According to Apple's FY 2020 10-K filing, total net sales increased 6% or $14.3 billion during 2020 compared to 2019, primarily driven by higher net sales of Services ...

! Query Metrics:
   Model: claude-haiku-4-5-20251001-v1:0
   Cost: $0.0104
   Tokens: 8,90

In [26]:
# ============================================================================
# DIAGNOSTIC: Inspect Query Logs Schema
# ============================================================================
from pathlib import Path

# Path to logs
log_path = model_root / "finrag_ml_tg1" / "rag_modules_src" / "exports" / "logs" / "query_logs.parquet"

if not log_path.exists():
    print(f"ERROR: Log file not found at {log_path}")
else:
    # Load logs
    df_logs = pl.read_parquet(log_path)
    
    print(f"Total log entries: {df_logs.height}")
    print(f"Columns: {df_logs.columns}\n")
    
    # Show schema
    print("Schema:")
    print(df_logs.schema)
    print("\n" + "="*80 + "\n")
    
    # Show data types and null counts
    print("Column stats:")
    for col in df_logs.columns:
        dtype = df_logs[col].dtype
        null_count = df_logs[col].null_count()
        print(f"  {col}: {dtype} (nulls: {null_count})")
    
    

Total log entries: 45
Columns: ['timestamp', 'query', 'model_id', 'input_tokens', 'output_tokens', 'total_tokens', 'cost', 'context_length', 'processing_time_ms', 'error', 'error_type', 'stage', 'context_file', 'response_file']

Schema:
Schema({'timestamp': String, 'query': String, 'model_id': String, 'input_tokens': Int64, 'output_tokens': Int64, 'total_tokens': Int64, 'cost': Float64, 'context_length': Int64, 'processing_time_ms': Float64, 'error': String, 'error_type': String, 'stage': String, 'context_file': String, 'response_file': String})


Column stats:
  timestamp: String (nulls: 0)
  query: String (nulls: 0)
  model_id: String (nulls: 0)
  input_tokens: Int64 (nulls: 0)
  output_tokens: Int64 (nulls: 0)
  total_tokens: Int64 (nulls: 0)
  cost: Float64 (nulls: 0)
  context_length: Int64 (nulls: 0)
  processing_time_ms: Float64 (nulls: 0)
  error: String (nulls: 45)
  error_type: String (nulls: 45)
  stage: String (nulls: 45)
  context_file: String (nulls: 19)
  response_file: 

In [27]:
# ============================================================================
# CELL 3: Load Logs for Analytics
# ============================================================================


# Path to local logs
log_path = model_root / "finrag_ml_tg1" / "rag_modules_src" / "exports" / "logs" / "query_logs.parquet"

if not log_path.exists():
    print(f"‚ùå ERROR: Log file not found at {log_path}")
    print(f"   Run Cell 1 to sync from S3")
else:
    # Load logs
    df_logs = pl.read_parquet(log_path)
    
    print("=" * 80)
    print("QUERY LOGS LOADED")
    print("=" * 80)
    print(f"Total log entries: {df_logs.height}")
    print(f"Columns: {df_logs.columns}\n")
    
    # Show schema
    print("Schema:")
    for col, dtype in df_logs.schema.items():
        null_count = df_logs[col].null_count()
        print(f"  {col:.<30} {str(dtype):.<15} (nulls: {null_count})")
    
    print("\n" + "=" * 80)
    print("MOST RECENT QUERY (Should be! test from Cell 2!)")
    print("=" * 80)
    
    # Show most recent entry
    most_recent = df_logs.sort("timestamp", descending=True).head(1)
    
    for col in ['timestamp', 'query', 'model_id', 'cost', 'total_tokens', 'processing_time_ms']:
        if col in most_recent.columns:
            val = most_recent[col][0]
            print(f"  {col}: {val}")
    
    print("\n" + "=" * 80 + "\n")

QUERY LOGS LOADED
Total log entries: 45
Columns: ['timestamp', 'query', 'model_id', 'input_tokens', 'output_tokens', 'total_tokens', 'cost', 'context_length', 'processing_time_ms', 'error', 'error_type', 'stage', 'context_file', 'response_file']

Schema:
  timestamp..................... String......... (nulls: 0)
  query......................... String......... (nulls: 0)
  model_id...................... String......... (nulls: 0)
  input_tokens.................. Int64.......... (nulls: 0)
  output_tokens................. Int64.......... (nulls: 0)
  total_tokens.................. Int64.......... (nulls: 0)
  cost.......................... Float64........ (nulls: 0)
  context_length................ Int64.......... (nulls: 0)
  processing_time_ms............ Float64........ (nulls: 0)
  error......................... String......... (nulls: 45)
  error_type.................... String......... (nulls: 45)
  stage......................... String......... (nulls: 45)
  context_file........

In [28]:
# ============================================================================
# CELL 3: Load Logs for Analytics
# ============================================================================

pl.Config.set_tbl_formatting("ASCII_FULL_CONDENSED")  # Denser formatting
pl.Config.set_tbl_rows(-1)  # Show all rows
pl.Config.set_tbl_width_chars(1000)  # Allow wide tables


# Path to local logs
log_path = model_root / "finrag_ml_tg1" / "rag_modules_src" / "exports" / "logs" / "query_logs.parquet"

if not log_path.exists():
    print(f"‚ùå ERROR: Log file not found at {log_path}")
    print(f"   Run Cell 1 to sync from S3")
else:
    # Load logs
    df_logs = pl.read_parquet(log_path)
    
    print("=" * 80)
    print("QUERY LOGS LOADED")
    print("=" * 80)
    print(f"Total log entries: {df_logs.height}")
    print(f"Columns: {df_logs.columns}\n")
    
    # Show schema
    print("Schema:")
    for col, dtype in df_logs.schema.items():
        null_count = df_logs[col].null_count()
        print(f"  {col:.<30} {str(dtype):.<15} (nulls: {null_count})")
    
    print("\n" + "=" * 80)
    print("MOST RECENT QUERY (Should be your test from Cell 2!)")
    print("=" * 80)
    
    # Show most recent entry
    most_recent = df_logs.sort("timestamp", descending=True).head(1)
    
    for col in ['timestamp', 'query', 'model_id', 'cost', 'total_tokens', 'processing_time_ms']:
        if col in most_recent.columns:
            val = most_recent[col][0]
            print(f"  {col}: {val}")
    
    print("\n" + "=" * 80 + "\n")

QUERY LOGS LOADED
Total log entries: 45
Columns: ['timestamp', 'query', 'model_id', 'input_tokens', 'output_tokens', 'total_tokens', 'cost', 'context_length', 'processing_time_ms', 'error', 'error_type', 'stage', 'context_file', 'response_file']

Schema:
  timestamp..................... String......... (nulls: 0)
  query......................... String......... (nulls: 0)
  model_id...................... String......... (nulls: 0)
  input_tokens.................. Int64.......... (nulls: 0)
  output_tokens................. Int64.......... (nulls: 0)
  total_tokens.................. Int64.......... (nulls: 0)
  cost.......................... Float64........ (nulls: 0)
  context_length................ Int64.......... (nulls: 0)
  processing_time_ms............ Float64........ (nulls: 0)
  error......................... String......... (nulls: 45)
  error_type.................... String......... (nulls: 45)
  stage......................... String......... (nulls: 45)
  context_file........

In [29]:
# ============================================================================
# CELL 4: Overall Query History Summary
# ============================================================================

# Parse date from timestamp
df_logs = df_logs.with_columns([
    pl.col("timestamp").str.slice(0, 10).alias("date")
])

# Filter out null costs (failed queries)
df_success = df_logs.filter(pl.col("cost").is_not_null())

# Calculate summary stats
summary = pl.DataFrame({
    "Metric": [
        "Total Queries",
        "Successful Queries",
        "Failed Queries",
        "Date Range",
        "Unique Questions",
        "Total Cost",
        "Total Tokens",
        "Avg Cost per Query",
        "Avg Tokens per Query",
        "Avg Processing Time (ms)"
    ],
    "Value": [
        str(df_logs.height),
        str(len(df_success)),
        str(df_logs.height - len(df_success)),
        f"{df_logs['date'].min()} to {df_logs['date'].max()}",
        str(df_logs["query"].n_unique()),
        f"${df_success['cost'].sum():.4f}",
        f"{df_success['total_tokens'].sum():,} ({df_success['input_tokens'].sum():,} in + {df_success['output_tokens'].sum():,} out)",
        f"${df_success['cost'].mean():.4f}",
        f"{df_success['total_tokens'].mean():,.0f}",
        f"{df_success['processing_time_ms'].mean():,.1f}"
    ]
})

print("=" * 80)
print("üìä OVERALL QUERY HISTORY SUMMARY")
print("=" * 80 + "\n")

summary

üìä OVERALL QUERY HISTORY SUMMARY



Metric,Value
str,str
"""Total Queries""","""45"""
"""Successful Queries""","""45"""
"""Failed Queries""","""0"""
"""Date Range""","""2025-11-19 to 2025-11-25"""
"""Unique Questions""","""14"""
"""Total Cost""","""$0.7208"""
"""Total Tokens""","""345,280 (315,747 in + 29,533 out)"""
"""Avg Cost per Query""","""$0.0160"""
"""Avg Tokens per Query""","""7,673"""
"""Avg Processing Time (ms)""","""17,001.0"""


In [30]:
# ============================================================================
# CELL 5: Cost & Token Analysis by Query
# ============================================================================

# Truncate query text for readability
df_analysis = df_success.with_columns([
    pl.when(pl.col("query").str.len_chars() > 80)
    .then(pl.col("query").str.slice(0, 80) + "...")
    .otherwise(pl.col("query"))
    .alias("query_short")
])

# Group by query
df_by_query = (
    df_analysis
    .group_by("query_short")
    .agg([
        pl.len().alias("runs"),
        pl.col("input_tokens").mean().round(0).cast(pl.Int64).alias("avg_input_tokens"),
        pl.col("output_tokens").mean().round(0).cast(pl.Int64).alias("avg_output_tokens"),
        pl.col("total_tokens").mean().round(0).cast(pl.Int64).alias("avg_total_tokens"),
        pl.col("cost").sum().alias("total_cost"),
        pl.col("cost").mean().alias("avg_cost"),
        pl.col("processing_time_ms").mean().round(1).alias("avg_processing_ms"),
    ])
    .sort("total_cost", descending=True)
)

print("=" * 80)
print("üí∞ COST & TOKEN ANALYSIS BY QUERY")
print("=" * 80 + "\n")

df_by_query

üí∞ COST & TOKEN ANALYSIS BY QUERY



query_short,runs,avg_input_tokens,avg_output_tokens,avg_total_tokens,total_cost,avg_cost,avg_processing_ms
str,u32,i64,i64,i64,f64,f64,f64
"""Across its fiscal 2018-2020 10-K filings, how does Walmart Inc. explain the main...""",14,5431,790,6222,0.206779,0.01477,18606.5
"""How does MICROSOFT CORP describe the change in its Intelligent Cloud revenue in ...""",6,6940,381,7320,0.106381,0.01773,13496.8
"""Over time, how does Meta Platforms describe the regulatory and policy risks that...""",2,10469,1476,11945,0.072236,0.036118,29148.2
"""In their 2009 Form 10-K risk-factor disclosures, how do Radian Group, Netflix an...""",3,5263,637,5900,0.061326,0.020442,16422.9
"""What was Apple's total revenue and operating income in fiscal year 2017?""",5,8049,354,8402,0.049084,0.0098168,11214.5
"""What does EXXON MOBIL CORP report as its total revenue in 2008, and how is this ...""",2,5743,262,6005,0.042318,0.021159,10846.5
"""What was Apple's total revenue in 2020?""",3,8900,288,9188,0.031015,0.010338,9568.6
"""What was Apple's revenue in 2017?""",3,8549,324,8873,0.030508,0.010169,12661.8
"""Where does Tesla define Adjusted EBITDA in its 2022 Form 10-K, and how does the ...""",2,5961,300,6260,0.029849,0.0149245,10426.8
"""In their 2020 Form 10-K risk-factor disclosures, how do Radian Group, Netflix an...""",2,9190,1010,10200,0.028484,0.014242,29258.4


In [31]:
# ============================================================================
# CELL 6: Model Usage Distribution
# ============================================================================

# Extract short model name
df_model_analysis = df_success.with_columns([
    pl.col("model_id").str.split(".").list.last().alias("model_name")
])

# Group by model
df_by_model = (
    df_model_analysis
    .group_by("model_name")
    .agg([
        pl.len().alias("query_count"),
        pl.col("total_tokens").sum().alias("total_tokens"),
        pl.col("input_tokens").sum().alias("total_input_tokens"),
        pl.col("output_tokens").sum().alias("total_output_tokens"),
        pl.col("cost").sum().alias("total_cost"),
        pl.col("cost").mean().alias("avg_cost_per_query"),
        pl.col("processing_time_ms").mean().round(1).alias("avg_processing_ms"),
    ])
    .sort("query_count", descending=True)
)

print("=" * 80)
print("ü§ñ MODEL USAGE DISTRIBUTION")
print("=" * 80 + "\n")

df_by_model

ü§ñ MODEL USAGE DISTRIBUTION



model_name,query_count,total_tokens,total_input_tokens,total_output_tokens,total_cost,avg_cost_per_query,avg_processing_ms
str,u32,i64,i64,i64,f64,f64,f64
"""claude-haiku-4-5-20251001-v1:0""",31,252006,231328,20678,0.334718,0.010797,16592.7
"""claude-sonnet-4-5-20250929-v1:0""",14,93274,84419,8855,0.386082,0.027577,17905.2


In [32]:
# ============================================================================
# CELL 7: Recent Query Timeline (Last 10 Queries)
# ============================================================================

# Get last 10 queries
df_recent = df_logs.sort("timestamp", descending=True).head(10)

# Create display dataframe
df_timeline = df_recent.select([
    pl.col("timestamp").str.slice(11, 8).alias("time"),  # HH:MM:SS
    pl.when(pl.col("query").str.len_chars() > 50)
      .then(pl.col("query").str.slice(0, 50) + "...")
      .otherwise(pl.col("query"))
      .alias("query_preview"),
    pl.col("model_id").str.split(".").list.last().alias("model"),
    pl.col("cost").round(4).alias("cost_$"),
    pl.col("total_tokens").alias("tokens"),
    pl.when(pl.col("error").is_null())
      .then(pl.lit("‚úÖ"))
      .otherwise(pl.lit("‚ùå"))
      .alias("status")
])

print("=" * 80)
print("üìÖ RECENT QUERY TIMELINE (Last 10)")
print("=" * 80 + "\n")

df_timeline

üìÖ RECENT QUERY TIMELINE (Last 10)



time,query_preview,model,cost_$,tokens,status
str,str,str,f64,i64,str
"""05:28:12""","""What was Apple's total revenue in 2020?""","""claude-haiku-4-5-20251001-v1:0""",0.0104,9194,"""‚úÖ"""
"""05:23:29""","""What was Apple's total revenue in 2020?""","""claude-haiku-4-5-20251001-v1:0""",0.0102,9169,"""‚úÖ"""
"""05:21:08""","""What was Apple's total revenue in 2020?""","""claude-haiku-4-5-20251001-v1:0""",0.0104,9200,"""‚úÖ"""
"""02:52:48""","""Across its fiscal 2018-2020 10-K filings, how does...""","""claude-haiku-4-5-20251001-v1:0""",0.0099,6458,"""‚úÖ"""
"""02:02:58""","""Across its fiscal 2018-2020 10-K filings, how does...""","""claude-haiku-4-5-20251001-v1:0""",0.0101,6489,"""‚úÖ"""
"""01:49:24""","""Across its fiscal 2018-2020 10-K filings, how does...""","""claude-haiku-4-5-20251001-v1:0""",0.0101,6498,"""‚úÖ"""
"""13:59:22""","""In their 2020 Form 10-K risk-factor disclosures, h...""","""claude-haiku-4-5-20251001-v1:0""",0.0146,10200,"""‚úÖ"""
"""03:05:49""","""Show me Apple, Microsoft, Amazon, Alphabet, Google...""","""claude-haiku-4-5-20251001-v1:0""",0.0264,14959,"""‚úÖ"""
"""22:39:32""","""How does MICROSOFT CORP describe the change in its...""","""claude-haiku-4-5-20251001-v1:0""",0.0088,7195,"""‚úÖ"""
"""20:24:22""","""Across its fiscal 2018-2020 10-K filings, how does...""","""claude-haiku-4-5-20251001-v1:0""",0.0092,6143,"""‚úÖ"""


In [33]:
# ============================================================================
# CELL 8: Error Analysis (If Any Failures)
# ============================================================================

df_errors = df_logs.filter(pl.col("error").is_not_null())

if len(df_errors) > 0:
    print("=" * 80)
    print(f"‚ö†Ô∏è  ERROR ANALYSIS ({len(df_errors)} failed queries)")
    print("=" * 80 + "\n")
    
    # Group by error type
    error_summary = (
        df_errors
        .group_by("error_type")
        .agg([
            pl.len().alias("count"),
            pl.col("stage").first().alias("typical_stage")
        ])
        .sort("count", descending=True)
    )
    
    print("Errors by type:")
    display(error_summary)
    
    print("\nMost recent error details:")
    recent_error = df_errors.sort("timestamp", descending=True).head(1)
    print(f"  Timestamp: {recent_error['timestamp'][0]}")
    print(f"  Query: {recent_error['query'][0][:100]}...")
    print(f"  Error: {recent_error['error'][0]}")
    print(f"  Stage: {recent_error['stage'][0]}")
else:
    print("=" * 80)
    print("‚úÖ NO ERRORS - All queries successful!")
    print("=" * 80)

‚úÖ NO ERRORS - All queries successful!
