# Test Auto-Update Functionality

This notebook tests the auto-update functionality for model IDs and CRIS profiles.

In [None]:
import sys
import os
import logging

# Configure root logging at INFO so each record shows timestamp, logger name,
# level and message
LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

# Put the directory above the current working directory at the front of the
# import path (later cells import from the `src` package); skipped when it is
# already listed, so re-running this cell adds no duplicates
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
already_on_path = parent_dir in sys.path
if not already_on_path:
    sys.path.insert(0, parent_dir)

print(f"Current working directory: {os.getcwd()}")
print(f"Parent directory added to path: {parent_dir}")

## Test ModelIDParser

In [None]:
# Check that the third-party packages used by this notebook can be imported;
# a failure is reported with an install hint instead of raising
try:
    import requests
    from bs4 import BeautifulSoup
except ImportError as missing:
    print(f"❌ Missing dependency: {missing}")
    print("Install required dependencies with: pip install requests beautifulsoup4")
else:
    # Reached only when both imports succeeded
    print("✅ All required dependencies are installed.")

In [None]:
# Import the ModelIDParser
from src.ModelIDParser import ModelIDParser, DEFAULT_MODEL_IDS_URL, DEFAULT_MODEL_IDS_JSON_CACHE

# Initialize the parser, passing INFO as its log level (the same level the
# setup cell configured for logging); `model_parser` is reused by the cells below
model_parser = ModelIDParser(log_level=logging.INFO)
print("ModelIDParser initialized successfully")

In [None]:
# Parse from URL using the updated BeautifulSoup parser, asking the parser to
# save its cache under a notebook-specific file name
MODEL_IDS_CACHE_FILE = "notebook_models_test.json"

print(f"Parsing models from {DEFAULT_MODEL_IDS_URL}")
models = model_parser.parse_from_url(
    save_cache=True,
    cache_file=MODEL_IDS_CACHE_FILE,
    url=DEFAULT_MODEL_IDS_URL,
)

# Report how many entries came back
model_count = len(models)
print(f"\nFound {model_count} models")

In [None]:
# Preview the first five parsed models; nothing is printed when `models` is empty
MAX_REGIONS_SHOWN = 3
if models:
    print("\nSample models:")
    for model_id, model_info in list(models.items())[:5]:
        print(f"\nModel: {model_id}")
        # Show only the leading regions and mark the list as truncated with "..."
        region_preview = ', '.join(model_info.regions[:MAX_REGIONS_SHOWN])
        truncation_mark = "..." if len(model_info.regions) > MAX_REGIONS_SHOWN else ""
        print(f"  Regions: {region_preview}" + truncation_mark)
        capability_list = ', '.join(model_info.capabilities)
        print(f"  Capabilities: {capability_list}")
        print(f"  Streaming supported: {model_info.streaming_supported}")

## Test CRISProfileParser

In [None]:
# Import the CRISProfileParser
from src.CRISProfileParser import CRISProfileParser, DEFAULT_CRIS_PROFILES_URL, DEFAULT_CRIS_PROFILES_JSON_CACHE

# Initialize the parser, passing INFO as its log level (the same level the
# setup cell configured for logging); `cris_parser` is reused by the cells below
cris_parser = CRISProfileParser(log_level=logging.INFO)
print("CRISProfileParser initialized successfully")

In [None]:
# Parse from URL using the updated BeautifulSoup parser, asking the parser to
# save its cache under a notebook-specific file name
CRIS_PROFILES_CACHE_FILE = "notebook_cris_profiles_test.json"

print(f"Parsing CRIS profiles from {DEFAULT_CRIS_PROFILES_URL}")
profiles = cris_parser.parse_from_url(
    save_cache=True,
    cache_file=CRIS_PROFILES_CACHE_FILE,
    url=DEFAULT_CRIS_PROFILES_URL,
)

# Report how many entries came back
profile_count = len(profiles)
print(f"\nFound {profile_count} CRIS profiles")

In [None]:
# Preview the first five parsed profiles; nothing is printed when `profiles` is empty
if profiles:
    print("\nSample profiles:")
    for profile_id, profile_info in list(profiles.items())[:5]:
        print(f"\nProfile: {profile_id}")
        print(f"  Name: {profile_info.profile_name}")
        source_list = ', '.join(profile_info.source_regions)
        print(f"  Source Regions: {source_list}")
        # Without any source region there is no destination lookup to show
        if not profile_info.source_regions:
            continue
        leading_source = profile_info.source_regions[0]
        reachable = profile_info.get_destination_regions(leading_source)
        destination_list = ', '.join(reachable)
        print(f"  Destinations from {leading_source}: {destination_list}")

## Test LLMManager Integration

In [None]:
# Import LLMManager
from src.LLMManager import LLMManager

# Initialize LLMManager with auto-update capabilities.
# The keyword arguments are grouped into two dicts and passed in the same
# order as a single flat call would list them.
ONE_DAY_SECONDS = 24 * 60 * 60  # 1 day in seconds

# Standard parameters
standard_settings = {
    "regions": ["us-east-1", "us-west-2"],
    "model_ids": ["anthropic.claude-3-sonnet-20240229-v1:0"],
}

# Auto-update parameters; both force flags are on to verify that updating works
auto_update_settings = {
    "model_ids_url": DEFAULT_MODEL_IDS_URL,
    "cris_profiles_url": DEFAULT_CRIS_PROFILES_URL,
    "model_ids_cache_file": "llm_model_ids_test.json",
    "cris_profiles_cache_file": "llm_cris_profiles_test.json",
    "max_profile_age": ONE_DAY_SECONDS,
    "force_model_id_update": True,
    "force_cris_profile_update": True,
}

llm_manager = LLMManager(
    **standard_settings,
    **auto_update_settings,
    log_level=logging.INFO,
)

print("LLMManager initialized successfully with auto-update capabilities")

In [None]:
# Fetch both collections from the manager to verify they were loaded
model_collection = llm_manager.get_model_profile_collection()
cris_collection = llm_manager.get_cris_profile_collection()

# Report the size of each collection, one after the other
loaded_models = model_collection.get_all_models()
print(f"Number of models in LLMManager: {len(loaded_models)}")
loaded_profiles = cris_collection.get_all_profiles()
print(f"Number of CRIS profiles in LLMManager: {len(loaded_profiles)}")

In [None]:
# Export profiles to JSON files; the call returns a pair of success flags,
# one for the model file and one for the CRIS file
model_success, cris_success = llm_manager.export_profiles_to_json(
    cris_file_path="llm_exported_cris.json",
    model_file_path="llm_exported_models.json",
)

# One status line per export
print(
    "Successfully exported model profiles"
    if model_success
    else "Failed to export model profiles"
)
print(
    "Successfully exported CRIS profiles"
    if cris_success
    else "Failed to export CRIS profiles"
)

## Debug HTML Content (if needed)

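If there are issues with parsing, the cell below defines `examine_html`, a helper for inspecting the HTML content directly. Its calls are commented out at the bottom of the cell; uncomment them to run the inspection.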

In [None]:
import requests
from bs4 import BeautifulSoup

# Function to examine HTML content
def examine_html(url, title, timeout=30):
    """Fetch a page and print a structural summary to help debug parsing problems.

    Prints the response size in bytes, the page title, the number of tables,
    the number of h1/h2/h3 headings (with up to five samples), and the number
    of region-like tokens matching ``us-*-N``, ``eu-*-N`` or ``ap-*-N``
    (with up to ten samples).

    Args:
        url: Address of the HTML page to download.
        title: Label for the page, used only in the progress message.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on an unresponsive host.

    Returns:
        The parsed ``BeautifulSoup`` document, or ``None`` if any step failed.
        Failures are printed rather than raised because this is a best-effort
        debugging aid.
    """
    import re  # local import: only this debugging helper needs it

    region_pattern = r'(us-[a-z]+-[0-9]+|eu-[a-z]+-[0-9]+|ap-[a-z]+-[0-9]+)'
    max_headings_shown = 5
    max_regions_shown = 10

    print(f"Examining {title} HTML from {url}...")

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        html = response.text
        # response.content is the raw payload, so its length is a true byte
        # count; len(html) would count decoded characters instead
        print(f"  Fetched {len(response.content)} bytes")

        soup = BeautifulSoup(html, 'html.parser')

        # Print the page title; fall back when the tag is missing or has no
        # single string (e.g. empty or containing nested markup)
        title_tag = soup.title
        has_title_text = title_tag is not None and title_tag.string
        page_title = title_tag.string if has_title_text else 'No title found'
        print(f"  Page title: {page_title}")

        # Look for tables
        tables = soup.find_all('table')
        print(f"  Number of tables found: {len(tables)}")

        # Check for headings
        headings = soup.find_all(['h1', 'h2', 'h3'])
        print(f"  Number of headings found: {len(headings)}")

        # Print some sample headings
        if headings:
            print("\n  Sample headings:")
            for heading in headings[:max_headings_shown]:
                print(f"    {heading.name}: {heading.text.strip()}")

        # Look for region text (every mention is counted, duplicates included)
        regions = re.findall(region_pattern, html)
        print(f"\n  Found {len(regions)} region mentions in the HTML")
        if regions:
            print(f"  Sample regions: {', '.join(regions[:max_regions_shown])}")

        return soup

    except Exception as e:
        # Deliberately broad: report any failure and let the notebook continue
        print(f"Error examining HTML: {str(e)}")
        return None

# Examine both HTML documents if parsing failed
# model_soup = examine_html(DEFAULT_MODEL_IDS_URL, "Model IDs")
# cris_soup = examine_html(DEFAULT_CRIS_PROFILES_URL, "CRIS Profiles")