# Contract Comparison System - Testing Notebook

This notebook demonstrates the complete contract comparison workflow using test contracts from `data/test_contracts/`.

## Features Demonstrated:
1. **Image Parsing**: Converting contract images to text using GPT-4o Vision
2. **Two-Agent System**: Contextualization and extraction agents working together
3. **Guardrails**: Input validation and safety checks
4. **Evaluation**: Output quality assessment
5. **Tracing**: Complete Langfuse observability

## Setup and Imports

In [None]:
import sys
import os
from pathlib import Path
import json
from datetime import datetime

# Add src to path
project_root = Path.cwd().parent
sys.path.insert(0, str(project_root))

# Load environment variables
from dotenv import load_dotenv
load_dotenv(project_root / '.env')

# Import project modules
from src.main import (
    initialize_clients,
    process_contract_comparison,
    validate_environment
)
from src.image_parser import parse_contract_image
from src.models import ContractChangeOutput, ParsedContract, AgentContext
from src.agents.contextualization_agent import ContextualizationAgent
from src.agents.extraction_agent import ExtractionAgent

# Guardrails and the evaluator are optional: if either module cannot be
# imported, ENHANCED_MODE is switched off and the notebook runs in basic mode.
try:
    from src.guardrails import ContractGuardrails
    from src.evaluator import ContractEvaluator
except ImportError:
    ENHANCED_MODE = False
    print("⚠️  Guardrails and Evaluator modules not found. Running in basic mode.")
else:
    ENHANCED_MODE = True

print("✓ Imports successful")
print(f"✓ Enhanced mode: {ENHANCED_MODE}")

## Validate Environment

In [None]:
# Abort the notebook right away when the project's environment check fails.
environment_ok = validate_environment()
if not environment_ok:
    raise ValueError("Environment validation failed. Check your .env file.")

print("✓ Environment validated")
# Report only whether each variable is set; the values are never printed.
for env_name in ('OPENAI_API_KEY', 'LANGFUSE_PUBLIC_KEY', 'LANGFUSE_SECRET_KEY'):
    status_mark = '✓' if os.getenv(env_name) else '✗'
    print(f"  {env_name}: {status_mark}")

## Initialize Clients

In [None]:
# Create the OpenAI and Langfuse clients that the following cells share.
openai_client, langfuse_client = initialize_clients()

print("✓ Clients initialized")
for client_label, client_obj in (("OpenAI", openai_client), ("Langfuse", langfuse_client)):
    print(f"  {client_label} client: {type(client_obj).__name__}")

# The guardrails and evaluator classes are only bound when ENHANCED_MODE is on,
# so they are instantiated behind the same flag.
if ENHANCED_MODE:
    guardrails = ContractGuardrails()
    evaluator = ContractEvaluator()
    print("✓ Guardrails and Evaluator initialized")

## Discover Test Contracts

In [None]:
# Directory holding the test contract images
test_contracts_dir = project_root / 'data' / 'test_contracts'

# Pair every "<name>_original.jpg" with its "<name>_amendment.jpg" sibling.
contract_pairs = []

for original_file in sorted(test_contracts_dir.glob('*_original.jpg')):
    # Drop only the trailing "_original" marker. Splitting on the first
    # underscore would truncate names that themselves contain underscores
    # (e.g. "contract_1_original" -> "contract") and look up the wrong amendment.
    contract_num = original_file.stem[:-len('_original')]
    amendment_file = test_contracts_dir / f"{contract_num}_amendment.jpg"

    if amendment_file.exists():
        contract_pairs.append({
            'name': contract_num,
            'original': str(original_file),
            'amendment': str(amendment_file)
        })
    else:
        # Make unmatched originals visible instead of dropping them silently.
        print(f"⚠️  Skipping {original_file.name}: {amendment_file.name} not found")

print(f"✓ Found {len(contract_pairs)} contract pair(s):\n")
for i, pair in enumerate(contract_pairs, 1):
    print(f"  {i}. {pair['name']}")
    print(f"     Original:  {Path(pair['original']).name}")
    print(f"     Amendment: {Path(pair['amendment']).name}")
    print()

## Test 1: Image Parsing

Test the image parsing functionality with the first contract pair.

In [None]:
def _show_parsed(label, contract, tail=""):
    """Print the text length, section count and a 200-character preview of a parsed contract."""
    print(f"✓ {label} parsed:")
    print(f"  Text length: {len(contract.raw_text)} characters")
    print(f"  Sections: {len(contract.sections_identified)}")
    print(f"  First 200 chars: {contract.raw_text[:200]}...{tail}")


if not contract_pairs:
    print("⚠️  No contract pairs found in data/test_contracts/")
else:
    # The first discovered pair is the sample used by the single-pair tests.
    test_pair = contract_pairs[0]
    print(f"Testing with: {test_pair['name']}\n")

    # Original document
    print("Parsing original contract...")
    original_contract = parse_contract_image(
        image_path=test_pair['original'], document_type="original", client=openai_client
    )
    _show_parsed("Original", original_contract, tail="\n")

    # Amendment document
    print("Parsing amendment contract...")
    amendment_contract = parse_contract_image(
        image_path=test_pair['amendment'], document_type="amendment", client=openai_client
    )
    _show_parsed("Amendment", amendment_contract)

## Test 2: Guardrails (Input Validation)

Apply guardrails to validate the parsed contracts before processing.

In [None]:
def _print_validation(title, report):
    """Print the verdict, pass count and any warnings from a guardrails report."""
    print(title)
    print(f"  Valid: {report['is_valid']}")
    print(f"  Checks passed: {report['checks_passed']}/{report['total_checks']}")
    if report['warnings']:
        print(f"  Warnings: {report['warnings']}")


if not (ENHANCED_MODE and contract_pairs):
    print("⚠️  Guardrails not available or no contracts to validate")
else:
    print("Running guardrails checks...\n")

    # Original document
    original_validation = guardrails.validate_input(
        contract=original_contract, file_path=test_pair['original']
    )
    _print_validation("Original Contract Validation:", original_validation)
    print()

    # Amendment document
    amendment_validation = guardrails.validate_input(
        contract=amendment_contract, file_path=test_pair['amendment']
    )
    _print_validation("Amendment Contract Validation:", amendment_validation)

## Test 3: Two-Agent Workflow

Test the complete two-agent system: contextualization and extraction.

In [None]:
if not contract_pairs:
    print("⚠️  No contracts to process")
else:
    print("Running two-agent workflow...\n")

    # Stage 1: the contextualization agent analyses both parsed documents.
    print("Agent 1: Contextualization")
    agent1 = ContextualizationAgent(client=openai_client)
    context = agent1.analyze(
        original_contract=original_contract, amendment_contract=amendment_contract
    )

    print(f"✓ Context generated:")
    print(f"  Structure analysis: {len(context.document_structure)} chars")
    print(f"  Section mappings: {len(context.corresponding_sections)}")
    print(f"  Change areas identified: {len(context.identified_change_areas)}")
    print(f"  Context summary: {context.context_summary[:150]}...\n")

    # Stage 2: the extraction agent receives both documents plus that context.
    print("Agent 2: Change Extraction")
    agent2 = ExtractionAgent(client=openai_client)
    changes = agent2.extract_changes(
        original_contract=original_contract,
        amendment_contract=amendment_contract,
        context=context,
    )

    print(f"✓ Changes extracted:")
    print(f"  Sections changed: {len(changes.sections_changed)}")
    print(f"  Topics touched: {len(changes.topics_touched)}")
    print(f"  Summary length: {len(changes.summary_of_the_change)} chars")

## Test 4: Output Evaluation

Evaluate the quality of the extracted changes.

In [None]:
if not (ENHANCED_MODE and contract_pairs):
    print("⚠️  Evaluator not available or no output to evaluate")
else:
    print("Evaluating output quality...\n")

    # Score the extracted changes against both documents and the agent context.
    evaluation = evaluator.evaluate_output(
        changes=changes,
        original_contract=original_contract,
        amendment_contract=amendment_contract,
        context=context,
    )

    print("Quality Evaluation:")
    print(f"  Overall Score: {evaluation['overall_score']:.2f}/100")
    print(f"  Grade: {evaluation['grade']}\n")

    # Dimension keys are snake_case; show them as title-cased labels.
    print("Dimension Scores:")
    for dim_name, dim_score in evaluation['dimension_scores'].items():
        print(f"  {dim_name.replace('_', ' ').title()}: {dim_score:.2f}/100")

    if evaluation['recommendations']:
        print("\nRecommendations:")
        for recommendation in evaluation['recommendations']:
            print(f"  - {recommendation}")

## Test 5: Complete Workflow with All Contracts

Process all contract pairs through the complete workflow.

In [None]:
# Run every discovered contract pair through the complete workflow and collect
# one record per pair in `results`. A record holds either the extracted changes
# (plus an optional evaluation) or an 'error' entry.
results = []

# Create the contextualization agent here rather than reusing `agent1` from the
# Test 3 cell, so this cell does not depend on that cell having been run.
evaluation_agent = None
if ENHANCED_MODE and contract_pairs:
    evaluation_agent = ContextualizationAgent(client=openai_client)

for i, pair in enumerate(contract_pairs, 1):
    print(f"\n{'='*70}")
    print(f"Processing Contract {i}/{len(contract_pairs)}: {pair['name']}")
    print(f"{'='*70}\n")

    try:
        # Run complete workflow
        changes, trace_id = process_contract_comparison(
            original_image_path=pair['original'],
            amendment_image_path=pair['amendment'],
            openai_client=openai_client
        )

        result = {
            'contract_name': pair['name'],
            'trace_id': trace_id,
            'changes': changes.model_dump(),
            'timestamp': datetime.now().isoformat()
        }
    except Exception as e:
        # Record the failure and move on so one bad pair does not stop the batch.
        print(f"\n✗ Error processing {pair['name']}: {str(e)}")
        results.append({
            'contract_name': pair['name'],
            'error': str(e),
            'error_type': type(e).__name__,
            'timestamp': datetime.now().isoformat()
        })
        continue

    # Evaluation is optional. A failure here must not discard the extraction
    # result obtained above, so it gets its own error handling.
    if ENHANCED_MODE:
        try:
            # Re-parse for evaluation (the workflow call above only returns the changes)
            orig = parse_contract_image(
                image_path=pair['original'],
                document_type='original',
                client=openai_client
            )
            amend = parse_contract_image(
                image_path=pair['amendment'],
                document_type='amendment',
                client=openai_client
            )
            ctx = evaluation_agent.analyze(
                original_contract=orig,
                amendment_contract=amend
            )

            eval_result = evaluator.evaluate_output(
                changes=changes,
                original_contract=orig,
                amendment_contract=amend,
                context=ctx
            )
            result['evaluation'] = eval_result
        except Exception as e:
            result['evaluation_error'] = str(e)
            print(f"\n⚠️  Evaluation failed for {pair['name']}: {str(e)}")

    results.append(result)

    print(f"\n✓ Success!")
    print(f"  Sections changed: {len(changes.sections_changed)}")
    print(f"  Topics touched: {len(changes.topics_touched)}")
    if 'evaluation' in result:
        print(f"  Quality score: {result['evaluation']['overall_score']:.2f}/100")

# `results` also contains failed pairs, so report the split explicitly.
succeeded_count = sum(1 for r in results if 'error' not in r)

print(f"\n{'='*70}")
print(f"Completed: {len(results)} contracts processed")
print(f"  Succeeded: {succeeded_count}, Failed: {len(results) - succeeded_count}")
print(f"{'='*70}")

## Display Results Summary

In [None]:
import pandas as pd

if results:
    # Split the records so failures are reported instead of silently dropped.
    succeeded = [r for r in results if 'error' not in r]
    failed = [r for r in results if 'error' in r]

    # One summary row per successfully processed contract
    summary_data = []
    for result in succeeded:
        summary_data.append({
            'Contract': result['contract_name'],
            'Sections Changed': len(result['changes']['sections_changed']),
            'Topics Touched': len(result['changes']['topics_touched']),
            'Summary Length': len(result['changes']['summary_of_the_change']),
            'Quality Score': f"{result['evaluation']['overall_score']:.1f}" if ENHANCED_MODE and 'evaluation' in result else 'N/A',
            # Show only the first 8 characters of the trace ID to keep the table narrow
            'Trace ID': result['trace_id'][:8] + '...' if result['trace_id'] else 'N/A'
        })

    df = pd.DataFrame(summary_data)
    print("\nResults Summary:")
    if df.empty:
        # Avoid printing a raw "Empty DataFrame" repr when every contract failed.
        print("  No contracts were processed successfully")
    else:
        print(df.to_string(index=False))

    if failed:
        print(f"\nFailed contracts ({len(failed)}):")
        for result in failed:
            print(f"  - {result['contract_name']}: {result['error']}")
else:
    print("No results to display")

## Detailed View: First Contract

In [None]:
# Show the first *successful* result. Checking only results[0] would report
# "No successful results" whenever the first contract failed, even if later
# contracts succeeded.
first_success = next((r for r in results if 'error' not in r), None)

if first_success is not None:
    print(f"\nDetailed Results for {first_success['contract_name']}:\n")

    # Use a dedicated name: rebinding `changes` to this dict would shadow the
    # model object that earlier cells keep under that name and break re-running them.
    change_details = first_success['changes']

    print("SECTIONS CHANGED:")
    for i, section in enumerate(change_details['sections_changed'], 1):
        print(f"  {i}. {section}")

    print("\nTOPICS TOUCHED:")
    for i, topic in enumerate(change_details['topics_touched'], 1):
        print(f"  {i}. {topic}")

    print("\nSUMMARY OF CHANGES:")
    print("-" * 70)
    print(change_details['summary_of_the_change'])
    print("-" * 70)

    if ENHANCED_MODE and 'evaluation' in first_success:
        print("\nQUALITY EVALUATION:")
        eval_data = first_success['evaluation']
        print(f"  Overall Score: {eval_data['overall_score']:.2f}/100")
        print(f"  Grade: {eval_data['grade']}")
        print("\n  Dimension Scores:")
        for dim, score in eval_data['dimension_scores'].items():
            print(f"    {dim.replace('_', ' ').title()}: {score:.2f}")
else:
    print("No successful results to display")

## Save Results to JSON

In [None]:
# Write all collected records (successes and failures) to a timestamped JSON file.
if results:
    output_dir = project_root / 'notebooks' / 'outputs'
    # parents=True: also create 'notebooks/' if it does not exist under
    # project_root, instead of failing with FileNotFoundError.
    output_dir.mkdir(parents=True, exist_ok=True)

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_file = output_dir / f'test_results_{timestamp}.json'

    with open(output_file, 'w', encoding='utf-8') as f:
        # default=str: fall back to the string form for values json cannot
        # encode natively, so one such value does not abort the export.
        json.dump(results, f, indent=2, ensure_ascii=False, default=str)

    print(f"✓ Results saved to: {output_file}")
else:
    print("No results to save")

## Cleanup and Flush Traces

In [None]:
# Flush the Langfuse client, then point the reader at the Langfuse host.
langfuse_client.flush()
print("✓ Langfuse traces flushed")

# LANGFUSE_HOST is optional; the cloud URL is used when it is unset.
langfuse_host = os.getenv('LANGFUSE_HOST', 'https://cloud.langfuse.com')
print(f"\nView traces at: {langfuse_host}")