# Building Coverage System - Development Analysis

This notebook provides development and analysis tools for the building coverage system.
It includes data exploration, model testing, and performance analysis capabilities.

## 1. Setup and Imports

In [None]:
import sys
import os

# Make the parent directory importable so project packages resolve from
# inside the notebook; the entry is appended only once per session.
project_root = os.path.abspath('..')
root_already_listed = project_root in sys.path
if not root_already_listed:
    sys.path.append(project_root)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import logging

# Plot appearance: matplotlib style sheet plus the seaborn colour palette.
plt.style.use('seaborn-v0_8')
sns.set_palette('husl')

# Logging: INFO and above, each record prefixed with timestamp and level.
log_format = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=log_format)
logger = logging.getLogger(__name__)

print("Development environment setup complete!")

## 2. Configuration and Test Data Generation

In [None]:
# Import configuration
from config import get_config_manager, get_config

# Select the development environment through environment variables.
# NOTE(review): these are set after `config` has already been imported; if
# that module reads them at import time they will have no effect - confirm.
os.environ.update({'ENVIRONMENT': 'development', 'DEV_OVERRIDE': 'local'})

config_manager = get_config_manager()
config = get_config()
pipeline_config = config['pipeline']

print("Environment:", config_manager.environment)
print("Development mode:", config_manager.is_development())
print("Max workers:", pipeline_config['parallel_processing']['max_workers'])

# Dump every top-level entry of the pipeline section for a quick visual check.
print("\nPipeline Configuration:")
for key, value in pipeline_config.items():
    print(f"  {key}: {value}")

In [None]:
# Generate synthetic test data for development
def generate_test_claims(n_claims=100):
    """Generate synthetic claim data for testing.

    Roughly 60% of the rows are drawn from building-damage descriptions and
    labelled 'BUILDING COVERAGE'; the rest are drawn from non-building
    descriptions and labelled 'NO BUILDING COVERAGE'.

    The global NumPy random state is reseeded with 42 on every call, so the
    texts, LOB codes and labels are identical between calls with the same
    ``n_claims``. Loss and report dates are offsets from ``datetime.now()``
    and therefore differ from run to run.

    Args:
        n_claims: Number of claim rows to generate.

    Returns:
        A DataFrame with one row per claim and the columns CLAIMNO, CLAIMKEY,
        clean_FN_TEXT, LOBCD, LOSSDESC, LOSSDT, REPORTEDDT and
        expected_coverage. The columns are present even when ``n_claims``
        is 0.
    """
    np.random.seed(42)

    # Building-related damage descriptions
    building_damages = [
        "Foundation damage due to water intrusion affecting structural integrity",
        "Roof damage from storm with extensive building material deterioration",
        "Wall damage and building structural problems from flooding event",
        "Floor damage with structural building components affected by water",
        "Ceiling damage requiring building structural repairs and reconstruction",
        "Exterior wall damage affecting building envelope and structure",
        "Interior structural damage to load-bearing walls and supports",
        "Building foundation settling causing structural damage throughout",
        "Roof collapse requiring complete building structural assessment",
        "Fire damage to building structure requiring extensive reconstruction"
    ]

    # Non-building damage descriptions
    non_building_damages = [
        "Vehicle damage in parking lot with no building involvement",
        "Landscaping damage from storm with no structural building impact",
        "Personal property damage inside building but no structural damage",
        "Equipment damage with no building structural involvement",
        "Contents damage due to water but building structure unaffected",
        "Theft of personal property with no building damage",
        "Business interruption claim with no physical building damage",
        "Liability claim with no building or property damage"
    ]

    # Fixed schema so an empty result still exposes every column.
    columns = [
        'CLAIMNO', 'CLAIMKEY', 'clean_FN_TEXT', 'LOBCD',
        'LOSSDESC', 'LOSSDT', 'REPORTEDDT', 'expected_coverage',
    ]

    # Generate claims
    claims_data = []
    building_ratio = 0.6  # 60% building-related claims

    for i in range(n_claims):
        claim_no = f"DEV{i:06d}"

        # Choose damage type
        if np.random.random() < building_ratio:
            damage_text = np.random.choice(building_damages)
            lob_cd = np.random.choice(['15', '17'])  # Building LOBs
            loss_desc = 'Building damage'
            expected_coverage = 'BUILDING COVERAGE'
        else:
            damage_text = np.random.choice(non_building_damages)
            lob_cd = np.random.choice(['15', '17', '18'])
            loss_desc = 'Other damage'
            expected_coverage = 'NO BUILDING COVERAGE'

        # Add some variation to the text
        variation = f" Additional details for claim {i} with specific circumstances and location information."
        full_text = damage_text + variation

        # Generate dates; int() guards against NumPy integer scalars, which
        # timedelta does not accept.
        loss_date = datetime.now() - timedelta(days=int(np.random.randint(1, 365)))
        report_date = loss_date + timedelta(days=int(np.random.randint(0, 30)))

        claims_data.append({
            'CLAIMNO': claim_no,
            'CLAIMKEY': f"KEY{i:06d}",
            'clean_FN_TEXT': full_text,
            'LOBCD': lob_cd,
            'LOSSDESC': loss_desc,
            'LOSSDT': loss_date,
            'REPORTEDDT': report_date,
            'expected_coverage': expected_coverage,  # For validation
        })

    return pd.DataFrame(claims_data, columns=columns)

# Build the 100-claim development data set and report its class balance.
test_claims = generate_test_claims(100)
coverage_labels = test_claims['expected_coverage']
building_count = (coverage_labels == 'BUILDING COVERAGE').sum()
non_building_count = (coverage_labels == 'NO BUILDING COVERAGE').sum()

print(f"Generated {len(test_claims)} test claims")
print(f"Building coverage claims: {building_count}")
print(f"Non-building claims: {non_building_count}")

# Last expression of the cell: the notebook renders the first rows.
test_claims.head()

## 3. Data Exploration and Analysis

In [None]:
# Exploratory Data Analysis: four panels laid out on a 2x2 grid.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
lob_ax, length_ax = axes[0, 0], axes[0, 1]
coverage_ax, lag_ax = axes[1, 0], axes[1, 1]

# Top-left: how many claims fall under each LOB code.
lob_counts = test_claims['LOBCD'].value_counts()
lob_counts.plot(kind='bar', ax=lob_ax)
lob_ax.set_title('Distribution of LOB Codes')
lob_ax.set_xlabel('LOB Code')
lob_ax.set_ylabel('Count')

# Top-right: character length of the claim text.
text_lengths = test_claims['clean_FN_TEXT'].str.len()
text_lengths.hist(bins=20, ax=length_ax)
length_ax.set_title('Distribution of Text Lengths')
length_ax.set_xlabel('Text Length (characters)')
length_ax.set_ylabel('Frequency')

# Bottom-left: share of each expected coverage label.
coverage_counts = test_claims['expected_coverage'].value_counts()
coverage_counts.plot(kind='pie', ax=coverage_ax, autopct='%1.1f%%')
coverage_ax.set_title('Expected Coverage Distribution')

# Bottom-right: whole days between loss and report. The derived column is
# stored on test_claims so the summary below can reuse it.
lag_delta = test_claims['REPORTEDDT'] - test_claims['LOSSDT']
test_claims['reporting_lag'] = lag_delta.dt.days
test_claims['reporting_lag'].hist(bins=15, ax=lag_ax)
lag_ax.set_title('Reporting Lag Distribution')
lag_ax.set_xlabel('Days')
lag_ax.set_ylabel('Frequency')

plt.tight_layout()
plt.show()

# Headline numbers for the panels above.
loss_dates = test_claims['LOSSDT']
print("\nSummary Statistics:")
print(f"Average text length: {text_lengths.mean():.1f} characters")
print(f"Median reporting lag: {test_claims['reporting_lag'].median():.1f} days")
print(f"Date range: {loss_dates.min().date()} to {loss_dates.max().date()}")


## 4. Pipeline Component Testing

In [None]:
# Test individual pipeline components
from modules.core.pipeline import CoveragePipeline
from modules.core.monitor import PerformanceMonitor
from unittest.mock import Mock

# Exercise PerformanceMonitor end to end: time one named operation, record a
# custom metric, then print whatever the monitor reports as its summary.
import time

monitor = PerformanceMonitor()

print("Testing Performance Monitor:")
operation_name = 'test_analysis'
monitor.start_operation(operation_name)

# Stand-in for real work so the measured duration is non-trivial.
time.sleep(0.1)

# NOTE(review): the result is formatted with ':.3f' below, so end_operation
# presumably returns the elapsed seconds as a number - confirm.
duration = monitor.end_operation(operation_name)
monitor.add_custom_metric('test_claims_processed', len(test_claims), 'analysis')

print(f"Operation duration: {duration:.3f} seconds")

# One line per entry of the monitor's summary mapping.
summary = monitor.get_summary()
print("\nPerformance Summary:")
for key, value in summary.items():
    print(f"  {key}: {value}")

In [None]:
# Test pre-processing hooks
from custom_hooks.pre_processing import pre_process

print("Testing Pre-processing Hook:")

# Test with sample data; work on a copy so test_claims itself is untouched.
sample_claims = test_claims.head(10).copy()
print(f"Input claims: {len(sample_claims)}")

# Remember the input schema before calling the hook. If pre_process adds
# columns to its argument in place, comparing afterwards would hide them.
input_columns = set(sample_claims.columns)

# Apply pre-processing
processed_claims = pre_process(sample_claims)
print(f"Output claims: {len(processed_claims)}")

# Check added columns
added_columns = set(processed_claims.columns) - input_columns
print(f"Added columns: {list(added_columns)}")

# Display processing metrics; both columns are treated as optional outputs
# of the hook, so each is checked before use.
if 'processing_confidence' in processed_claims.columns:
    print(f"Average processing confidence: {processed_claims['processing_confidence'].mean():.3f}")

if 'complexity_score' in processed_claims.columns:
    print(f"Average complexity score: {processed_claims['complexity_score'].mean():.3f}")

# Show sample processed data. Only columns that actually exist are selected,
# since indexing with a missing label raises KeyError.
print("\nSample processed data:")
preview_columns = [
    column
    for column in ('CLAIMNO', 'processing_confidence', 'data_quality_flags')
    if column in processed_claims.columns
]
processed_claims[preview_columns].head()

## 5. Mock Pipeline Execution

In [None]:
# Mock pipeline execution for development testing
from unittest.mock import patch

def mock_pipeline_execution(claims_df):
    """Mock pipeline execution for testing purposes.

    Stands in for the RAG step with a keyword rule: the lower-cased claim
    text is scanned for a fixed list of building-related substrings and the
    number of distinct keywords found decides the prediction.

    * two or more keywords -> 'BUILDING COVERAGE', confidence
      ``min(0.95, 0.6 + 0.1 * matches)``
    * exactly one keyword  -> 'BUILDING COVERAGE', confidence 0.55
    * no keyword           -> 'NO BUILDING COVERAGE', confidence 0.8

    Args:
        claims_df: DataFrame with at least the columns 'CLAIMNO' and
            'clean_FN_TEXT'. May be empty. Text values that are not strings
            (for example NaN) are treated as empty text.

    Returns:
        ``claims_df`` left-merged on 'CLAIMNO' with the added columns
        'prediction', 'confidence', 'summary' and 'keyword_matches'.

    Raises:
        KeyError: If 'CLAIMNO' or 'clean_FN_TEXT' is missing from claims_df.
    """
    print("Starting mock pipeline execution...")

    # Substring match, so 'structure' does not match 'structural' but
    # 'wall' matches 'walls'.
    building_keywords = ('building', 'structure', 'foundation', 'roof', 'wall', 'floor', 'ceiling')
    result_columns = ['CLAIMNO', 'prediction', 'confidence', 'summary', 'keyword_matches']

    # Mock RAG processing results
    rag_results = []

    for _, claim in claims_df.iterrows():
        # Simple rule-based mock for testing
        raw_text = claim['clean_FN_TEXT']
        text = raw_text.lower() if isinstance(raw_text, str) else ''

        keyword_matches = sum(1 for keyword in building_keywords if keyword in text)

        if keyword_matches >= 2:
            prediction = 'BUILDING COVERAGE'
            confidence = min(0.95, 0.6 + (keyword_matches * 0.1))
            summary = f"Building coverage recommended due to {keyword_matches} structural keywords"
        elif keyword_matches >= 1:
            prediction = 'BUILDING COVERAGE'
            confidence = 0.5 + (keyword_matches * 0.05)
            summary = f"Possible building coverage with {keyword_matches} keyword(s)"
        else:
            prediction = 'NO BUILDING COVERAGE'
            confidence = 0.8
            summary = "No building-related keywords found"

        rag_results.append({
            'CLAIMNO': claim['CLAIMNO'],
            'prediction': prediction,
            'confidence': confidence,
            'summary': summary,
            'keyword_matches': keyword_matches
        })

    # Explicit columns keep the merge key present even when no rows exist.
    rag_df = pd.DataFrame(rag_results, columns=result_columns)
    if rag_df.empty:
        # Align the key dtype with the input so the merge accepts it.
        rag_df['CLAIMNO'] = rag_df['CLAIMNO'].astype(claims_df['CLAIMNO'].dtype)

    # Merge with original data
    final_results = claims_df.merge(rag_df, on='CLAIMNO', how='left')

    print(f"Processed {len(final_results)} claims")

    return final_results

# Run the mock pipeline on the first 20 generated claims.
test_sample = test_claims.head(20)
mock_results = mock_pipeline_execution(test_sample)

# Headline figures for the run.
predicted_building = mock_results['prediction'] == 'BUILDING COVERAGE'
mean_confidence = mock_results['confidence'].mean()
print("\nMock Pipeline Results:")
print(f"Total claims processed: {len(mock_results)}")
print(f"Building coverage predictions: {predicted_building.sum()}")
print(f"Average confidence: {mean_confidence:.3f}")

# Last expression of the cell: expected label next to the mock prediction.
display_columns = ['CLAIMNO', 'expected_coverage', 'prediction', 'confidence', 'keyword_matches']
mock_results[display_columns].head(10)

## 6. Accuracy Analysis

In [None]:
# Analyze mock prediction accuracy
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Calculate accuracy metrics
y_true = mock_results['expected_coverage']
y_pred = mock_results['prediction']

accuracy = accuracy_score(y_true, y_pred)
print(f"Mock Pipeline Accuracy: {accuracy:.3f}")

# Classification report
print("\nClassification Report:")
print(classification_report(y_true, y_pred))

# Confusion matrix. The label order is passed explicitly: without it the
# classes are sorted, which puts 'BUILDING COVERAGE' first and would not line
# up with the tick labels below. Fixing the order also keeps the matrix 2x2
# when one class is absent from the sample.
class_order = ['NO BUILDING COVERAGE', 'BUILDING COVERAGE']
tick_labels = ['No Coverage', 'Building Coverage']
cm = confusion_matrix(y_true, y_pred, labels=class_order)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=tick_labels,
            yticklabels=tick_labels)
plt.title('Mock Pipeline Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# Confidence distribution analysis
plt.figure(figsize=(12, 4))

# Left panel: all confidence scores together.
plt.subplot(1, 2, 1)
mock_results['confidence'].hist(bins=20, alpha=0.7)
plt.title('Confidence Score Distribution')
plt.xlabel('Confidence')
plt.ylabel('Frequency')

# Right panel: confidence scores split by predicted class.
plt.subplot(1, 2, 2)
building_conf = mock_results[mock_results['prediction'] == 'BUILDING COVERAGE']['confidence']
no_building_conf = mock_results[mock_results['prediction'] == 'NO BUILDING COVERAGE']['confidence']

plt.hist(building_conf, bins=15, alpha=0.7, label='Building Coverage', color='blue')
plt.hist(no_building_conf, bins=15, alpha=0.7, label='No Building Coverage', color='red')
plt.title('Confidence by Prediction Type')
plt.xlabel('Confidence')
plt.ylabel('Frequency')
plt.legend()

plt.tight_layout()
plt.show()

## 7. Performance Benchmarking

In [None]:
# Performance benchmarking with different data sizes
import time

def benchmark_pipeline(sizes=(10, 50, 100, 500)):
    """Benchmark pipeline performance with different data sizes.

    For each size, synthetic claims are generated and run through the mock
    pipeline. Only the pipeline call is timed; data generation is excluded.

    Args:
        sizes: Iterable of claim counts to benchmark. The default is an
            immutable tuple so it cannot be altered between calls.

    Returns:
        A DataFrame with one row per size and the columns 'size',
        'processing_time' (seconds), 'throughput' (claims per second, 0 when
        the measured time is 0) and 'avg_time_per_claim' (seconds, 0 when
        size is 0). The columns are present even when ``sizes`` is empty.
    """
    results = []

    for size in sizes:
        print(f"Benchmarking with {size} claims...")

        # Generate data of specified size
        test_data = generate_test_claims(size)

        # perf_counter is monotonic and uses the highest-resolution clock
        # available, so short runs are not rounded down to zero.
        start_time = time.perf_counter()

        # Run mock pipeline; only the elapsed time matters here.
        mock_pipeline_execution(test_data)

        processing_time = time.perf_counter() - start_time

        # Calculate metrics
        throughput = size / processing_time if processing_time > 0 else 0

        results.append({
            'size': size,
            'processing_time': processing_time,
            'throughput': throughput,
            'avg_time_per_claim': processing_time / size if size > 0 else 0
        })

        print(f"  Processing time: {processing_time:.3f} seconds")
        print(f"  Throughput: {throughput:.1f} claims/second")

    return pd.DataFrame(
        results,
        columns=['size', 'processing_time', 'throughput', 'avg_time_per_claim'],
    )

# Benchmark the mock pipeline at four increasing claim counts.
benchmark_sizes = [10, 50, 100, 200]
benchmark_results = benchmark_pipeline(benchmark_sizes)

# Last expression of the cell: the notebook renders the results table.
print("\nBenchmark Results:")
benchmark_results

In [None]:
# Plot the benchmark table: one panel per metric, both against claim count.
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# (axis, column to plot, line format, y-axis label, panel title)
panels = [
    (axes[0], 'processing_time', 'b-o',
     'Processing Time (seconds)', 'Processing Time vs Data Size'),
    (axes[1], 'throughput', 'g-o',
     'Throughput (claims/second)', 'Throughput vs Data Size'),
]
for panel_ax, column, line_format, y_label, panel_title in panels:
    panel_ax.plot(benchmark_results['size'], benchmark_results[column], line_format)
    panel_ax.set_xlabel('Number of Claims')
    panel_ax.set_ylabel(y_label)
    panel_ax.set_title(panel_title)
    panel_ax.grid(True)

plt.tight_layout()
plt.show()

# Text summary; the scalability factor is the elapsed time of the last
# benchmark row divided by that of the first row.
elapsed = benchmark_results['processing_time']
print("Performance Summary:")
print(f"Best throughput: {benchmark_results['throughput'].max():.1f} claims/second")
print(f"Average time per claim: {benchmark_results['avg_time_per_claim'].mean():.4f} seconds")
print(f"Scalability factor: {elapsed.iloc[-1] / elapsed.iloc[0]:.2f}x")

## 8. Configuration Testing

In [None]:
# Test configuration management
from config import ConfigManager

print("Testing Configuration Management:")

# Test different environments
dev_config = ConfigManager('development')
prod_config = ConfigManager('production')


def _lookup_config_value(settings, dotted_path, default='N/A'):
    """Follow a dotted path through nested mappings.

    Returns ``default`` as soon as a key is missing or an intermediate value
    has no ``get`` method, instead of calling ``.get`` on a non-mapping.
    """
    value = settings
    for key in dotted_path.split('.'):
        if not hasattr(value, 'get'):
            return default
        value = value.get(key, default)
    return value


print("Development vs Production Comparison:")
comparison_items = [
    ('Max Workers', 'pipeline.parallel_processing.max_workers'),
    ('Batch Size', 'pipeline.parallel_processing.batch_size'),
    ('Log Level', 'logging.level'),
    ('Debug Mode', 'debug')
]

for item_name, config_path in comparison_items:
    # Navigate config path; a missing setting is reported as 'N/A'.
    dev_value = _lookup_config_value(dev_config.get_config(), config_path)
    prod_value = _lookup_config_value(prod_config.get_config(), config_path)

    print(f"  {item_name}:")
    print(f"    Development: {dev_value}")
    print(f"    Production:  {prod_value}")
    print()

# Test SQL queries
dev_queries = dev_config.get_sql_queries()
prod_queries = prod_config.get_sql_queries()

# Compare query lengths by name. Only names present in the development set
# are listed; a query missing from production is shown with length 0.
print("SQL Query Comparison:")
for query_name in dev_queries.keys():
    dev_length = len(dev_queries[query_name])
    prod_length = len(prod_queries.get(query_name, ''))
    print(f"  {query_name}: Dev({dev_length} chars) vs Prod({prod_length} chars)")


## 9. Development Recommendations

In [None]:
# Generate development recommendations based on analysis
def generate_recommendations(mock_results, benchmark_results):
    """Turn the analysis results into a list of recommendation strings.

    Args:
        mock_results: DataFrame with the columns 'expected_coverage',
            'prediction', 'confidence' and 'clean_FN_TEXT'.
        benchmark_results: DataFrame with a 'throughput' column.

    Returns:
        A list of strings: one accuracy rating, one throughput rating,
        optional confidence and text-length warnings, then five fixed
        development-process items.
    """
    findings = []

    # Accuracy rating: below 0.8 is low, below 0.9 is moderate, else good.
    accuracy = accuracy_score(mock_results['expected_coverage'], mock_results['prediction'])
    if accuracy < 0.8:
        accuracy_note = f"🔴 Low accuracy ({accuracy:.3f}): Consider improving the RAG model or rule-based logic"
    elif accuracy < 0.9:
        accuracy_note = f"🟡 Moderate accuracy ({accuracy:.3f}): Fine-tune the model for better performance"
    else:
        accuracy_note = f"🟢 Good accuracy ({accuracy:.3f}): Current model performs well"
    findings.append(accuracy_note)

    # Throughput rating, based on the best benchmark row: below 10 claims/sec
    # is low, below 50 is moderate, else good.
    best_throughput = benchmark_results['throughput'].max()
    if best_throughput < 10:
        throughput_note = f"🔴 Low throughput ({best_throughput:.1f} claims/sec): Consider parallel processing optimization"
    elif best_throughput < 50:
        throughput_note = f"🟡 Moderate throughput ({best_throughput:.1f} claims/sec): Room for performance improvement"
    else:
        throughput_note = f"🟢 Good throughput ({best_throughput:.1f} claims/sec): Performance is adequate"
    findings.append(throughput_note)

    # Confidence warnings: a low mean, and more than 20% of claims under 0.7.
    confidence = mock_results['confidence']
    avg_confidence = confidence.mean()
    low_confidence_count = (confidence < 0.7).sum()
    if avg_confidence < 0.7:
        findings.append(
            f"🔴 Low average confidence ({avg_confidence:.3f}): Review model confidence calibration"
        )
    if low_confidence_count > len(mock_results) * 0.2:
        findings.append(
            f"🟡 {low_confidence_count} claims with low confidence: Consider manual review process"
        )

    # Data quality warning: any claim text shorter than 100 characters.
    short_texts = (mock_results['clean_FN_TEXT'].str.len() < 100).sum()
    if short_texts > 0:
        findings.append(
            f"🟡 {short_texts} claims with short text: May need additional context for better predictions"
        )

    # Fixed development-process items, always appended last.
    process_items = [
        "📋 Implement comprehensive unit tests for all pipeline components",
        "📋 Set up continuous integration with automated testing",
        "📋 Create integration tests with real data samples",
        "📋 Implement logging and monitoring for production deployment",
        "📋 Consider A/B testing framework for model improvements"
    ]
    return findings + process_items

# Build the recommendation list and print it as a numbered report.
recommendations = generate_recommendations(mock_results, benchmark_results)

print("🚀 Development Recommendations:")
print("=" * 50)
for position, recommendation in enumerate(recommendations, start=1):
    print(f"{position:2d}. {recommendation}")

# Key figures behind the recommendations.
mock_accuracy = accuracy_score(mock_results['expected_coverage'], mock_results['prediction'])
best_throughput = benchmark_results['throughput'].max()
mean_confidence = mock_results['confidence'].mean()

print("\n📊 Summary Statistics:")
print(f"  Total test claims: {len(test_claims)}")
print(f"  Mock accuracy: {mock_accuracy:.3f}")
print(f"  Best throughput: {best_throughput:.1f} claims/second")
print(f"  Average confidence: {mean_confidence:.3f}")


## 10. Export Results

In [None]:
# Export analysis results for further use
import os

# Create output directory
output_dir = '../output/development_analysis'
os.makedirs(output_dir, exist_ok=True)

# Export datasets
test_claims.to_csv(f'{output_dir}/test_claims.csv', index=False)
mock_results.to_csv(f'{output_dir}/mock_results.csv', index=False)
benchmark_results.to_csv(f'{output_dir}/benchmark_results.csv', index=False)

# Export analysis summary
analysis_summary = {
    'timestamp': datetime.now().isoformat(),
    'test_claims_count': len(test_claims),
    'mock_accuracy': accuracy_score(mock_results['expected_coverage'], mock_results['prediction']),
    'average_confidence': mock_results['confidence'].mean(),
    'best_throughput': benchmark_results['throughput'].max(),
    'recommendations_count': len(recommendations)
}

import json
# default=str stringifies any value json cannot serialise natively.
with open(f'{output_dir}/analysis_summary.json', 'w', encoding='utf-8') as f:
    json.dump(analysis_summary, f, indent=2, default=str)

# Export recommendations. The text contains emoji, so the encoding is set
# explicitly; the platform default may not be able to encode them.
with open(f'{output_dir}/recommendations.txt', 'w', encoding='utf-8') as f:
    f.write('Development Analysis Recommendations\n')
    f.write('=' * 50 + '\n')
    f.write(f'Generated: {datetime.now()}\n\n')

    for i, rec in enumerate(recommendations, 1):
        f.write(f'{i:2d}. {rec}\n')

# Lists everything currently in the output directory, which may include
# files left over from earlier runs.
print(f"Analysis results exported to {output_dir}/")
print("Files created:")
for file in os.listdir(output_dir):
    print(f"  - {file}")

print("\n✅ Development Analysis Complete!")
print("Use these results to guide further development and testing.")