# In [None]:
# Task 3: Event Impact Modeling
import sys
sys.path.append('../src')

from event_analyzer import EventImpactModeler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Initialize modeler
# One EventImpactModeler instance, built with its defaults, is shared by
# every section of this script.
print("Loading data for impact modeling...")
modeler = EventImpactModeler()

# 1. Create Impact Matrix
# create_impact_matrix() hands back two objects: the matrix printed right
# below and the table of individual impact links iterated afterwards.
print("\n=== EVENT-IMPACT MATRIX ===")
impact_matrix, event_impacts = modeler.create_impact_matrix()

# Columns selected for the console summary.
matrix_columns = [
    'parent_id',
    'event_date',
    'ACC_OWNERSHIP',
    'ACC_MM_ACCOUNT',
    'USG_DIGITAL_PAYMENT',
    'confidence',
]
print("Impact Matrix (Events × Indicators):")
matrix_view = impact_matrix[matrix_columns]
print(matrix_view.to_string())

# Display detailed impact links
print("\nDetailed Impact Links:")
for _, link in event_impacts.iterrows():
    # Fall back to 'pp' when the row has no 'magnitude_unit' entry.
    unit = link.get('magnitude_unit', 'pp')
    print(f"\n{link['parent_id']} → {link['related_indicator']}")
    print(f"  Direction: {link['impact_direction']}")
    print(f"  Magnitude: {link['impact_magnitude']} {unit}")
    print(f"  Lag: {link['lag_months']} months")
    print(f"  Evidence: {link['evidence_basis']}")
    print(f"  Confidence: {link['confidence']}")

# 2. Validate Against Historical Data
# Compares modeled impacts with observed changes and reports per-pair accuracy.
print("\n=== MODEL VALIDATION ===")
validation_results = modeler.validate_impacts_historically()

# An event/indicator pair whose accuracy falls below this value is reported
# as an area for improvement.
ACCURACY_THRESHOLD = 0.7

if validation_results.empty:
    print("Insufficient data for historical validation")
else:
    print("Validation Results (Actual vs Modeled):")
    print(validation_results[['event', 'indicator', 'actual_change',
                              'modeled_impact', 'difference', 'accuracy']].to_string())

    # Calculate overall accuracy
    avg_accuracy = validation_results['accuracy'].mean()
    print(f"\nAverage Model Accuracy: {avg_accuracy:.1%}")

    # Identify areas for improvement.
    # The low-accuracy rows are listed whenever any exist, not only when the
    # average is below the threshold: a good mean can otherwise hide
    # individual pairs that the model fits poorly.
    weak_rows = validation_results[validation_results['accuracy'] < ACCURACY_THRESHOLD]
    if not weak_rows.empty:
        print("\nAreas for model improvement:")
        for _, row in weak_rows.iterrows():
            print(f"- {row['event']} on {row['indicator']}: accuracy {row['accuracy']:.1%}")

# 3. Impact Estimation Examples
print("\n=== IMPACT ESTIMATION EXAMPLES ===")

# Both examples are evaluated at the same cut-off date.
estimation_cutoff = pd.Timestamp('2024-12-31')

# Example 1: Telebirr impact on mobile money accounts
# estimate_event_impacts() returns (total, per-event details); only the
# details whose event name contains 'Telebirr' are listed here.
print("\nExample 1: Telebirr impact on mobile money accounts")
tel_impact, tel_details = modeler.estimate_event_impacts('ACC_MM_ACCOUNT', estimation_cutoff)
print(f"Total estimated impact by end of 2024: {tel_impact:.1f} percentage points")
print("Breakdown:")
for entry in tel_details:
    if 'Telebirr' not in entry['event']:
        continue
    print(f"- {entry['event']}: {entry['realized_impact']:.1f} pp")

# Example 2: All events impact on account ownership
# Same call for a different indicator; every contributing event is listed.
print("\nExample 2: Cumulative impact on account ownership")
acc_impact, acc_details = modeler.estimate_event_impacts('ACC_OWNERSHIP', estimation_cutoff)
print(f"Total estimated impact by end of 2024: {acc_impact:.1f} percentage points")
print("Event contributions:")
for entry in acc_details:
    contribution = entry['realized_impact']
    print(f"- {entry['event']}: {contribution:.1f} pp (lag: {entry['lag_months']} months)")

# 4. Create Impact Timeline
print("\n=== IMPACT TIMELINE ANALYSIS ===")
timeline_start = pd.Timestamp('2020-01-01')
timeline_end = pd.Timestamp('2025-12-31')
timeline_df = modeler.create_impact_timeline('ACC_OWNERSHIP', timeline_start, timeline_end)

print("Cumulative Impact Timeline (sample):")
timeline_sample = timeline_df.head(10)
print(timeline_sample.to_string())

# Identify key inflection points
# A row counts as an inflection point when the absolute monthly impact
# exceeds 0.5. The scan starts at position 1, so the first row is never
# examined.
# NOTE(review): confirm that skipping the first row is intentional.
inflection_points = [
    {
        'date': timeline_df['date'].iloc[pos],
        'impact': timeline_df['monthly_impact'].iloc[pos],
        'cumulative': timeline_df['cumulative_impact'].iloc[pos],
    }
    for pos in range(1, len(timeline_df))
    if abs(timeline_df['monthly_impact'].iloc[pos]) > 0.5
]

print(f"\nFound {len(inflection_points)} significant impact inflection points")

# 5. Scenario Analysis
print("\n=== FUTURE SCENARIO ANALYSIS ===")

# Hypothetical events for each scenario. Every event names the indicator it
# targets, an event type and a lag in months.
optimistic_events = [
    {
        'name': 'Digital Banking License Issuance',
        'indicator': 'ACC_OWNERSHIP',
        'type': 'major_policy_reform',
        'lag_months': 12,
    },
    {
        'name': 'National QR Code Rollout',
        'indicator': 'USG_DIGITAL_PAYMENT',
        'type': 'infrastructure_expansion',
        'lag_months': 6,
    },
]

regulatory_events = [
    {
        'name': 'Stricter KYC Requirements',
        'indicator': 'ACC_OWNERSHIP',
        'type': 'major_policy_reform',
        # Negative multiplier: this event is modeled as reducing the indicator.
        'impact_multiplier': -0.5,
        'lag_months': 3,
    },
]

# Scenario name -> list of events belonging to that scenario.
future_scenarios = {
    'optimistic_2025': optimistic_events,
    'regulatory_challenge': regulatory_events,
}

# Run the modeler over the scenario definitions and print one report per
# scenario: an upper-cased heading, the total, then each event's estimate.
scenarios = modeler.scenario_analysis(future_scenarios)

print("Future Scenario Impacts:")
for outcome in scenarios:
    print(f"\n{outcome['scenario'].upper()}:")
    print(f"Total estimated impact: {outcome['total_impact']:.1f} pp")
    for event_row in outcome['details']:
        label = event_row['event']
        estimate = event_row['estimated_impact']
        certainty = event_row['confidence']
        print(f"- {label}: {estimate:.1f} pp ({certainty} confidence)")

# 6. Methodology Documentation
print("\n=== METHODOLOGY DOCUMENTATION ===")

# The methodology record is assembled from four named parts.
estimation_notes = {
    'approach': 'Event study methodology with lag effects',
    'lag_model': 'Gradual impact ramp-up over 6 months',
    'cumulative_effects': 'Additive across events',
    'validation': 'Historical backtesting where data available',
}

# NOTE(review): this section is stored in `methodology` but is not printed
# by the reporting code below.
source_notes = {
    'impact_magnitudes': 'Combination of: 1) Pre/post analysis in Ethiopia, 2) Comparable country evidence, 3) Expert assessment',
    'lag_periods': 'Based on typical implementation and adoption timelines',
    'confidence_levels': 'Assessed based on evidence quality and cross-validation',
}

model_assumptions = [
    'Event impacts are independent and additive',
    'Impact magnitudes are linear (no diminishing/accelerating effects)',
    'Lag periods follow typical patterns observed in similar markets',
    'No major confounding events during impact periods',
]

model_limitations = [
    'Sparse historical data for robust statistical validation',
    'Difficulty isolating individual event impacts in rapidly changing market',
    'Assumption of constant impact magnitudes may not hold',
    'Limited accounting for interaction effects between events',
]

methodology = {
    'impact_estimation': estimation_notes,
    'data_sources': source_notes,
    'assumptions': model_assumptions,
    'limitations': model_limitations,
}

# Key/value section first, then the two plain bullet lists.
print("\nImpact Estimation Methodology:")
for label, text in methodology['impact_estimation'].items():
    print(f"- {label}: {text}")

bullet_sections = (
    ("\nKey Assumptions:", 'assumptions'),
    ("\nLimitations:", 'limitations'),
)
for heading, section_key in bullet_sections:
    print(heading)
    for entry in methodology[section_key]:
        print(f"- {entry}")

# 7. Create Visualizations
# create_visualizations() returns a name -> path mapping; entries with a
# falsy path are left out of the listing.
print("\n=== CREATING VISUALIZATIONS ===")
viz_files = modeler.create_visualizations()
print("Visualizations created:")
for chart_name, chart_path in viz_files.items():
    if not chart_path:
        continue
    print(f"- {chart_name}: {chart_path}")

# 8. Save Impact Analysis Results
# Writes the impact matrix, the validation table (when non-empty) and a flat
# scenario summary as CSV files under models/.
print("\n=== SAVING ANALYSIS RESULTS ===")

# DataFrame.to_csv does not create missing parent directories and fails with
# OSError when the target folder is absent, so make sure it exists first.
import os

os.makedirs('models', exist_ok=True)

# Save impact matrix
impact_matrix.to_csv('models/impact_matrix.csv', index=False)
print("Impact matrix saved to: models/impact_matrix.csv")

# Save validation results
if not validation_results.empty:
    validation_results.to_csv('models/validation_results.csv', index=False)
    print("Validation results saved to: models/validation_results.csv")

# Save scenario analysis
# Each scenario becomes one row; the nested details are stored as their
# string representation so they fit in a single CSV cell.
# NOTE(review): this reads s['events'], while the reporting code earlier in
# the script only uses 'scenario', 'total_impact' and 'details' — confirm
# that scenario_analysis() results actually carry an 'events' key.
scenarios_df = pd.DataFrame([
    {
        'scenario': s['scenario'],
        'total_impact': s['total_impact'],
        'events': len(s['events']),
        'details': str(s['details'])
    }
    for s in scenarios
])
scenarios_df.to_csv('models/scenario_analysis.csv', index=False)
print("Scenario analysis saved to: models/scenario_analysis.csv")

# Closing banner followed by the numbered list of what this script covered.
completion_lines = (
    "\n=== TASK 3 COMPLETED ===",
    "1. Event-impact matrix created",
    "2. Impact modeling methodology implemented",
    "3. Historical validation performed",
    "4. Future scenario analysis conducted",
    "5. Methodology documented with assumptions and limitations",
    "6. Results saved for forecasting use",
)
for summary_line in completion_lines:
    print(summary_line)