# 02. Impact Modeling & Forecasting

## Objectives
- Build Association Matrix.
- Build Event-Augmented Trend Model.
- Forecast 2025-2027.
- Generate Uncertainty Plots.

In [6]:
import sys
import os
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.api import SimpleExpSmoothing

# Ensure reports/figures exists
os.makedirs('../reports/figures', exist_ok=True)

# Add src to path
sys.path.append(os.path.abspath(os.path.join('../src')))
from data_loader import load_raw_data, enrich_data, process_data

# Load Data
df_u, df_i = load_raw_data()
df_u, df_i = enrich_data(df_u, df_i)
observations, events_enriched, raw_impacts = process_data(df_u, df_i)


Loading data from c:\Users\hp\Downloads\KAIM\KAIM WEEK 10\Forecasting-Digital-Finance-Ethiopia\data\raw\ethiopia_fi_unified_data.xlsx...
Enriching data with new records...


  df_unified = pd.concat([df_unified, pd.DataFrame([new_obs])], ignore_index=True)
  df_unified = pd.concat([df_unified, pd.DataFrame([new_obs])], ignore_index=True)
  df_unified = pd.concat([df_unified, pd.DataFrame([new_event])], ignore_index=True)
  df_unified = pd.concat([df_unified, pd.DataFrame([new_event])], ignore_index=True)
  df_unified = pd.concat([df_unified, pd.DataFrame([new_event])], ignore_index=True)
  df_impact = pd.concat([df_impact, pd.DataFrame([new_impact])], ignore_index=True)


## 1. Association Matrix
Mapping Events to Key Indicators.

In [7]:
# Filter events that impact ACC_OWNERSHIP or USG_DIGITAL_PAYMENT
target_indicators = ['ACC_OWNERSHIP', 'USG_DIGITAL_PAYMENT']

# Check if related_indicator column exists and has values
if 'related_indicator' in events_enriched.columns:
    relevant_events = events_enriched[events_enriched['related_indicator'].isin(target_indicators)].copy()
    
    # Create a simplistic matrix (Event x Indicator)
    # We want to show which event affects which indicator
    association = pd.crosstab(relevant_events['original_text_evt'], relevant_events['related_indicator'])
    
    plt.figure(figsize=(10, 6))
    sns.heatmap(association, annot=True, cmap='Blues', cbar=False)
    plt.title('Event-Indicator Association Matrix')
    plt.tight_layout()
    plt.savefig('../reports/figures/association_matrix.png')
    plt.close()
else:
    print("Column 'related_indicator' missing or empty in events.")

Column 'related_indicator' missing or empty in events.


## 2. Event-Augmented Trend Model
Baseline Trend + Impact Boosts.

In [8]:
def forecast_with_impacts(indicator_code, start_year=2011, end_year=2027):
    # 1. Get Baseline Data
    data = observations[observations['indicator_code'] == indicator_code].sort_values('observation_date')
    data['year'] = data['observation_date'].dt.year
    
    # Create a full year range dataframe
    years = pd.DataFrame({'year': range(start_year, end_year + 1)})
    ts_data = pd.merge(years, data[['year', 'value_numeric']], on='year', how='left')
    
    # Interpolate missing values for baseline trend (Linear)
    ts_data['baseline'] = ts_data['value_numeric'].interpolate(method='linear')
    # Forward fill for the future if last point is earlier, or extrapolate
    # Simple extrapolation: last known growth rate or just linear projection
    # For simplicity, let's use a linear regression on available points to project baseline
    valid_data = ts_data.dropna(subset=['value_numeric'])
    if len(valid_data) > 1:
        z = np.polyfit(valid_data['year'], valid_data['value_numeric'], 1)
        p = np.poly1d(z)
        ts_data['baseline_trend'] = p(ts_data['year'])
    else:
        ts_data['baseline_trend'] = ts_data['baseline'] # Fallback

    # 2. Add Impacts
    # Find events related to this indicator
    if 'related_indicator' in events_enriched.columns:
        related_evts = events_enriched[events_enriched['related_indicator'] == indicator_code]
        
        ts_data['impact_boost'] = 0.0
        for _, evt in related_evts.iterrows():
            # Assuming impact_magnitude is a percentage point boost
            # Distributed over 'lag_months' or immediate?
            # Let's simplify: Add magnitude cumulatively starting from event year
            evt_year = evt['observation_date_evt'].year
            magnitude = evt['impact_magnitude_imp'] if pd.notna(evt['impact_magnitude_imp']) else 0
            
            # Check direction
            if evt['impact_direction_imp'] == 'decrease':
                magnitude = -abs(magnitude)
            else:
                magnitude = abs(magnitude)
                
            # Apply boost to all years >= evt_year
            ts_data.loc[ts_data['year'] >= evt_year, 'impact_boost'] += magnitude

    ts_data['forecast'] = ts_data['baseline_trend'] + ts_data['impact_boost']
    
    return ts_data

# Run for Account Ownership
forecast_df = forecast_with_impacts('ACC_OWNERSHIP')
forecast_df.tail()

KeyError: 'impact_boost'

## 3. Forecast & Uncertainty Plots
Projecting 2025-2027 with Confidence Intervals.

In [10]:
def plot_forecast(df, title, filename):
    plt.figure(figsize=(12, 6))
    
    # Historical Data
    plt.plot(df['year'], df['value_numeric'], 'ko', label='Historical Data')
    
    # Forecast Line
    plt.plot(df['year'], df['forecast'], 'b-', label='Event-Augmented Forecast', linewidth=2)
    
    # Baseline (Counterfactual)
    plt.plot(df['year'], df['baseline_trend'], 'g--', label='Baseline Only', alpha=0.5)
    
    # Confidence Intervals (Simple heuristic: +/- 5% growing over time)
    uncertainty_grow = (df['year'] - df['year'].min()) * 0.5 # grows 0.5 pp per year
    upper = df['forecast'] + 5 + uncertainty_grow
    lower = df['forecast'] - 5 - uncertainty_grow
    
    plt.fill_between(df['year'], lower, upper, color='b', alpha=0.1, label='Confidence Interval (Optimistic/Pessimistic)')
    
    plt.title(title)
    plt.xlabel('Year')
    plt.ylabel('Percentage')
    plt.legend()
    plt.grid(True)
    plt.savefig(f'../reports/figures/{filename}')

plot_forecast(forecast_df, 'Account Ownership Forecast (2025-2027)', 'forecast_acc_ownership.png')

SyntaxError: '(' was never closed (126236280.py, line 25)

In [None]:
# Save data for dashboard
forecast_df.to_csv('../data/processed/forecast_results.csv', index=False)
print("Forecast saved to data/processed/forecast_results.csv")