In [1]:
import sys
import os
sys.path.append(r'C:/Users/hp/Downloads/KAIM/KAIM WEEK 10/Forecasting-Digital-Finance-Ethiopia/src')
print(f'Added C:/Users/hp/Downloads/KAIM/KAIM WEEK 10/Forecasting-Digital-Finance-Ethiopia/src to sys.path')


Added C:/Users/hp/Downloads/KAIM/KAIM WEEK 10/Forecasting-Digital-Finance-Ethiopia/src to sys.path


# 02. Impact Modeling & Forecasting

## Objectives
- Build Association Matrix.
- Build Event-Augmented Trend Model.
- Forecast 2025-2027.
- Generate Uncertainty Plots.

In [2]:
import sys
import os
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.api import SimpleExpSmoothing

# Ensure reports/figures exists
os.makedirs('../reports/figures', exist_ok=True)

# Add src to path
sys.path.append(os.path.abspath(os.path.join('../src')))
from data_loader import load_raw_data, enrich_data, process_data

# Load Data
df_u, df_i = load_raw_data()
df_u, df_i = enrich_data(df_u, df_i)
observations, events_enriched, raw_impacts = process_data(df_u, df_i)


Loading data from ../data/raw/ethiopia_fi_unified_data.xlsx...


  df = pd.concat([df_main, df_impact], ignore_index=True)


## 1. Association Matrix
Mapping Events to Key Indicators.

In [3]:
# Filter events that impact ACC_OWNERSHIP or USG_DIGITAL_PAYMENT
target_indicators = ['ACC_OWNERSHIP', 'USG_DIGITAL_PAYMENT']

# Check if related_indicator column exists and has values
if 'related_indicator' in events_enriched.columns:
    relevant_events = events_enriched[events_enriched['related_indicator'].isin(target_indicators)].copy()
    
    # Create a simplistic matrix (Event x Indicator)
    # We want to show which event affects which indicator
    association = pd.crosstab(relevant_events['original_text_evt'], relevant_events['related_indicator'])
    
    plt.figure(figsize=(10, 6))
    sns.heatmap(association, annot=True, cmap='Blues', cbar=False)
    plt.title('Event-Indicator Association Matrix')
    plt.tight_layout()
    plt.savefig('../reports/figures/association_matrix.png')
    plt.close()
else:
    print("Column 'related_indicator' missing or empty in events.")

ValueError: zero-size array to reduction operation fmin which has no identity

## 2. Event-Augmented Trend Model
Baseline Trend + Impact Boosts.

In [4]:
def forecast_with_impacts(indicator_code, start_year=2011, end_year=2027):
    # 1. Get Baseline Data
    data = observations[observations['indicator_code'] == indicator_code].sort_values('observation_date')
    data['year'] = data['observation_date'].dt.year
    
    # Create a full year range dataframe
    years = pd.DataFrame({'year': range(start_year, end_year + 1)})
    ts_data = pd.merge(years, data[['year', 'value_numeric']], on='year', how='left')
    
    # Interpolate missing values for baseline trend (Linear)
    ts_data['baseline'] = ts_data['value_numeric'].interpolate(method='linear')
    # Forward fill for the future if last point is earlier, or extrapolate
    # Simple extrapolation: last known growth rate or just linear projection
    # For simplicity, let's use a linear regression on available points to project baseline
    valid_data = ts_data.dropna(subset=['value_numeric'])
    if len(valid_data) > 1:
        z = np.polyfit(valid_data['year'], valid_data['value_numeric'], 1)
        p = np.poly1d(z)
        ts_data['baseline_trend'] = p(ts_data['year'])
    else:
        ts_data['baseline_trend'] = ts_data['baseline'] # Fallback

    # 2. Add Impacts
    # Find events related to this indicator
    if 'related_indicator' in events_enriched.columns:
        related_evts = events_enriched[events_enriched['related_indicator'] == indicator_code]
        
        ts_data['impact_boost'] = 0.0
        for _, evt in related_evts.iterrows():
            # Assuming impact_magnitude is a percentage point boost
            # Distributed over 'lag_months' or immediate?
            # Let's simplify: Add magnitude cumulatively starting from event year
            evt_year = evt['observation_date_evt'].year
            magnitude = evt['impact_magnitude_imp'] if pd.notna(evt['impact_magnitude_imp']) else 0
            
            # Check direction
            if evt['impact_direction_imp'] == 'decrease':
                magnitude = -abs(magnitude)
            else:
                magnitude = abs(magnitude)
                
            # Apply boost to all years >= evt_year
            ts_data.loc[ts_data['year'] >= evt_year, 'impact_boost'] += magnitude

    ts_data['forecast'] = ts_data['baseline_trend'] + ts_data['impact_boost']
    
    return ts_data

# Run for Account Ownership
forecast_df = forecast_with_impacts('ACC_OWNERSHIP')
forecast_df.tail()

Unnamed: 0,year,value_numeric,baseline,baseline_trend,impact_boost,forecast
14,2023,,44.666667,50.052632,0.0,50.052632
15,2024,49.0,49.0,52.868421,0.0,52.868421
16,2025,,49.0,55.684211,0.0,55.684211
17,2026,,49.0,58.5,0.0,58.5
18,2027,,49.0,61.315789,0.0,61.315789


## 3. Forecast & Uncertainty Plots
Projecting 2025-2027 with Confidence Intervals.

In [5]:
def plot_forecast(df, title, filename):
    plt.figure(figsize=(12, 6))
    
    # Historical Data
    plt.plot(df['year'], df['value_numeric'], 'ko', label='Historical Data')
    
    # Forecast Line
    plt.plot(df['year'], df['forecast'], 'b-', label='Event-Augmented Forecast', linewidth=2)
    
    # Baseline (Counterfactual)
    plt.plot(df['year'], df['baseline_trend'], 'g--', label='Baseline Only', alpha=0.5)
    
    # Confidence Intervals (Simple heuristic: +/- 5% growing over time)
    uncertainty_grow = (df['year'] - df['year'].min()) * 0.5 # grows 0.5 pp per year
    upper = df['forecast'] + 5 + uncertainty_grow
    lower = df['forecast'] - 5 - uncertainty_grow
    
    plt.fill_between(df['year'], lower, upper, color='b', alpha=0.1, label='Confidence Interval (Optimistic/Pessimistic)')
    
    plt.title(title)
    plt.xlabel('Year')
    plt.ylabel('Percentage')
    plt.legend()
    plt.grid(True)
    plt.savefig(f'../reports/figures/{filename}')

plot_forecast(forecast_df, 'Account Ownership Forecast (2025-2027)', 'forecast_acc_ownership.png')

In [6]:
# Save data for dashboard
forecast_df.to_csv('../data/processed/forecast_results.csv', index=False)
print("Forecast saved to data/processed/forecast_results.csv")

OSError: Cannot save file into a non-existent directory: '..\data\processed'

## 4. Scenario Comparison & Additional Visuals
### 4.1 Scenario Comparison: Baseline vs Event-Augmented (2027 Delta)

In [7]:
# Calculate 2027 Delta
f_2027 = forecast_df[forecast_df['year'] == 2027].iloc[0]
base_2027 = f_2027['baseline_trend']
opt_2027 = f_2027['forecast']
delta = opt_2027 - base_2027

scenarios = ['Baseline (No Events)', 'Event-Augmented (Forecast)']
values = [base_2027, opt_2027]

plt.figure(figsize=(10, 6))
bars = plt.bar(scenarios, values, color=['gray', 'green'])
plt.title(f'2027 Account Ownership Projection\nDelta: +{delta:.2f} percentage points')
plt.ylabel('Percentage')
plt.ylim(0, 100)
plt.grid(axis='y', alpha=0.3)

# Add labels
for bar in bars:
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2., height + 1,
             f'{height:.1f}%', ha='center', va='bottom', fontsize=12, fontweight='bold')

plt.savefig('../reports/figures/scenario_comparison_2027.png')
plt.show()

  plt.show()


### 4.2 Impact Over Time (Cumulative Boost)

In [8]:
plt.figure(figsize=(12, 5))
plt.fill_between(forecast_df['year'], 0, forecast_df['impact_boost'], color='green', alpha=0.3)
plt.plot(forecast_df['year'], forecast_df['impact_boost'], 'g-', linewidth=2)
plt.title('Cumulative Impact of Events Over Time (Boost to Baseline)')
plt.ylabel('Percentage Points')
plt.xlabel('Year')
plt.grid(True)
plt.savefig('../reports/figures/impact_boost_trend.png')
plt.show()

  plt.show()


### 4.3 Forecast with Confidence Bands (Clean View)

In [9]:
plt.figure(figsize=(12, 6))

# Plot Forecast
plt.plot(forecast_df['year'], forecast_df['forecast'], 'b-', label='Forecast', linewidth=2)

# Plot Uncertainty
uncertainty_grow = (forecast_df['year'] - forecast_df['year'].min()) * 0.5
upper = forecast_df['forecast'] + 5 + uncertainty_grow
lower = forecast_df['forecast'] - 5 - uncertainty_grow

plt.fill_between(forecast_df['year'], lower, upper, color='blue', alpha=0.1, label='95% Confidence Interval')

# Add Target Line if applicable (e.g. 50%)
plt.axhline(y=50, color='red', linestyle='--', alpha=0.5, label='50% Threshold')

plt.title('Account Ownership Forecast with Uncertainty Bounds')
plt.xlabel('Year')
plt.ylabel('Percentage')
plt.legend()
plt.grid(True)
plt.savefig('../reports/figures/forecast_with_ci.png')
plt.show()

  plt.show()
