# Task 4: Forecasting Access and Usage

## Ethiopia Financial Inclusion Forecasting System

**Objective:** Forecast Account Ownership (Access) and Digital Payment Usage for 2025-2027.

### Approach
1. Define target indicators
2. Fit trend models (linear; a logarithmic fit is listed as an option but is not implemented in the cells below)
3. Generate scenario-based forecasts
4. Quantify uncertainty

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings
# Suppress every Python warning for the rest of the session
warnings.filterwarnings(action='ignore')

# Apply the seaborn white-grid theme to all figures created below
plt.style.use('seaborn-v0_8-whitegrid')

# Shared colour palette (hex codes) looked up by the plotting cells
COLORS = dict(
    primary='#2E86AB',
    secondary='#28A745',
    accent='#FFC107',
    danger='#DC3545',
)

print('Libraries loaded!')

In [None]:
# Read the enriched dataset from disk
df = pd.read_csv('../data/processed/ethiopia_fi_enriched.csv')

# Parse dates; entries that cannot be parsed become NaT instead of raising
parsed_dates = pd.to_datetime(df['observation_date'], errors='coerce')
df['observation_date'] = parsed_dates

# Keep an independent copy of the rows whose record type is 'observation'
is_observation = df['record_type'].eq('observation')
observations = df.loc[is_observation].copy()
print(f'Loaded {len(observations)} observations')

## Account Ownership Forecast

In [None]:
# Extract the historical account-ownership series, oldest observation first
acc = observations[observations['indicator_code'] == 'ACC_OWNERSHIP'].sort_values('observation_date')
# Dates are parsed with errors='coerce' at load time, so bad dates are NaT. A row with a
# missing date or value would propagate NaN into the fit, so such rows are dropped first.
acc = acc.dropna(subset=['observation_date', 'value_numeric'])
years = acc['observation_date'].dt.year.values
values = acc['value_numeric'].values
forecast_years = np.array([2025, 2026, 2027])

# A straight line needs observations from at least two different years
if np.unique(years).size < 2:
    raise ValueError('Need ACC_OWNERSHIP observations from at least two distinct years to fit a linear trend')

# Linear regression (degree-1 least-squares fit): value = slope * year + intercept
coeffs = np.polyfit(years, values, 1)
slope, intercept = coeffs
linear_forecast = slope * forecast_years + intercept

# Uncertainty band
fitted = slope * years + intercept
residuals = values - fitted
# Two parameters (slope, intercept) were estimated from the data, so the residual
# spread has n - 2 degrees of freedom; ddof=0 would understate it on a short series.
dof = len(values) - 2
# With exactly two points the line passes through both, so the spread cannot be estimated.
std_error = np.std(residuals, ddof=2) if dof > 0 else np.nan
# 1.96 is the two-sided 95% normal quantile. The band has constant width and does not
# include the uncertainty of the fitted coefficients, so treat it as approximate.
margin = 1.96 * std_error

# Plot history, fitted trend, forecast points with band, and the 60% target line
fig, ax = plt.subplots(figsize=(12, 6))
ax.scatter(years, values, s=100, color=COLORS['primary'], zorder=5, label='Historical')
ax.plot(years, fitted, linestyle='--', color=COLORS['primary'], alpha=0.7)
ax.scatter(forecast_years, linear_forecast, s=100, color=COLORS['secondary'], marker='s', zorder=5, label='Forecast')
ax.fill_between(forecast_years, linear_forecast - margin, linear_forecast + margin, alpha=0.2, color=COLORS['secondary'])
ax.axhline(y=60, color=COLORS['danger'], linestyle='--', label='NFIS-II Target (60%)')
ax.set_title('Account Ownership Forecast (2025-2027)')
ax.set_xlabel('Year')
ax.set_ylabel('Account Ownership (%)')
ax.legend()
plt.savefig('../reports/figures/access_forecast.png', dpi=150)
plt.show()

# Report the fitted equation and the point forecast with its band for each year
print(f'\nLinear Forecast: {slope:.2f} * year + {intercept:.2f}')
for y, point in zip(forecast_years, linear_forecast):
    print(f'  {y}: {point:.1f}% [{point-margin:.1f}% - {point+margin:.1f}%]')

## Digital Payment Forecast

In [None]:
# Extract the historical digital-payment series, oldest observation first
usage = observations[observations['indicator_code'] == 'USG_DIGITAL_PAYMENT'].sort_values('observation_date')
# Dates are parsed with errors='coerce' at load time, so bad dates are NaT. A row with a
# missing date or value would propagate NaN into the fit, so such rows are dropped first.
usage = usage.dropna(subset=['observation_date', 'value_numeric'])
u_years = usage['observation_date'].dt.year.values
u_values = usage['value_numeric'].values

# A straight line needs observations from at least two different years
if np.unique(u_years).size < 2:
    raise ValueError('Need USG_DIGITAL_PAYMENT observations from at least two distinct years to fit a linear trend')

# Degree-1 least-squares fit, projected onto forecast_years (defined in the account-ownership cell)
u_coeffs = np.polyfit(u_years, u_values, 1)
u_slope, u_intercept = u_coeffs
u_linear_forecast = u_slope * forecast_years + u_intercept

u_fitted = u_slope * u_years + u_intercept
u_residuals = u_values - u_fitted
# Two parameters (slope, intercept) were estimated from the data, so the residual
# spread has n - 2 degrees of freedom; ddof=0 would understate it on a short series.
u_dof = len(u_values) - 2
# With exactly two points the line passes through both, so the spread cannot be estimated.
u_std_error = np.std(u_residuals, ddof=2) if u_dof > 0 else np.nan
# 1.96 is the two-sided 95% normal quantile. The band has constant width and does not
# include the uncertainty of the fitted coefficients, so treat it as approximate.
u_margin = 1.96 * u_std_error

# Plot history, fitted trend, forecast points with band, and the 50% target line
fig, ax = plt.subplots(figsize=(12, 6))
ax.scatter(u_years, u_values, s=100, color=COLORS['secondary'], zorder=5, label='Historical')
ax.plot(u_years, u_fitted, linestyle='--', color=COLORS['secondary'], alpha=0.7)
ax.scatter(forecast_years, u_linear_forecast, s=100, color=COLORS['accent'], marker='s', zorder=5, label='Forecast')
ax.fill_between(forecast_years, u_linear_forecast - u_margin, u_linear_forecast + u_margin, alpha=0.2, color=COLORS['accent'])
ax.axhline(y=50, color=COLORS['danger'], linestyle='--', label='NFIS-II Target (50%)')
ax.set_title('Digital Payment Forecast (2025-2027)')
ax.set_xlabel('Year')
ax.set_ylabel('Digital Payment (%)')
ax.legend()
plt.savefig('../reports/figures/usage_forecast.png', dpi=150)
plt.show()

## Scenario Analysis

In [None]:
# Scenario-based forecasts: each scenario adds a fixed amount to the baseline per year step
current_acc = 49
current_usg = 35
scenarios = {'Optimistic': 4.0, 'Base': 2.5, 'Pessimistic': 1.0}
years_range = [2024, 2025, 2026, 2027]

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# One entry per panel: (axes object, baseline value, target line level, label stem)
panels = [
    (axes[0], current_acc, 60, 'Account Ownership'),
    (axes[1], current_usg, 50, 'Digital Payment'),
]

for panel_ax, baseline, target, label in panels:
    for name, growth in scenarios.items():
        # Use a dedicated name so the historical `values` array from the account-ownership
        # cell is not overwritten; the horizon length follows years_range.
        trajectory = [baseline + growth * step for step in range(len(years_range))]
        panel_ax.plot(years_range, trajectory, marker='o', label=name)
    panel_ax.axhline(y=target, color=COLORS['danger'], linestyle='--')
    panel_ax.set_title(f'{label} Scenarios')
    panel_ax.set_ylabel(f'{label} (%)')
    panel_ax.legend()

plt.tight_layout()
plt.savefig('../reports/figures/scenario_forecasts.png', dpi=150)
plt.show()

## Forecast Summary

In [None]:
# Build the scenario table: two rows (one per indicator) for every forecast year.
# NOTE(review): the baselines (49, 35) and yearly increments (4, 2.5, 1) repeat the
# constants of the Scenario Analysis cell; keep the two cells in sync.
results = []
for step, year in enumerate(forecast_years, start=1):
    stamp = f'{year}-01-01'
    results.append({
        'Indicator': 'Account Ownership Rate',
        'Year': stamp,
        'Optimistic': 49 + 4 * step,
        'Base': 49 + 2.5 * step,
        'Pessimistic': 49 + 1 * step,
    })
    results.append({
        'Indicator': 'Digital Payment Adoption Rate',
        'Year': stamp,
        'Optimistic': 35 + 4 * step,
        'Base': 35 + 2.5 * step,
        'Pessimistic': 35 + 1 * step,
    })

forecast_df = pd.DataFrame(results)

# Show the table, then persist it as CSV without the index column
print('\nFORECAST SUMMARY')
print(forecast_df.to_string(index=False))
output_path = '../data/processed/forecast_results.csv'
forecast_df.to_csv(output_path, index=False)
print(f'\nSaved to: {output_path}')