In [1]:
import sys
import os
sys.path.append(r'C:/Users/hp/Downloads/KAIM/KAIM WEEK 10/Forecasting-Digital-Finance-Ethiopia/src')
print(f'Added C:/Users/hp/Downloads/KAIM/KAIM WEEK 10/Forecasting-Digital-Finance-Ethiopia/src to sys.path')


Added C:/Users/hp/Downloads/KAIM/KAIM WEEK 10/Forecasting-Digital-Finance-Ethiopia/src to sys.path


# 01. Exploration & EDA

## Objectives
- Load and enrich data.
- Explore account ownership trajectory.
- visualize events and correlations.
- Extract insights.

In [2]:
import sys
import os
import pandas as pd
import matplotlib
matplotlib.use('Agg') # Non-interactive backend
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.io as pio

# Ensure reports/figures exists
os.makedirs('../reports/figures', exist_ok=True)

# Add src to path
sys.path.append(os.path.abspath(os.path.join('../src')))
from data_loader import load_raw_data, enrich_data, process_data

# Load and Process
df_u, df_i = load_raw_data()
df_u, df_i = enrich_data(df_u, df_i)
observations, events_enriched, raw_impacts = process_data(df_u, df_i)

observations.head()

Loading data from ../data/raw/ethiopia_fi_unified_data.xlsx...


Unnamed: 0,record_id,record_type,category,pillar,indicator,indicator_code,indicator_direction,value_numeric,value_text,value_type,...,impact_direction,impact_magnitude,impact_estimate,lag_months,evidence_basis,comparable_country,collected_by,collection_date,original_text,notes
0,REC_0001,observation,,ACCESS,Account Ownership Rate,ACC_OWNERSHIP,higher_better,22.0,,percentage,...,,,,,,Example_Trainee,2025-01-20,,Baseline year,
1,REC_0002,observation,,ACCESS,Account Ownership Rate,ACC_OWNERSHIP,higher_better,35.0,,percentage,...,,,,,,Example_Trainee,2025-01-20,,,
2,REC_0003,observation,,ACCESS,Account Ownership Rate,ACC_OWNERSHIP,higher_better,46.0,,percentage,...,,,,,,Example_Trainee,2025-01-20,,,
3,REC_0004,observation,,ACCESS,Account Ownership Rate,ACC_OWNERSHIP,higher_better,56.0,,percentage,...,,,,,,Example_Trainee,2025-01-20,,Gender disaggregated,
4,REC_0005,observation,,ACCESS,Account Ownership Rate,ACC_OWNERSHIP,higher_better,36.0,,percentage,...,,,,,,Example_Trainee,2025-01-20,,Gender disaggregated,


## 1. Account Ownership Trajectory (2011-2024)

In [3]:
# Filter for ACC_OWNERSHIP
acc_own = observations[observations['indicator_code'] == 'ACC_OWNERSHIP'].sort_values('observation_date')

plt.figure(figsize=(10, 6))
sns.lineplot(data=acc_own, x='observation_date', y='value_numeric', marker='o')
plt.title('Account Ownership Trajectory (2011-2024)')
plt.ylabel('Percentage')
plt.grid(True)
plt.savefig('../reports/figures/account_ownership_trajectory.png')
plt.close()

## 2. Event Timeline
Overlay Telebirr Launch and M-Pesa Entry.

In [4]:
# Identify Key Events
key_events = events_enriched[events_enriched['original_text_evt'].str.contains('Telebirr|M-Pesa', case=False, na=False)]

# Using matplotlib for static saving easily, or plotly static image export if available
# For simplicity and robustness, let's use matplotlib for the static report
plt.figure(figsize=(12, 6))
sns.lineplot(data=acc_own, x='observation_date', y='value_numeric', marker='o', label='Account Ownership')

for _, row in key_events.iterrows():
    plt.axvline(x=row['observation_date_evt'], color='r', linestyle='--', alpha=0.7)
    plt.text(row['observation_date_evt'], acc_own['value_numeric'].mean(), row['original_text_evt'], rotation=90, verticalalignment='center')

plt.title('Account Ownership with Key Events')
plt.ylabel('Percentage')
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.savefig('../reports/figures/event_timeline.png')
plt.close()

## 3. Correlation Heatmap

In [5]:
# Pivot specific indicators to find correlations
# Filter for numeric indicators only
numeric_obs = observations[observations['value_numeric'].notna()]
pivot_df = numeric_obs.pivot_table(index='observation_date', columns='indicator_code', values='value_numeric')

plt.figure(figsize=(10, 8))
corr = pivot_df.corr()
sns.heatmap(corr, annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Correlation Heatmap')
plt.tight_layout()
plt.savefig('../reports/figures/correlation_heatmap.png')
plt.close()

## Insights

1. **Growth Trend**: Account ownership shows a consistent upward trend from 2011, reaching over 45% in recent years, though the rate of growth appears to have varied.
2. **Event Impact**: The launch of Telebirr in 2021 marks a significant point in the timeline, coinciding (or potentially preceding) continued growth in digital adoption.
3. **M-Pesa Entry**: The entry of M-Pesa introduces a major competitive force, expected to further accelerate financial inclusion metrics as seen in peer markets.
4. **Correlation with Infrastructure**: There is a strong positive correlation between digital payment usage and account ownership, suggesting that payment utility drives adoption.
5. **Recent Slowdown**: The slight plateau or slowdown in growth 2021-2024 suggests market saturation in urban areas or the need for new drivers like policy interventions to reach the next segment of the population.

## 3. Advanced Visualizations
### 3.1 Account Ownership Growth Rate (YoY)

In [6]:
# Filter for Account Ownership
acc_own = observations[observations['indicator_code'] == 'ACC_OWNERSHIP'].sort_values('observation_date').copy()
acc_own['year'] = acc_own['observation_date'].dt.year

# Calculate YoY Growth
acc_own['growth_rate'] = acc_own['value_numeric'].pct_change() * 100

plt.figure(figsize=(12, 6))
sns.barplot(data=acc_own, x='year', y='growth_rate', palette='viridis')
plt.title('YoY Growth Rate: Account Ownership')
plt.ylabel('Growth Rate (%)')
plt.xlabel('Year')
plt.grid(axis='y', alpha=0.3)
plt.savefig('../reports/figures/acc_ownership_growth_yoy.png')
plt.show()


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=acc_own, x='year', y='growth_rate', palette='viridis')


  plt.show()


### 3.2 Digital Payment Usage Trends w/ Event Markers

In [7]:
dig_pay = observations[observations['indicator_code'] == 'USG_DIGITAL_PAYMENT'].sort_values('observation_date')

plt.figure(figsize=(14, 7))
sns.lineplot(data=dig_pay, x='observation_date', y='value_numeric', marker='o', linewidth=2.5, color='orange')
plt.title('Usage of Digital Payments Over Time')
plt.ylabel('Percentage')
plt.xlabel('Date')

# Annotate key events if available
for _, event in events_enriched.iterrows():
    plt.axvline(x=event['observation_date_evt'], color='red', linestyle='--', alpha=0.3)
    # plt.text(event['observation_date_evt'], 0, event['original_text_evt'][:10], rotation=90, fontsize=8)

plt.grid(True)
plt.savefig('../reports/figures/digital_payment_trend.png')
plt.show()

  plt.show()


### 3.3 Indicator Correlation Heatmap

In [8]:
# Pivot data to have indicators as columns
pivot_df = observations.pivot_table(index='observation_date', columns='indicator_code', values='value_numeric')

plt.figure(figsize=(10, 8))
sns.heatmap(pivot_df.corr(), annot=True, cmap='coolwarm', center=0)
plt.title('Correlation Matrix of Financial Indicators')
plt.tight_layout()
plt.savefig('../reports/figures/indicator_correlation.png')
plt.show()

  plt.show()
