# Task 2: Exploratory Data Analysis

## Ethiopia Financial Inclusion Forecasting System

**Objective:** Analyze patterns and factors influencing financial inclusion in Ethiopia.

### Key Questions
1. What factors drive financial inclusion?
2. Why did account ownership stagnate (only +3pp between 2021 and 2024) despite 65M+ registered mobile money accounts?
3. What is the gender gap and how has it evolved?

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Notebook-wide setup: warning filter, plot theme, and shared palette.

# Suppress every warning so library notices do not clutter cell output.
warnings.filterwarnings('ignore')

# Apply the seaborn-style white grid theme to all subsequent figures.
plt.style.use('seaborn-v0_8-whitegrid')

# Hex colour palette shared by the plotting cells, keyed by role.
COLORS = dict(
    primary='#2E86AB',
    secondary='#28A745',
    accent='#FFC107',
    danger='#DC3545',
)

print('Libraries loaded!')

In [None]:
# Load the enriched dataset and parse the observation dates.
# errors='coerce' turns unparseable dates into NaT instead of raising.
df = pd.read_csv('../data/processed/ethiopia_fi_enriched.csv').assign(
    observation_date=lambda frame: pd.to_datetime(frame['observation_date'], errors='coerce')
)

# Split the rows by record type; .copy() makes each subset independent of df.
observations = df.loc[df['record_type'].eq('observation')].copy()
events = df.loc[df['record_type'].eq('event')].copy()

print(f'Observations: {len(observations)}, Events: {len(events)}')

## Access Analysis: Account Ownership

In [None]:
# Account Ownership Trajectory
# Line chart of the ACC_OWNERSHIP indicator in date order, with each point
# labelled and the NFIS-II 60% target drawn as a dashed reference line.
acc = observations.loc[observations['indicator_code'].eq('ACC_OWNERSHIP')].sort_values('observation_date')

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(
    acc['observation_date'],
    acc['value_numeric'],
    marker='o',
    markersize=12,
    linewidth=3,
    color=COLORS['primary'],
)

# Put a rounded percentage label 12 points above every marker.
for when, pct in zip(acc['observation_date'], acc['value_numeric']):
    ax.annotate(
        f"{pct:.0f}%",
        (when, pct),
        xytext=(0, 12),
        textcoords='offset points',
        ha='center',
        fontweight='bold',
        fontsize=12,
    )

ax.axhline(y=60, color=COLORS['danger'], linestyle='--', label='NFIS-II Target (60%)')
ax.set_title('Ethiopia Account Ownership Trajectory (2011-2024)', fontsize=14)
# Fixed y-range so the 60% target line stays inside the axes.
ax.set_ylim(0, 70)
ax.legend()

# Save before show() so the written file contains the rendered figure.
plt.savefig('../reports/figures/account_ownership_trajectory.png', dpi=150)
plt.show()

## Gender Gap Analysis

In [None]:
# Gender gap: grouped bar chart of male vs. female account ownership.
male = observations[observations['indicator_code'] == 'ACC_OWNERSHIP_M'].sort_values('observation_date')
female = observations[observations['indicator_code'] == 'ACC_OWNERSHIP_F'].sort_values('observation_date')

# Pair the two series by observation date instead of by row position.
# Positional pairing raises in ax.bar when the series differ in length and
# silently mislabels bars when their dates differ; the inner join keeps only
# dates that have both a male and a female value.
# NOTE(review): assumes at most one row per indicator per date — confirm.
paired = male[['observation_date', 'value_numeric']].merge(
    female[['observation_date', 'value_numeric']],
    on='observation_date',
    how='inner',
    suffixes=('_male', '_female'),
).sort_values('observation_date')

# Report rows that could not be paired rather than dropping them silently.
unmatched = len(male) + len(female) - 2 * len(paired)
if unmatched > 0:
    print(f'Gender gap chart: skipped {unmatched} observation(s) without a same-date male/female counterpart')

if not paired.empty:
    fig, ax = plt.subplots(figsize=(10, 6))
    x = np.arange(len(paired))
    width = 0.35
    # Offset the two bars by half a width either side of each tick.
    ax.bar(x - width/2, paired['value_numeric_male'].values, width, label='Male', color=COLORS['primary'])
    ax.bar(x + width/2, paired['value_numeric_female'].values, width, label='Female', color='#E91E63')
    ax.set_xticks(x)
    # Tick labels are the calendar year of each paired observation.
    ax.set_xticklabels(paired['observation_date'].dt.year.values)
    ax.set_title('Gender Gap in Account Ownership')
    ax.legend()
    plt.savefig('../reports/figures/gender_gap_analysis.png', dpi=150)
    plt.show()

## Usage Analysis: Digital Payments

In [None]:
# Digital payment usage: line chart of the USG_DIGITAL_PAYMENT indicator in
# date order, with point labels and the NFIS-II 50% target as a dashed line.
usage = observations.loc[observations['indicator_code'].eq('USG_DIGITAL_PAYMENT')].sort_values('observation_date')

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(
    usage['observation_date'],
    usage['value_numeric'],
    marker='s',
    markersize=10,
    linewidth=2.5,
    color=COLORS['secondary'],
)

# Put a rounded percentage label 10 points above every marker.
for when, pct in zip(usage['observation_date'], usage['value_numeric']):
    ax.annotate(
        f"{pct:.0f}%",
        (when, pct),
        xytext=(0, 10),
        textcoords='offset points',
        ha='center',
        fontweight='bold',
    )

ax.axhline(y=50, color=COLORS['danger'], linestyle='--', label='NFIS-II Target (50%)')
ax.set_title('Digital Payment Adoption Trend (2017-2024)')
ax.legend()

# Save before show() so the written file contains the rendered figure.
plt.savefig('../reports/figures/usage_analysis.png', dpi=150)
plt.show()

## Mobile Money Growth

In [None]:
# Mobile money growth: registered users over time for Telebirr and M-Pesa.
telebirr = observations.loc[observations['indicator_code'].eq('ACC_TELEBIRR_USERS')].sort_values('observation_date')
mpesa = observations.loc[observations['indicator_code'].eq('ACC_MPESA_USERS')].sort_values('observation_date')

fig, ax = plt.subplots(figsize=(12, 6))

# One line per provider; a provider with no rows is skipped.
for provider_rows, marker, color_key, label in (
    (telebirr, 'o', 'secondary', 'Telebirr'),
    (mpesa, 's', 'danger', 'M-Pesa'),
):
    if provider_rows.empty:
        continue
    ax.plot(
        provider_rows['observation_date'],
        provider_rows['value_numeric'] / 1e6,  # scale to millions, matching the y-axis label
        marker=marker,
        linewidth=2.5,
        color=COLORS[color_key],
        label=label,
    )

ax.set_ylabel('Registered Users (Millions)')
ax.set_title('Mobile Money User Growth')
ax.legend()

# Save before show() so the written file contains the rendered figure.
plt.savefig('../reports/figures/mobile_money_growth.png', dpi=150)
plt.show()

## Key Insights

In [None]:
# Headline findings, written out as fixed text (not computed from the data).
# NOTE(review): insight 5 quotes 9.45% account ownership while insight 1
# quotes 49% for 2024, and the Key Questions section says 65M+ (not 64M+)
# mobile money accounts — confirm the intended figures and wording.
for insight_line in (
    'KEY INSIGHTS',
    '1. Account ownership: 14% (2011) -> 49% (2024) = +35pp',
    '2. Stagnation: Only +3pp (2021-2024) despite mobile money boom',
    '3. Gender gap narrowing: 8pp (2021) -> 4pp (2024)',
    '4. Digital payments growing: 20% (2017) -> 35% (2024)',
    '5. 64M+ mobile money registrations but only 9.45% account ownership',
):
    print(insight_line)