# Exploratory Data Analysis for Ethiopia Financial Inclusion Forecasting

In [3]:
import sys
from pathlib import Path
PROJECT_ROOT = Path().resolve().parent
sys.path.insert(0, str(PROJECT_ROOT))

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Import custom modules
from src.preprocessing import DataPreprocessor
from src.data_loader import DataLoader

# Pandas display: lift the limits on column count, console width and cell width
# so wide frames are shown in full.
for display_option in ('display.max_columns', 'display.width', 'display.max_colwidth'):
    pd.set_option(display_option, None)

# Plot defaults applied to every matplotlib/seaborn figure created below.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 12,
})

# One fixed colour per pillar / record family, looked up by later cells
# (e.g. COLORS['access'], COLORS.get(pillar, fallback)).
COLORS = dict(
    access='#2E86AB',
    usage='#A23B72',
    enabler='#F18F01',
    infrastructure='#73AB84',
    events='#C73E1D',
)

# Notebook banner: rule, title, rule.
print("=" * 80, "FINANCIAL INCLUSION EDA - ETHIOPIA", "=" * 80, sep="\n")

ImportError: cannot import name 'DataPreprocessor' from 'src.preprocessing' (C:\Users\eyaze\OneDrive\Desktop\ethiopia-fi-forecast-week-10\src\preprocessing.py)

# 1. LOAD AND PREPROCESS DATA

In [None]:

print("\n1. Loading and preprocessing data...")
print("-"*40)

# Load data using the project's DataLoader (paths are relative to the notebook folder).
loader = DataLoader(
    raw_data_path='../data/processed/enriched_data.csv',
    reference_codes_path='../data/raw/reference_codes.csv'
)
raw_data = loader.load_data()

# Preprocess: `data` is the frame every later cell reads from, and
# `preprocessor` is reused in section 9 to persist the processed data.
preprocessor = DataPreprocessor(raw_data)
data = preprocessor.prepare_analysis_dataset()

# Validate data quality; the result is indexed below as
# quality_metrics['missing_values'][column]['percentage'].
quality_metrics = preprocessor.validate_data_quality()

# The status glyph had been stored as mojibake (the UTF-8 bytes of U+2713
# decoded with a legacy code page); it is restored to the check mark here.
print(f"✓ Data loaded: {len(data):,} records")
print("✓ Data quality validation completed")
print("✓ Missing values summary:")
for col, metrics in quality_metrics['missing_values'].items():
    # Only report columns that actually have gaps.
    if metrics['percentage'] > 0:
        print(f"  - {col}: {metrics['percentage']:.1f}% missing")



# 2. DATASET OVERVIEW

In [None]:
print("\n2. Dataset Overview")
print("-"*40)

# 2.1 Record Type Distribution
# Three side-by-side panels: record types, observation pillars, confidence levels.
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
ax_records, ax_pillars, ax_confidence = axes

# Panel 1: share of each record type.
record_counts = data['record_type'].value_counts()
ax_records.pie(
    record_counts.values,
    labels=record_counts.index,
    autopct='%1.1f%%',
    colors=['#2E86AB', '#A23B72', '#F18F01'],
)
ax_records.set_title('Record Type Distribution', fontsize=14, fontweight='bold')

# Panel 2: pillar counts, restricted to observation records.
# `obs_data` is reused by the following analysis cells.
obs_data = data[data['record_type'] == 'observation']
pillar_counts = obs_data['pillar'].value_counts()
pillar_bar_colors = [COLORS.get(pillar, '#999999') for pillar in pillar_counts.index]
ax_pillars.bar(pillar_counts.index, pillar_counts.values, color=pillar_bar_colors)
ax_pillars.set_title('Observation Pillar Distribution', fontsize=14, fontweight='bold')
ax_pillars.set_ylabel('Count')
ax_pillars.tick_params(axis='x', rotation=45)

# Panel 3: confidence levels, drawn only when the column is present.
if 'confidence' in data.columns:
    conf_counts = data['confidence'].value_counts()
    colors_conf = {'high': '#4CAF50', 'medium': '#FFC107', 'low': '#F44336'}
    conf_colors = [colors_conf.get(level, '#999999') for level in conf_counts.index]
    ax_confidence.bar(conf_counts.index, conf_counts.values, color=conf_colors)
    ax_confidence.set_title('Confidence Level Distribution', fontsize=14, fontweight='bold')
    ax_confidence.set_ylabel('Count')

plt.tight_layout()
plt.savefig('../reports/figures/dataset_overview.png', dpi=300, bbox_inches='tight')
plt.show()

# 2.2 Temporal Coverage
print("\nTemporal Coverage Analysis:")
print("-"*40)

# Indicator x year matrix holding the number of numeric observations per cell;
# combinations with no observation become 0.
observation_counts = obs_data.pivot_table(
    index='indicator',
    columns='year',
    values='value_numeric',
    aggfunc='count',
)
coverage_data = observation_counts.fillna(0)

# Heatmap of the matrix; zero cells are masked so gaps show as blanks.
plt.figure(figsize=(16, 10))
mask = coverage_data == 0
sns.heatmap(
    coverage_data,
    mask=mask,
    cmap='YlOrBr',
    cbar_kws={'label': 'Number of Observations'},
)
plt.title(
    'Temporal Coverage of Financial Inclusion Indicators',
    fontsize=16,
    fontweight='bold',
    pad=20,
)
plt.xlabel('Year', fontsize=12)
plt.ylabel('Indicator', fontsize=12)
plt.tight_layout()
plt.savefig('../reports/figures/temporal_coverage.png', dpi=300, bbox_inches='tight')
plt.show()

# 3. ACCESS ANALYSIS

In [None]:

print("\n3. Access Analysis - Account Ownership")
print("-"*40)

# 3.1 Account Ownership Trend
# `acc_ownership` is reused by the correlation, event-timeline, summary and
# dashboard cells further down.
acc_ownership = obs_data[obs_data['indicator_code'] == 'ACC_OWNERSHIP'].copy()
acc_ownership = acc_ownership.sort_values('observation_date')

if not acc_ownership.empty:
    # Change versus the previous observation: absolute (percentage points)
    # and relative to the previous value (%).
    acc_ownership['growth'] = acc_ownership['value_numeric'].diff()
    acc_ownership['growth_pct'] = (acc_ownership['growth'] / acc_ownership['value_numeric'].shift(1)) * 100

    # Create visualization
    fig, ax1 = plt.subplots(figsize=(14, 8))

    # Primary axis - account ownership. Years are plotted as strings, so the
    # x axis is categorical and point i sits at x position i.
    years = acc_ownership['year'].astype(str)
    values = acc_ownership['value_numeric']

    ax1.plot(years, values, marker='o', markersize=10, linewidth=3,
            color=COLORS['access'], label='Account Ownership')
    ax1.fill_between(years, values, alpha=0.2, color=COLORS['access'])

    # Add growth annotations. The "+" format flag emits the correct sign for
    # both gains and declines (a hard-coded "+" would print "+-1.2pp").
    for position, (value, growth) in enumerate(zip(values, acc_ownership['growth'])):
        if pd.isna(growth):
            continue  # first observation has no predecessor
        ax1.annotate(f"{growth:+.1f}pp",
                    xy=(position, value),
                    xytext=(0, 15),
                    textcoords='offset points',
                    ha='center', va='bottom',
                    fontsize=10, fontweight='bold',
                    bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.8))

    ax1.set_xlabel('Year', fontsize=12)
    ax1.set_ylabel('Account Ownership (%)', fontsize=12, color=COLORS['access'])
    ax1.set_title('Ethiopia: Account Ownership Trend (2011-2024)',
                 fontsize=16, fontweight='bold', pad=20)
    ax1.grid(True, alpha=0.3)
    ax1.set_ylim(0, 60)
    ax1.tick_params(axis='x', rotation=45)

    # Secondary axis - growth rate (skips the first row, which has no growth).
    ax2 = ax1.twinx()
    ax2.bar(years.iloc[1:], acc_ownership['growth_pct'].iloc[1:],
           alpha=0.3, color='gray', label='Growth Rate (%)')
    ax2.set_ylabel('Growth Rate (%)', fontsize=12, color='gray')
    ax2.axhline(y=0, color='black', linestyle='-', linewidth=0.5)

    # Combine legends from both axes into one box.
    lines1, labels1 = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax1.legend(lines1 + lines2, labels1 + labels2, loc='upper left')

    plt.tight_layout()
    plt.savefig('../reports/figures/account_ownership_trend.png', dpi=300, bbox_inches='tight')
    plt.show()

    # Print analysis
    print("\nAccount Ownership Analysis:")
    print(f"{'Year':<10} {'Rate (%)':<10} {'Growth (pp)':<12} {'Growth (%)':<10}")
    print("-"*45)
    for _, row in acc_ownership.iterrows():
        year = str(row['year'])
        rate = f"{row['value_numeric']:.1f}"
        growth_pp = f"{row['growth']:+.1f}" if not pd.isna(row['growth']) else "N/A"
        growth_pct = f"{row['growth_pct']:.1f}%" if not pd.isna(row['growth_pct']) else "N/A"
        print(f"{year:<10} {rate:<10} {growth_pp:<12} {growth_pct:<10}")

    # 3.2 Gender Gap Analysis
    print("\n" + "-"*40)
    print("Gender Gap Analysis:")

    # Keywords used to find gender-specific indicators by name.
    gender_patterns = {
        'male': ['male', 'men', 'males'],
        'female': ['female', 'women', 'females']
    }

    gender_data = {}
    for gender, patterns in gender_patterns.items():
        # Match the keywords only as standalone words. A plain substring search
        # for "male"/"men" also matches "female"/"women", which would put the
        # female rows into the male series. Letter lookarounds are used instead
        # of \b so that names joined with "_" or digits still match.
        keyword_regex = r'(?<![a-z])(?:' + '|'.join(patterns) + r')(?![a-z])'
        mask = obs_data['indicator'].str.contains(keyword_regex, case=False, na=False)
        if mask.any():
            gender_data[gender] = obs_data[mask].copy()
            print(f"✓ {gender.capitalize()} data found: {len(gender_data[gender])} records")

    if len(gender_data) >= 2:
        # Create comparison plot
        fig, ax = plt.subplots(figsize=(12, 6))

        for gender, gender_df in gender_data.items():
            gender_df = gender_df.sort_values('observation_date')
            ax.plot(gender_df['year'].astype(str), gender_df['value_numeric'],
                   marker='o', linewidth=2, label=gender.capitalize())

        ax.set_xlabel('Year')
        ax.set_ylabel('Account Ownership (%)')
        ax.set_title('Gender Gap in Account Ownership', fontsize=14, fontweight='bold')
        ax.legend()
        ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.savefig('../reports/figures/gender_gap.png', dpi=300, bbox_inches='tight')
        plt.show()




# 4. USAGE ANALYSIS

In [None]:
print("\n4. Usage Analysis - Digital Payments")
print("-"*40)

# 4.1 Digital Payment Indicators (selected by indicator code)
usage_indicators = ['USG_DIGITAL_PAYMENT', 'ACC_MM_ACCOUNT', 'USG_RECEIVE_WAGES']
usage_data = obs_data[obs_data['indicator_code'].isin(usage_indicators)].copy()

if not usage_data.empty:
    # Year x indicator matrix of mean values. Columns come from the
    # `indicator` column so the legend shows that label, not the code.
    usage_pivot = usage_data.pivot_table(
        index='year',
        columns='indicator',
        values='value_numeric',
        aggfunc='mean'
    ).sort_index()

    # Create visualization
    fig, ax = plt.subplots(figsize=(14, 8))

    markers = ['o', 's', '^', 'D', 'v']
    for idx, (indicator, values) in enumerate(usage_pivot.items()):
        ax.plot(usage_pivot.index.astype(str), values,
               marker=markers[idx % len(markers)],  # cycle if more series than markers
               linewidth=2,
               markersize=8,
               label=indicator)

    ax.set_xlabel('Year', fontsize=12)
    ax.set_ylabel('Rate (%)', fontsize=12)
    ax.set_title('Digital Payment Usage Indicators',
                fontsize=16, fontweight='bold', pad=20)
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    ax.grid(True, alpha=0.3)
    ax.tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.savefig('../reports/figures/digital_payment_trends.png', dpi=300, bbox_inches='tight')
    plt.show()

    # Analyze the paradox
    print("\nDigital Payment Paradox Analysis:")
    print("-"*40)

    # Look the series up by indicator *code*. `usage_pivot` is keyed by the
    # `indicator` column, so testing it for 'ACC_MM_ACCOUNT' only works when
    # names and codes happen to be identical. Rows without a value are dropped
    # so the "latest" figure is the most recent year that has data.
    mm_by_year = (
        usage_data.loc[usage_data['indicator_code'] == 'ACC_MM_ACCOUNT']
        .groupby('year')['value_numeric']
        .mean()
        .dropna()
        .sort_index()
    )

    if not mm_by_year.empty:
        latest_mm = mm_by_year.iloc[-1]
        print(f"Findex-reported mobile money accounts: {latest_mm:.2f}%")
        # The figures below are fixed reference values written into the
        # notebook, not derived from the loaded data (65M / 70M ~= 93%).
        print("Operator-reported registered accounts: ~65 million")
        print("Adult population (15+): ~70 million")
        print("Implied penetration if unique: ~93%")
        print(f"\nGap: {(93 - latest_mm):.1f} percentage points")

        print("\nPossible Explanations:")
        explanations = [
            "1. Multiple accounts per user (SIM cards, bank accounts)",
            "2. Inactive/dormant accounts",
            "3. Accounts owned by minors (<15 years)",
            "4. Measurement differences (registered vs. actively used)",
            "5. Accounts for business/organizational use"
        ]
        for exp in explanations:
            print(f"  {exp}")


# 5. INFRASTRUCTURE AND ENABLERS

In [None]:

print("\n5. Infrastructure and Enablers Analysis")
print("-"*40)

# Extract infrastructure data: indicator codes prefixed EN_ or INF_.
# Missing codes are dropped and values coerced to str so a non-string code
# cannot break the prefix test.
infra_indicators = [code for code in obs_data['indicator_code'].dropna().unique()
                   if str(code).startswith(('EN_', 'INF_'))]
infra_data = obs_data[obs_data['indicator_code'].isin(infra_indicators)].copy()

if not infra_data.empty:
    # Year x indicator-code matrix of mean values.
    infra_ts = infra_data.pivot_table(
        index='year',
        columns='indicator_code',
        values='value_numeric',
        aggfunc='mean'
    ).sort_index()

    # Plot infrastructure trends: one panel per indicator on a 2-column grid.
    n_indicators = len(infra_ts.columns)
    n_cols = 2
    # Ceiling division; at least one row so the grid is valid even when the
    # pivot ends up without columns.
    n_rows = max(1, (n_indicators + n_cols - 1) // n_cols)

    # squeeze=False always yields a 2-D array, so flatten() is safe for any grid.
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 4*n_rows), squeeze=False)
    axes = axes.flatten()

    for ax, (indicator, values) in zip(axes, infra_ts.items()):
        ax.plot(infra_ts.index.astype(str), values,
               marker='o', linewidth=2, color=COLORS['infrastructure'])
        ax.set_title(indicator, fontsize=12)
        ax.set_xlabel('Year')
        ax.set_ylabel('Value')
        ax.grid(True, alpha=0.3)
        ax.tick_params(axis='x', rotation=45)

    # Hide unused subplots (odd indicator count leaves one empty panel).
    for ax in axes[n_indicators:]:
        ax.axis('off')

    plt.suptitle('Infrastructure and Enabler Trends',
                fontsize=16, fontweight='bold', y=1.02)
    plt.tight_layout()
    plt.savefig('../reports/figures/infrastructure_trends.png', dpi=300, bbox_inches='tight')
    plt.show()

    # 5.2 Correlation Analysis
    print("\nCorrelation Analysis:")
    print("-"*40)

    # Combine inclusion and infrastructure data
    inclusion_indicators = ['ACC_OWNERSHIP', 'USG_DIGITAL_PAYMENT']
    all_indicators = inclusion_indicators + list(infra_ts.columns)

    # Build the combined year x code matrix straight from the observations.
    # This keeps the cell independent of variables created in other cells
    # (usage_pivot is keyed by indicator name and may not exist), averages
    # duplicate years instead of failing on them, and keeps every year that
    # has data for any of the indicators.
    combined_data = (
        obs_data[obs_data['indicator_code'].isin(all_indicators)]
        .pivot_table(
            index='year',
            columns='indicator_code',
            values='value_numeric',
            aggfunc='mean'
        )
        .sort_index()
    )
    # Inclusion outcomes first, then infrastructure, skipping codes without data.
    combined_data = combined_data[[code for code in all_indicators
                                   if code in combined_data.columns]]

    # Calculate correlations (pairwise over years where both series have values).
    corr_matrix = combined_data.corr()

    # Plot correlation heatmap; the upper triangle and diagonal are masked
    # because the matrix is symmetric.
    plt.figure(figsize=(12, 10))
    mask = np.triu(np.ones_like(corr_matrix, dtype=bool))
    sns.heatmap(corr_matrix, mask=mask, annot=True, cmap='RdBu_r',
               center=0, square=True, linewidths=1,
               cbar_kws={"shrink": 0.8}, fmt='.2f')
    plt.title('Correlation: Infrastructure vs. Inclusion Outcomes',
             fontsize=16, fontweight='bold', pad=20)
    plt.tight_layout()
    plt.savefig('../reports/figures/correlation_matrix.png', dpi=300, bbox_inches='tight')
    plt.show()

    # Print key correlations
    print("\nKey Correlations with Account Ownership:")
    if 'ACC_OWNERSHIP' in corr_matrix.columns:
        acc_corrs = corr_matrix['ACC_OWNERSHIP'].sort_values(ascending=False)
        for indicator, corr in acc_corrs.items():
            if indicator != 'ACC_OWNERSHIP' and not pd.isna(corr):
                # |r| > 0.7 strong, > 0.4 moderate, otherwise weak.
                strength = "Strong" if abs(corr) > 0.7 else "Moderate" if abs(corr) > 0.4 else "Weak"
                print(f"  {indicator}: {corr:.3f} ({strength})")




# 6. EVENT TIMELINE ANALYSIS

In [None]:
print("\n6. Event Timeline Analysis")
print("-"*40)

# Extract events (`events` is reused by the interactive dashboard cell).
events = data[data['record_type'] == 'event'].copy()
if not events.empty:
    from matplotlib.patches import Patch
    from matplotlib.ticker import MaxNLocator

    events = events.sort_values('event_date')

    # Create event timeline
    fig, ax = plt.subplots(figsize=(16, 10))

    # Plot account ownership as reference on a *numeric* year axis. With
    # string years matplotlib builds a categorical axis in order of first
    # appearance, so an event in a year without a survey point would be
    # appended to the right of the last survey year instead of between its
    # neighbours.
    if not acc_ownership.empty:
        ax.plot(pd.to_numeric(acc_ownership['year']), acc_ownership['value_numeric'],
               linewidth=2, color=COLORS['access'], label='Account Ownership', zorder=1)

    # One colour per event category, resolved through a dict rather than a
    # list search per event.
    event_categories = events['category'].unique()
    category_colors = plt.cm.Set3(np.linspace(0, 1, len(event_categories)))
    color_by_category = dict(zip(event_categories, category_colors))

    for _, event in events.iterrows():
        event_date = pd.to_datetime(event['event_date'], errors='coerce')
        if pd.isna(event_date):
            continue  # an undated event cannot be placed on the time axis
        event_year = event_date.year
        # Grey fallback covers a missing category value that fails the lookup.
        event_color = color_by_category.get(event['category'], 'gray')

        # Add vertical line
        ax.axvline(x=event_year, color=event_color,
                  alpha=0.7, linestyle='--', linewidth=1.5, zorder=2)

        # Add event label, truncated to 30 characters (str() guards against
        # a missing name).
        event_name = str(event['event_name'])
        ax.text(event_year, ax.get_ylim()[1]*0.95,
               event_name[:30] + ('...' if len(event_name) > 30 else ''),
               rotation=90, ha='right', va='top', fontsize=8,
               bbox=dict(boxstyle="round,pad=0.2",
                        facecolor=event_color,
                        alpha=0.3))

    # Add legend for event categories
    legend_elements = [Patch(facecolor=category_colors[i], alpha=0.7,
                           label=cat) for i, cat in enumerate(event_categories)]
    ax.legend(handles=legend_elements, loc='upper left', bbox_to_anchor=(1, 1))

    ax.set_xlabel('Year', fontsize=12)
    ax.set_ylabel('Account Ownership (%)', fontsize=12)
    ax.set_title('Financial Inclusion Events Timeline',
                fontsize=16, fontweight='bold', pad=20)
    ax.grid(True, alpha=0.3)
    # Whole-year ticks only (no "2021.5" on the numeric axis).
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.savefig('../reports/figures/event_timeline.png', dpi=300, bbox_inches='tight')
    plt.show()

    # Event impact analysis
    print("\nMajor Events and Potential Impacts:")
    print("-"*50)

    # Hand-written reference list (name, date, type, expected impact); it is
    # not derived from the `events` frame.
    major_events_info = [
        ('Telebirr Launch', '2021-05-01', 'product_launch', 'Monopoly period, rapid user acquisition'),
        ('Safaricom License', '2022-08-01', 'market_entry', 'Increased competition expectations'),
        ('M-Pesa Launch', '2023-08-01', 'product_launch', 'Direct competition to Telebirr'),
        ('Interoperability', '2024-01-01', 'infrastructure', 'Enhanced network effects'),
        ('NFIS-II', '2021-01-01', 'policy', 'National financial inclusion strategy')
    ]

    for event_name, event_date, event_type, impact_desc in major_events_info:
        # Calendar glyph restored from its mojibake form.
        print(f"\n📅 {event_name} ({event_date})")
        print(f"   Type: {event_type}")
        print(f"   Impact: {impact_desc}")

# 7. KEY INSIGHTS GENERATION

In [None]:

print("\n" + "="*80)
print("7. KEY INSIGHTS SUMMARY")
print("="*80)

# Output glyphs. They had been stored as mojibake (UTF-8 bytes decoded with a
# legacy code page); they are defined once here with their intended characters.
INSIGHT_ICON = "🔍"  # U+1F50D
BULLET = "•"         # U+2022

# Hand-written narrative insights: each entry has a title, a list of findings
# and one implication. The figures quoted in the text are fixed strings, not
# values computed by this notebook. The dict is embedded verbatim in the
# summary report written in section 9.
insights = {
    'growth_pattern': {
        'title': 'Growth Deceleration Despite Mobile Money Boom',
        'findings': [
            'Account ownership grew only +3pp (46% to 49%) from 2021-2024',
            'Contrasts with 65M+ mobile money accounts registered',
            'Suggests high account duplication and inactivity rates'
        ],
        'implication': 'Focus needed on active usage, not just registration'
    },
    'gender_dynamics': {
        'title': 'Persistent Gender Gap',
        'findings': [
            'Estimated 18 percentage point gender gap',
            'Mobile money helped but structural barriers remain',
            'Targeted interventions needed for female inclusion'
        ],
        'implication': 'Gender-specific strategies required'
    },
    'usage_paradox': {
        'title': 'The Digital Payment Paradox',
        'findings': [
            '9.45% report mobile money accounts (Findex)',
            'But 35% report digital payments',
            'Suggests informal usage without formal accounts'
        ],
        'implication': 'Usage broader than formal account ownership'
    },
    'infrastructure_correlation': {
        'title': 'Infrastructure as Critical Enabler',
        'findings': [
            'Strong correlation with mobile penetration (r=0.92)',
            'Smartphone adoption drives digital payments',
            '12-18 month lag for infrastructure impacts'
        ],
        'implication': 'Infrastructure investments yield delayed but significant returns'
    },
    'market_evolution': {
        'title': 'Competition Driving Market Evolution',
        'findings': [
            'Telebirr monopoly (2021-2023): Rapid growth phase',
            'M-Pesa entry (2023): Increased competition',
            'Interoperability: Enhanced network effects'
        ],
        'implication': 'Market structure changes correlate with adoption patterns'
    }
}

# Print insights (only the values are needed; the keys are report identifiers).
for insight in insights.values():
    print(f"\n{INSIGHT_ICON} {insight['title']}")
    print("-"*40)
    print("Findings:")
    for finding in insight['findings']:
        print(f"  {BULLET} {finding}")
    print(f"\nImplication: {insight['implication']}")





# 8. DATA QUALITY ASSESSMENT

In [None]:
print("\n" + "="*80)
print("8. DATA QUALITY ASSESSMENT")
print("="*80)

# Hand-written list of (limitation, description, severity) tuples. It is also
# embedded in the summary report written in section 9, so its shape is kept.
data_limitations = [
    ("Sparse Time Series", "Only 5 Findex data points (2011-2024)", "High"),
    ("Annual Gaps", "Missing data between survey years", "High"),
    ("Methodological Inconsistency", "Different sources use different methodologies", "Medium"),
    ("Indicator Alignment", "Some indicators not comparable over time", "Medium"),
    ("Event Quantification", "Hard to measure precise event impacts", "High"),
    ("Disaggregation Limits", "Limited gender/regional breakdowns", "Medium"),
    ("Definitional Ambiguity", 'Active vs. registered accounts', "High")
]

# Size each column to its longest cell (header included). Fixed widths of
# 25/30 were shorter than several entries, which pushed the following
# columns out of alignment.
table_headers = ('Limitation', 'Impact', 'Severity')
column_widths = [
    max(len(cell) for cell in column)
    for column in zip(table_headers, *data_limitations)
]
row_template = " ".join(f"{{:<{width}}}" for width in column_widths)
table_width = sum(column_widths) + len(column_widths) - 1

print("\nData Limitations and Implications:")
print("-"*50)
print(row_template.format(*table_headers))
print("-" * table_width)
for limitation_row in data_limitations:
    print(row_template.format(*limitation_row))

# 9. SAVE ANALYSIS RESULTS

In [None]:


print("\n" + "="*80)
print("9. SAVING ANALYSIS RESULTS")
print("="*80)

import json

# Save processed data
preprocessor.save_processed_data('../data/processed/analysis_ready_data.csv')

# Year span as whole numbers: a float year column (common when some records
# have no year) would otherwise render as "2011.0-2024.0".
known_years = pd.to_numeric(data['year'], errors='coerce').dropna()
time_period = (
    f"{int(known_years.min())}-{int(known_years.max())}"
    if not known_years.empty else "N/A"
)

# Mean period-on-period change; NaN when there are fewer than two
# observations, reported as "N/A" instead of "nanpp".
average_growth = acc_ownership['growth'].mean() if not acc_ownership.empty else float('nan')

# Create summary report
summary_report = {
    'analysis_date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'dataset_summary': {
        'total_records': len(data),
        'observations': len(data[data['record_type'] == 'observation']),
        'events': len(data[data['record_type'] == 'event']),
        'unique_indicators': data['indicator_code'].nunique(),
        'time_period': time_period
    },
    # The three groups below are fixed strings written into the notebook,
    # not values derived from `data`.
    'key_metrics_2024': {
        'account_ownership': '49%',
        'mobile_money_accounts': '9.45%',
        'digital_payments': '~35%',
        'gender_gap': '~18 percentage points'
    },
    'growth_analysis': {
        'periods': ['2011-2014', '2014-2017', '2017-2021', '2021-2024'],
        'growth_rates': ['+8pp', '+13pp', '+11pp', '+3pp'],
        'average_growth': f"{average_growth:.1f}pp" if not pd.isna(average_growth) else "N/A"
    },
    'infrastructure_correlations': {},
    'insights': insights,
    'limitations': data_limitations
}

# Add correlation data. `corr_matrix` exists only if the infrastructure cell
# found data. NaN correlations are dropped because json would write them as
# the non-standard token NaN.
if 'corr_matrix' in locals() and 'ACC_OWNERSHIP' in corr_matrix.columns:
    top_corrs = (
        corr_matrix['ACC_OWNERSHIP']
        .dropna()
        .sort_values(ascending=False)
        .head(6)
    )
    summary_report['infrastructure_correlations'] = top_corrs.to_dict()

# Save report (create the target folder on a fresh checkout; fix the encoding
# so the file does not depend on the platform default).
report_path = Path('../reports/eda_summary_report.json')
report_path.parent.mkdir(parents=True, exist_ok=True)
with open(report_path, 'w', encoding='utf-8') as f:
    json.dump(summary_report, f, indent=2)

# Check-mark glyphs restored from their mojibake form.
print("✓ Analysis ready data saved: ../data/processed/analysis_ready_data.csv")
print("✓ Summary report saved: ../reports/eda_summary_report.json")
print("✓ Visualizations saved: ../reports/figures/")






# 10. CREATE INTERACTIVE DASHBOARD PREVIEW

In [None]:
print("\n" + "="*80)
print("10. INTERACTIVE VISUALIZATIONS")
print("="*80)

# Create interactive plot with Plotly
if not acc_ownership.empty:
    fig = go.Figure()

    # Add account ownership trace. Years stay numeric so that event markers
    # fall at their true position between survey years, and so that
    # add_vline can do arithmetic on the x value when placing its annotation.
    fig.add_trace(go.Scatter(
        x=pd.to_numeric(acc_ownership['year']),
        y=acc_ownership['value_numeric'],
        mode='lines+markers+text',
        name='Account Ownership',
        line=dict(color=COLORS['access'], width=3),
        marker=dict(size=10, symbol='circle'),
        text=[f"{v}%" for v in acc_ownership['value_numeric']],
        textposition="top center",
        hovertemplate="<b>%{x}</b><br>Account Ownership: %{y:.1f}%<extra></extra>"
    ))

    # Add events if available
    if not events.empty:
        for _, event in events.iterrows():
            event_date = pd.to_datetime(event['event_date'], errors='coerce')
            if pd.isna(event_date):
                continue  # an undated event cannot be placed on the axis
            # NOTE(review): a string x combined with annotation_text is
            # reported to raise TypeError in Plotly's annotation placement
            # (it averages x0 and x1) - confirm against the installed version.
            fig.add_vline(
                x=event_date.year,
                line_width=1,
                line_dash="dash",
                line_color="gray",
                annotation_text=str(event['event_name'])[:20],
                annotation_position="top right"
            )

    # Update layout
    fig.update_layout(
        title=dict(
            text='Ethiopia Financial Inclusion Dashboard Preview',
            font=dict(size=20, family="Arial", color="black")
        ),
        xaxis=dict(
            title='Year',
            tickangle=45,
            tickmode='linear',  # one tick per whole year on the numeric axis
            dtick=1,
            tickformat='d',
            showgrid=True,
            gridwidth=1,
            gridcolor='LightGray'
        ),
        yaxis=dict(
            title='Account Ownership (%)',
            range=[0, 60],
            showgrid=True,
            gridwidth=1,
            gridcolor='LightGray'
        ),
        hovermode='x unified',
        template='plotly_white',
        height=500,
        showlegend=True,
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="left",
            x=0.01
        )
    )

    # Save interactive plot
    fig.write_html('../reports/figures/interactive_dashboard_preview.html')
    print("✓ Interactive dashboard preview saved")

    # Display in the notebook. A bare `HTML(...)` expression inside an `if`
    # body is not the cell's final top-level expression, so it never
    # rendered; fig.show() displays the figure explicitly.
    fig.show()