In [32]:
from pathlib import Path
import pandas as pd
import sys
# Make the project root importable so that `from src... import ...` works in
# later cells. The root is taken to be the parent of the kernel's working
# directory — assumes the notebook runs from a direct child folder of the
# project (e.g. notebooks/); TODO confirm.
PROJECT_ROOT = Path.cwd().parent

# Guard the append: re-running this cell would otherwise keep stacking
# duplicate entries onto sys.path for the lifetime of the kernel.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

print("Project root added:", PROJECT_ROOT)



Project root added: c:\Users\user\Desktop\Ethiopia-fi-forecast


In [33]:
from src.data_loader import (
    load_unified_data,
    load_reference_codes,
    load_impact_links
)
from src.validators import run_all_validations
from src.enrichment import add_observation

# Raw-data directory, relative to the notebook's working directory.
# NOTE(review): BASE_PATH is not referenced by any later cell in this notebook;
# the load cell defines DATA_PATH with the same value — consider keeping one.
BASE_PATH = Path("../data/raw")

 Load datasets

In [34]:
# Directory holding the raw input CSVs, relative to the notebook's working
# directory.
DATA_PATH = Path("../data/raw")

# Read the three raw tables straight from disk.
# NOTE(review): load_unified_data / load_reference_codes / load_impact_links are
# imported from src.data_loader in an earlier cell but not used here — confirm
# whether these direct reads are intentional.
data = pd.read_csv(DATA_PATH / "ethiopia_fi_unified_data.csv")
refs = pd.read_csv(DATA_PATH / "reference_codes.csv")
impact_links = pd.read_csv(DATA_PATH / "impact_links.csv")

# errors="coerce" converts unparseable dates to NaT instead of raising. Compare
# against the raw column so values lost that way are reported rather than
# silently dropped; rows that were already empty are not counted.
raw_dates = data["observation_date"]
parsed_dates = pd.to_datetime(raw_dates, errors="coerce")
unparseable = raw_dates.notna() & parsed_dates.isna()
if unparseable.any():
    print(
        f"WARNING: {int(unparseable.sum())} observation_date value(s) could not be parsed and were set to NaT:",
        sorted(raw_dates[unparseable].astype(str).unique()),
    )
data["observation_date"] = parsed_dates

# Quick sanity check: (rows, columns) of each table.
print(data.shape, refs.shape, impact_links.shape)

(43, 34) (71, 4) (35, 8)


In [35]:

# Run the project's validation checks (src.validators.run_all_validations) on
# the freshly loaded frame, before any enrichment is applied.
# NOTE(review): how failures surface (exception, printed report, or return
# value) is not visible from this notebook — confirm in src/validators.
run_all_validations(data)

Exploration


In [36]:
# Record counts broken down by record type, by pillar, and by confidence level.
display(
    data["record_type"].value_counts(),
    data.groupby("pillar")["record_id"].count(),
    data["confidence"].value_counts(),
)

# Earliest and latest observation_date across every record in the frame.
print(
    "Temporal range:",
    data["observation_date"].min(),
    "→",
    data["observation_date"].max(),
)

# Per-indicator coverage: number of records plus the first and last
# observation_date. Despite their names, first_year / last_year hold full
# dates, not bare years.
display(
    data.groupby("indicator_code")
    .agg(
        count=pd.NamedAgg(column="record_id", aggfunc="count"),
        first_year=pd.NamedAgg(column="observation_date", aggfunc="min"),
        last_year=pd.NamedAgg(column="observation_date", aggfunc="max"),
    )
    .reset_index()
)


record_type
observation    30
event          10
target          3
Name: count, dtype: int64

pillar
ACCESS           16
AFFORDABILITY     1
GENDER            5
USAGE            11
Name: record_id, dtype: int64

confidence
high      40
medium     3
Name: count, dtype: int64

Temporal range: 2014-12-31 00:00:00 → 2030-12-31 00:00:00


Unnamed: 0,indicator_code,count,first_year,last_year
0,ACC_4G_COV,2,2023-06-30,2025-06-30
1,ACC_FAYDA,4,2024-08-15,2028-12-31
2,ACC_MM_ACCOUNT,2,2021-12-31,2024-11-29
3,ACC_MOBILE_PEN,1,2025-12-31,2025-12-31
4,ACC_OWNERSHIP,7,2014-12-31,2025-12-31
5,AFF_DATA_INCOME,1,2024-12-31,2024-12-31
6,EVT_CROSSOVER,1,2024-10-01,2024-10-01
7,EVT_ETHIOPAY,1,2025-12-18,2025-12-18
8,EVT_FAYDA,1,2024-01-01,2024-01-01
9,EVT_FX_REFORM,1,2024-07-29,2024-07-29


Enrichment Example

In [37]:
# Example enrichment: one manually collected observation to add to the dataset.
new_obs = {
    # Upper-case to match the pillar codes already present in the data
    # (ACCESS / AFFORDABILITY / GENDER / USAGE). A lower-case "usage" would be
    # treated as a separate pillar by groupby("pillar") unless add_observation
    # normalizes case.
    "pillar": "USAGE",
    "indicator": "Smartphone penetration",
    "indicator_code": "ENB_SMARTPHONE",
    "value_numeric": 38.0,
    "observation_date": "2023-12-31",
    "source_name": "GSMA",
    "source_url": "https://www.gsma.com",
    "confidence": "medium",
    "original_text": "Smartphone adoption reached 38% in Ethiopia in 2023",
    "collected_by": "Your Name",  # TODO: replace the placeholder with the actual collector
    "collection_date": "2026-01-30",
    "notes": "Key enabler for digital payment usage"
}

# Keep the cell idempotent: only call add_observation when no row with the same
# indicator code and observation date exists yet, so re-running the cell does
# not add the same observation twice.
same_code = data["indicator_code"] == new_obs["indicator_code"]
same_date = (
    pd.to_datetime(data["observation_date"], errors="coerce")
    == pd.to_datetime(new_obs["observation_date"])
)
if not (same_code & same_date).any():
    data = add_observation(data, new_obs)

Re-validate

In [38]:
# Re-run the same validation checks on `data` after the add_observation call
# above, so the enriched frame is checked before it is saved.
run_all_validations(data)

Save processed data

In [31]:
# Destination folder for processed outputs, relative to the notebook's working
# directory; created if missing (its parent directory must already exist).
OUT_PATH = Path("../data/processed")
OUT_PATH.mkdir(exist_ok=True)

# Persist the enriched frame as CSV, without writing the DataFrame index.
data.to_csv(
    path_or_buf=OUT_PATH / "ethiopia_fi_unified_data_enriched.csv",
    index=False,
)

print("✅ Task 1 completed successfully")

✅ Task 1 completed successfully
