In [3]:
from pathlib import Path
import pandas as pd

ROOT = Path.cwd()
if not (ROOT / "data" / "processed").exists() and (ROOT.parent / "data" / "processed").exists():
    ROOT = ROOT.parent

PROCESSED = ROOT / "data" / "processed"

print("ROOT:", ROOT)
print("Processed files:", [p.name for p in PROCESSED.glob("*.csv")])

df = pd.read_csv(PROCESSED / "ethiopia_fi_unified_data_enriched.csv")
impact_matrix = pd.read_csv(PROCESSED / "event_indicator_matrix.csv")

print("df:", df.shape)
print("impact_matrix:", impact_matrix.shape)
df.head()


ROOT: c:\Users\Hp\Desktop\ethiopia-fi-forecast
Processed files: ['ethiopia_fi_unified_data_enriched.csv', 'event_indicator_matrix.csv']
df: (46, 34)
impact_matrix: (5, 8)


Unnamed: 0,record_id,record_type,category,pillar,indicator,indicator_code,indicator_direction,value_numeric,value_text,value_type,...,impact_direction,impact_magnitude,impact_estimate,lag_months,evidence_basis,comparable_country,collected_by,collection_date,original_text,notes
0,REC_0001,observation,,ACCESS,Account Ownership Rate,ACC_OWNERSHIP,higher_better,22.0,,percentage,...,,,,,,Example_Trainee,2025-01-20 00:00:00,,Baseline year,
1,REC_0002,observation,,ACCESS,Account Ownership Rate,ACC_OWNERSHIP,higher_better,35.0,,percentage,...,,,,,,Example_Trainee,2025-01-20 00:00:00,,,
2,REC_0003,observation,,ACCESS,Account Ownership Rate,ACC_OWNERSHIP,higher_better,46.0,,percentage,...,,,,,,Example_Trainee,2025-01-20 00:00:00,,,
3,REC_0004,observation,,ACCESS,Account Ownership Rate,ACC_OWNERSHIP,higher_better,56.0,,percentage,...,,,,,,Example_Trainee,2025-01-20 00:00:00,,Gender disaggregated,
4,REC_0005,observation,,ACCESS,Account Ownership Rate,ACC_OWNERSHIP,higher_better,36.0,,percentage,...,,,,,,Example_Trainee,2025-01-20 00:00:00,,Gender disaggregated,


In [4]:
events = df[df["record_type"]=="event"].copy()

print("Event rows:", events.shape[0])
display(events["category"].value_counts().head(20))

display(events[["record_id","category","indicator","observation_date","source_name","confidence"]].head(10))


Event rows: 11


category
infrastructure    3
product_launch    2
policy            2
market_entry      1
milestone         1
partnership       1
pricing           1
Name: count, dtype: int64

Unnamed: 0,record_id,category,indicator,observation_date,source_name,confidence
33,EVT_0001,product_launch,Telebirr Launch,2021-05-17 00:00:00,Ethio Telecom,high
34,EVT_0002,market_entry,Safaricom Ethiopia Commercial Launch,2022-08-01 00:00:00,News,high
35,EVT_0003,product_launch,M-Pesa Ethiopia Launch,2023-08-01 00:00:00,Safaricom,high
36,EVT_0004,infrastructure,Fayda Digital ID Program Rollout,2024-01-01 00:00:00,NIDP,high
37,EVT_0005,policy,Foreign Exchange Liberalization,2024-07-29 00:00:00,NBE,high
38,EVT_0006,milestone,P2P Transaction Count Surpasses ATM,2024-10-01 00:00:00,EthSwitch,high
39,EVT_0007,partnership,M-Pesa EthSwitch Integration,2025-10-27 00:00:00,EthSwitch,high
40,EVT_0008,infrastructure,EthioPay Instant Payment System Launch,2025-12-18 00:00:00,NBE/EthSwitch,high
41,EVT_0009,policy,NFIS-II Strategy Launch,2021-09-01 00:00:00,NBE,high
42,EVT_0010,pricing,Safaricom Ethiopia Price Increase,2025-12-15 00:00:00,News,high


In [7]:
event_impacts = events.merge(
    impact_matrix,
    left_on="category",
    right_on="event_category",
    how="inner"
)

print("event_impacts:", event_impacts.shape)
print(event_impacts.columns.tolist())

# After merge, pandas often creates _x/_y columns when names overlap.
# We'll auto-pick the matrix versions (_y) when they exist.

cols = [
    "record_id",
    "category",
    "indicator",           # event title (in your schema)
    "observation_date",    # event date (in your schema)
    "indicator_code_y" if "indicator_code_y" in event_impacts.columns else "indicator_code",
    "impact_direction_y" if "impact_direction_y" in event_impacts.columns else "impact_direction",
    "impact_strength",
    "lag_months_y" if "lag_months_y" in event_impacts.columns else "lag_months",
]

# confidence can appear as confidence_x / confidence_y depending on overlap
if "confidence_y" in event_impacts.columns:
    cols.append("confidence_y")
elif "confidence" in event_impacts.columns:
    cols.append("confidence")
elif "confidence_x" in event_impacts.columns:
    cols.append("confidence_x")

display(event_impacts[cols].head(20))


event_impacts: (13, 42)
['record_id', 'record_type', 'category', 'pillar', 'indicator', 'indicator_code_x', 'indicator_direction', 'value_numeric', 'value_text', 'value_type', 'unit', 'observation_date', 'period_start', 'period_end', 'fiscal_year', 'gender', 'location', 'region', 'source_name', 'source_type', 'source_url', 'confidence_x', 'related_indicator', 'relationship_type', 'impact_direction_x', 'impact_magnitude', 'impact_estimate', 'lag_months_x', 'evidence_basis', 'comparable_country', 'collected_by', 'collection_date', 'original_text', 'notes', 'event_category', 'event_indicator_proxy', 'indicator_code_y', 'impact_direction_y', 'impact_strength', 'lag_months_y', 'confidence_y', 'rationale']


Unnamed: 0,record_id,category,indicator,observation_date,indicator_code_y,impact_direction_y,impact_strength,lag_months_y,confidence_y
0,EVT_0001,product_launch,Telebirr Launch,2021-05-17 00:00:00,USG_DIGITAL_PAYMENT,+,high,3,high
1,EVT_0003,product_launch,M-Pesa Ethiopia Launch,2023-08-01 00:00:00,USG_DIGITAL_PAYMENT,+,high,3,high
2,EVT_0004,infrastructure,Fayda Digital ID Program Rollout,2024-01-01 00:00:00,ACC_ACCOUNT_OWNERSHIP,+,medium,6,medium
3,EVT_0004,infrastructure,Fayda Digital ID Program Rollout,2024-01-01 00:00:00,USG_DIGITAL_PAYMENT,+,low,9,medium
4,EVT_0004,infrastructure,Fayda Digital ID Program Rollout,2024-01-01 00:00:00,ACC_ACCOUNT_OWNERSHIP,+,low,12,low
5,EVT_0005,policy,Foreign Exchange Liberalization,2024-07-29 00:00:00,USG_DIGITAL_PAYMENT,+,medium,6,medium
6,EVT_0008,infrastructure,EthioPay Instant Payment System Launch,2025-12-18 00:00:00,ACC_ACCOUNT_OWNERSHIP,+,medium,6,medium
7,EVT_0008,infrastructure,EthioPay Instant Payment System Launch,2025-12-18 00:00:00,USG_DIGITAL_PAYMENT,+,low,9,medium
8,EVT_0008,infrastructure,EthioPay Instant Payment System Launch,2025-12-18 00:00:00,ACC_ACCOUNT_OWNERSHIP,+,low,12,low
9,EVT_0009,policy,NFIS-II Strategy Launch,2021-09-01 00:00:00,USG_DIGITAL_PAYMENT,+,medium,6,medium


In [9]:
from pathlib import Path

# Ensure we are saving to the correct repo-level data/processed folder
ROOT = Path.cwd()
if not (ROOT / "data" / "processed").exists() and (ROOT.parent / "data" / "processed").exists():
    ROOT = ROOT.parent

PROCESSED = ROOT / "data" / "processed"
PROCESSED.mkdir(parents=True, exist_ok=True)

event_impacts.to_csv(PROCESSED / "event_impacts_expanded.csv", index=False)
print("Saved:", PROCESSED / "event_impacts_expanded.csv")


Saved: c:\Users\Hp\Desktop\ethiopia-fi-forecast\data\processed\event_impacts_expanded.csv
