In [None]:
#!/usr/bin/env python3
import asyncio
import os
from typing import Optional

from config.secrets_config import SecretManager
import polars as pl
import requests

from dotenv import load_dotenv
import os

load_dotenv()

# ——— Configuration ———
from config.secrets_config import SecretManager

# Secret lookup goes through the project's SecretManager, scoped to one region.
sm = SecretManager(region_name="eu-north-1")

# API key is fetched at import time under the secret name 'ACLED-API'.
API_KEY = sm.get_secret('ACLED-API')

# Account e-mail sent with every request. It can be overridden through the
# ACLED_EMAIL environment variable (e.g. from the .env file read by
# load_dotenv()), so the notebook can be shared without editing this cell;
# the previous hard-coded address remains the default.
EMAIL = os.getenv("ACLED_EMAIL", "s.a.bojilov@umail.leidenuniv.nl")

# Base URL and path are concatenated by the fetch functions to form the
# request URL.
ACLED_API_BASE = "https://api.acleddata.com"
ACLED_EVENT_ENDPOINT = "/acled/read"


def fetch_acled_data(
    api_key: str,
    email: str,
    limit: int = 5000,
    max_pages: int = 2,
    query_params: dict | None = None,
    timeout: float | None = 60.0,
) -> pl.DataFrame:
    """Download ACLED events page by page and return them as one DataFrame.

    Pages are requested sequentially starting at page 1. Fetching stops as
    soon as ``max_pages`` pages have been requested, a page carries no rows
    under its ``"data"`` key, or a page carries fewer than ``limit`` rows.

    Args:
        api_key: Sent as the ``key`` query parameter.
        email: Sent as the ``email`` query parameter.
        limit: Sent as the ``limit`` query parameter; also the threshold of
            the short-page stop condition.
        max_pages: Upper bound on the number of requests issued.
        query_params: Extra query parameters merged over the defaults, so
            they may override ``key``/``email``/``limit``/``page``.
        timeout: Per-request timeout in seconds handed to ``requests``.
            Without one a stalled connection blocks forever; pass ``None``
            to restore that unbounded behaviour.

    Returns:
        A ``pl.DataFrame`` built from every collected row dict (empty when
        nothing was returned).

    Raises:
        requests.HTTPError: If a response carries an error status.
        requests.Timeout: If a request exceeds ``timeout``.
    """
    url = ACLED_API_BASE + ACLED_EVENT_ENDPOINT  # loop-invariant
    all_rows = []

    # A single Session lets consecutive page requests reuse the connection
    # and guarantees it is closed even when a request raises.
    with requests.Session() as session:
        for page in range(1, max_pages + 1):
            params = {"key": api_key, "email": email, "limit": limit, "page": page}
            if query_params:
                params.update(query_params)

            resp = session.get(url, params=params, timeout=timeout)
            resp.raise_for_status()

            rows = resp.json().get("data", [])
            if not rows:
                break

            all_rows.extend(rows)
            # A short page is treated as the last one.
            # NOTE(review): this assumes the server honours `limit` when
            # `page` is set — confirm against the ACLED API documentation.
            if len(rows) < limit:
                break

    return pl.DataFrame(all_rows)


if __name__ == "__main__":
    # Demo run of the synchronous fetcher.
    # Filter: ISO numeric country code 804, year 2025 (the event ids in the
    # saved output start with "UKR").
    # NOTE(review): the output saved with this cell reports 15000 rows even
    # though limit * max_pages is 6000 here — the server may not be honouring
    # `limit` when paging; confirm.
    df = fetch_acled_data(
        api_key=API_KEY,
        email=EMAIL,
        limit=2000,
        max_pages=3,
        query_params={"iso": 804, "year": [2025]},
    )

    print(f"Fetched {len(df)} rows from ACLED")
    # Both frames go through a single print call, separated by a space.
    print(df.head(), df.describe())
    # Relative path: the workbook is written to the current working directory.
    df.write_excel("acled_data.xlsx")

Fetched 15000 rows from ACLED
shape: (5, 31)
┌─────────────┬────────────┬──────┬─────────────┬───┬─────────────┬────────────┬──────┬────────────┐
│ event_id_cn ┆ event_date ┆ year ┆ time_precis ┆ … ┆ notes       ┆ fatalities ┆ tags ┆ timestamp  │
│ ty          ┆ ---        ┆ ---  ┆ ion         ┆   ┆ ---         ┆ ---        ┆ ---  ┆ ---        │
│ ---         ┆ str        ┆ str  ┆ ---         ┆   ┆ str         ┆ str        ┆ str  ┆ str        │
│ str         ┆            ┆      ┆ str         ┆   ┆             ┆            ┆      ┆            │
╞═════════════╪════════════╪══════╪═════════════╪═══╪═════════════╪════════════╪══════╪════════════╡
│ UKR241471   ┆ 2025-06-27 ┆ 2025 ┆ 1           ┆ … ┆ On 27 June  ┆ 0          ┆      ┆ 1751390103 │
│             ┆            ┆      ┆             ┆   ┆ 2025,       ┆            ┆      ┆            │
│             ┆            ┆      ┆             ┆   ┆ Russian     ┆            ┆      ┆            │
│             ┆            ┆      ┆           

In [4]:
import aiohttp
import asyncio
import polars as pl

async def fetch_page(session: aiohttp.ClientSession, url: str, params: dict) -> list[dict]:
    """Issue one GET on ``session`` and return the rows under the ``"data"`` key.

    Args:
        session: Open client session used to send the request.
        url: Full request URL.
        params: Query parameters for this page.

    Returns:
        The value stored under ``"data"`` in the decoded JSON body, or an
        empty list when that key is missing.

    Raises:
        Whatever ``raise_for_status()`` raises for an error response.
    """
    async with session.get(url, params=params) as response:
        response.raise_for_status()
        # Decode while the response is still open.
        body = await response.json()
    return body.get("data", [])

async def fetch_acled_data_async(
    api_key: str,
    email: str,
    limit: int = 5000,
    max_pages: int = 2,
    query_params: dict | None = None,
) -> pl.DataFrame:
    """Collect ACLED rows through ``fetch_page`` and return them as a DataFrame.

    Pages 1..``max_pages`` are awaited one after another inside a single
    ``aiohttp.ClientSession``. The loop ends early when a page is empty,
    when a page has fewer than ``limit`` rows, or when any exception occurs
    while handling a page; in the last case the error is printed and the
    rows gathered so far are still returned.

    Args:
        api_key: Sent as the ``key`` query parameter.
        email: Sent as the ``email`` query parameter.
        limit: Sent as ``limit``; also the short-page threshold.
        max_pages: Highest page number that will be requested.
        query_params: Extra query parameters merged over the defaults.

    Returns:
        A ``pl.DataFrame`` built from the accumulated row dicts.
    """
    endpoint = ACLED_API_BASE + ACLED_EVENT_ENDPOINT
    collected = []

    async with aiohttp.ClientSession() as session:
        for page in range(1, max_pages + 1):
            request_params = dict(key=api_key, email=email, limit=limit, page=page)
            if query_params:
                request_params.update(query_params)

            try:
                batch = await fetch_page(session, endpoint, request_params)
                if batch:
                    collected.extend(batch)
                # Stop on an empty page or on a page shorter than `limit`.
                if not batch or len(batch) < limit:
                    break
            except Exception as e:
                # Best effort: report the failure and keep what was fetched.
                print(f"Error on page {page}: {e}")
                break

    return pl.DataFrame(collected)

In [8]:
# Run the async fetcher with iso=804, year=2024. Relies on API_KEY, EMAIL and
# fetch_acled_data_async being defined by earlier cells, and rebinds the
# notebook-global `df` that the synchronous demo cell also assigns.
# NOTE(review): the saved output below is (15000, 31) although
# limit * max_pages is 3000 here — the server may not be honouring `limit`
# when paging; confirm.
df = await fetch_acled_data_async(
    api_key=API_KEY,
    email=EMAIL,
    limit=1000,
    max_pages=3,
    query_params={"iso": 804, "year": 2024}
)
# Last expression of the cell: displays (rows, columns).
df.shape

(15000, 31)