In [1]:
import requests
import pandas as pd
from dotenv import load_dotenv
import os
from pathlib import Path

# --- query window (edit these two values to change the export range) ---
start_date, end_date = "2025-01-01", "2025-12-31"
# -----------------------------------------------------------------------

# Load the .env file that sits one directory above the working directory,
# then read the ACLED credentials from the process environment.
env_path = Path("..", ".env")
load_dotenv(dotenv_path=env_path)
key = os.environ.get("ACLED_API_KEY")
email = os.environ.get("ACLED_API_EMAIL")

# Paging state. The page size is not configurable here: per the original
# note, the API uses a fixed default of 5000 rows per page.
df = pd.DataFrame()
page = 1

# Download every result page from the ACLED read endpoint for the
# start_date..end_date window and append the typed rows to `df`.
#
# The loop ends on the first empty page, on an HTTP-level or payload-level
# error, or when the payload has no "data" attribute. Pages fetched before
# an error are still added to `df`.

# Loop invariants: the endpoint and the target column dtypes are the same
# for every page, so build them once.
url = "https://api.acleddata.com/acled/read"
dtype_dict = {
    "event_id_cnty": "string",
    "event_date": "datetime64[ns]",
    "year": "int",
    "time_precision": "int",
    "disorder_type": "string",
    "event_type": "string",
    "sub_event_type": "string",
    "actor1": "string",
    "assoc_actor_1": "string",
    "inter1": "string",
    "actor2": "string",
    "assoc_actor_2": "string",
    "inter2": "string",
    "interaction": "string",
    "civilian_targeting": "string",
    "iso": "int",
    "region": "string",
    "country": "string",
    "admin1": "string",
    "admin2": "string",
    "admin3": "string",
    "location": "string",
    "latitude": "float",
    "longitude": "float",
    "geo_precision": "int",
    "source": "string",
    "source_scale": "string",
    "notes": "string",
    "fatalities": "int",
    "tags": "string",
    "timestamp": "string",
}

# Pages are collected here and concatenated once after the loop; calling
# pd.concat on every iteration would re-copy all previously fetched rows.
page_frames = []

while True:
    params = {
        "key": key,
        "email": email,
        "page": page,
        "event_date": start_date+"|"+end_date,
        "event_date_where": "BETWEEN"
    }

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, params=params, timeout=120)

    # HTTP-level failure.
    if response.status_code != 200:
        print(f"API request failed with status code {response.status_code}: {response.text}")
        break

    data = response.json()

    # The payload carries its own "status" because the HTTP status code does
    # not cover all error messages. A missing status is treated as a failure.
    api_status = data.get("status")
    if api_status != 200:
        print(f"API request failed with status code {api_status}: {response.text}")
        break

    if "data" not in data:
        print("The 'data' attribute was not found in the response.")
        break

    results = data["data"]
    df_paging = pd.DataFrame(results)

    # An empty page means the previous page was the last one.
    if len(df_paging) == 0:
        break

    # Cast the raw JSON values to the data-dictionary types.
    df_paging = df_paging.astype(dtype_dict)
    page_frames.append(df_paging)

    print(f"page: {page} - number of rows: {len(df_paging)}")

    # Move to the next page
    page += 1

# Merge everything fetched (including pages fetched before an error) into df.
if page_frames:
    df = pd.concat([df, *page_frames], ignore_index=True)

# Summarise what was downloaded: row count and deep memory footprint in MB.
total_rows = len(df)
memory_mb = df.memory_usage(deep=True).sum() / (1024**2)
print(f"Total number of rows in the dataframe: {total_rows}")
print(f"Total memory used by the dataframe: {memory_mb!s}  MB")  # in MB

# Persist the raw export; the file name encodes the requested date window.
csv_name = f"raw_acled_data_{start_date}_{end_date}.csv"
df.to_csv(csv_name)

page: 1 - number of rows: 5000
page: 2 - number of rows: 5000
page: 3 - number of rows: 5000
page: 4 - number of rows: 5000
page: 5 - number of rows: 5000
page: 6 - number of rows: 5000
page: 7 - number of rows: 5000
page: 8 - number of rows: 5000
page: 9 - number of rows: 5000
page: 10 - number of rows: 5000
page: 11 - number of rows: 5000
page: 12 - number of rows: 5000
page: 13 - number of rows: 5000
page: 14 - number of rows: 5000
page: 15 - number of rows: 5000
page: 16 - number of rows: 5000
page: 17 - number of rows: 5000
page: 18 - number of rows: 5000
page: 19 - number of rows: 1057
Total number of rows in the dataframe: 91057
Total memory used by the dataframe: 169.01250171661377  MB
