In [9]:
#importing libraries
import requests
import pandas as pd
import os
os.environ['USE_PYGEOS'] = '0'
import geopandas
import geopandas as gpd
from io import StringIO

# Folder that receives the intermediate GeoPackage outputs, expressed
# relative to the notebook's working directory (two levels up, then into data/).
_output_path_parts = (os.pardir, os.pardir, 'data', '02_intermediate_output')
output_folder_path = os.path.join(*_output_path_parts)

To access the ACLED API, you need to provide your registered email address and your API key. For more information, and to request access to the API, visit [ACLED](https://acleddata.com/).

In [None]:
# ACLED API credentials and query parameters.
# Each value is taken from an environment variable when that variable is set,
# so real credentials do not have to be typed into (and saved with) the
# notebook. When the variable is not set, the placeholder string is used and
# must be replaced by hand before running the request cell.
api_key = os.environ.get("ACLED_API_KEY", "your_key")
email = os.environ.get("ACLED_EMAIL", "your_email")
region = os.environ.get("ACLED_REGION", "desired_region")  # used> 15 for south america

# we can filter by country, state & region
start_date = os.environ.get("ACLED_START_DATE", "start_date")  # used> 2017-01-01
end_date = os.environ.get("ACLED_END_DATE", "end_date")  # used> 2023-10-01
limit = os.environ.get("ACLED_LIMIT", "limit")  # default is set to 5000 data, used 10000 for this example

In [6]:
# Request events from the ACLED read endpoint as CSV.
# The query is passed through `params` so that requests URL-encodes every
# value (the '@' in the email and the '|' separating the two dates would
# otherwise be sent raw).
url = "https://api.acleddata.com/acled/read.csv"
query_params = {
    "key": api_key,
    "email": email,
    "region": region,
    # BETWEEN filter: "<start>|<end>"
    "event_date": f"{start_date}|{end_date}",
    "event_date_where": "BETWEEN",
    "limit": limit,
}
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, params=query_params, timeout=120)
# Fail loudly on HTTP errors instead of parsing an error page as CSV.
response.raise_for_status()

# Read data into a DataFrame
df = pd.read_csv(StringIO(response.text))
df.head()

Unnamed: 0,event_id_cnty,event_date,year,time_precision,disorder_type,event_type,sub_event_type,actor1,assoc_actor_1,inter1,...,location,latitude,longitude,geo_precision,source,source_scale,notes,fatalities,tags,timestamp
0,BOL4947,2023-10-01,2023,1,Demonstrations,Protests,Peaceful protest,Protesters (Bolivia),MAS-IPSP: Movement for Socialism-Political Ins...,6,...,Lauca Ene,-16.9947,-65.2268,1,Los Tiempos (Bolivia); Opinion (Bolivia),National,"On 1 October 2023, in Lauca Ene (Cochabamba), ...",0,crowd size=no report,1696879883
1,BOL4962,2023-10-01,2023,1,Demonstrations,Protests,Peaceful protest,Protesters (Bolivia),Indigenous Group (Bolivia),6,...,El Puente,-16.3262,-62.9067,1,El Deber; El Diario (Bolivia); Los Tiempos (Bo...,National,"On 1 October 2023, in El Puente (Santa Cruz), ...",0,crowd size=no report,1696879883
2,BRA75633,2023-10-01,2023,1,Political violence,Violence against civilians,Attack,Unidentified Armed Group (Brazil),,3,...,Caxias do Sul,-29.1681,-51.1794,1,Correio do Povo; G1,Subnational-National,"On 1 October 2023, in Caxias do Sul (Rio Grand...",1,,1696879883
3,BRA75641,2023-10-01,2023,1,Political violence,Battles,Armed clash,TCP: Pure Third Command,,3,...,Duque de Caxias,-22.7856,-43.3117,1,Crimes News RJ,New media,"On 1 October 2023, in Duque de Caxias (Rio de ...",0,,1696879883
4,BRA75650,2023-10-01,2023,1,Political violence,Riots,Mob violence,Rioters (Brazil),Vigilante Group (Brazil),5,...,Girau do Ponciano,-9.8842,-36.8289,1,Alagoas 24 Horas,Subnational,"On 1 October 2023, in Girau do Ponciano (Alago...",0,crowd size=no report,1696879883


In [8]:

# Create a GeoDataFrame from the DataFrame.
# points_from_xy takes x (longitude) first, then y (latitude).
# The CRS is set explicitly so the exported GeoPackages are georeferenced.
# NOTE(review): assumes ACLED coordinates are WGS84 decimal degrees (EPSG:4326) - confirm against the ACLED codebook.
geometry = gpd.points_from_xy(df['longitude'].astype(float), df['latitude'].astype(float))
gdf = gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:4326")

location_keys = ['latitude', 'longitude']

# Count the number of events of each type at each location
location_counts = gdf.groupby(location_keys + ['event_type']).size().reset_index(name='counts')

# Merge the counts back into the GeoDataFrame (one 'counts' value per event row)
gdf = gdf.merge(location_counts, on=location_keys + ['event_type'])

# Sort by counts in descending order; ties are broken alphabetically by
# event_type so the dominant type chosen below is reproducible between runs.
gdf = gdf.sort_values(by=['counts', 'event_type'], ascending=[False, True])

# For each location keep only the first row, i.e. the one with the highest
# count (dominant event type). Because the frame is already sorted, dropping
# duplicates replaces the slower groupby(...).apply(nlargest) and avoids the
# pandas deprecation warning about apply operating on grouping columns.
# Re-sorting by location reproduces the ordering a groupby would give.
agg_gdf = (
    gdf.drop_duplicates(subset=location_keys, keep='first')
    .sort_values(by=location_keys)
    .reset_index(drop=True)
)

# Create a new GeoDataFrame with aggregated information
agg_gdf = agg_gdf[['latitude', 'longitude', 'geometry', 'counts', 'event_type']].copy()

agg_gdf.head()

Unnamed: 0,latitude,longitude,geometry,counts,event_type
0,-54.8062,-68.3102,POINT (-68.31020 -54.80620),2,Protests
1,-54.5119,-67.1971,POINT (-67.19710 -54.51190),2,Protests
2,-53.7865,-67.7105,POINT (-67.71050 -53.78650),2,Protests
3,-53.1548,-70.9113,POINT (-70.91130 -53.15480),1,Protests
4,-51.6268,-69.2262,POINT (-69.22620 -51.62680),5,Protests


In [None]:
# Save the aggregated GeoDataFrame to a GeoPackage file.
# Create the output folder first: writing fails if the directory is missing.
os.makedirs(output_folder_path, exist_ok=True)
output_filepath_aggregated = os.path.join(output_folder_path, 'ACLED_south_america.gpkg')
agg_gdf.to_file(output_filepath_aggregated, driver='GPKG')

In [None]:
# Save the separate event types to individual GeoPackage files.
# Create the output folder first: writing fails if the directory is missing.
os.makedirs(output_folder_path, exist_ok=True)
for event_type in agg_gdf['event_type'].unique():
    event_type_gdf = agg_gdf[agg_gdf['event_type'] == event_type]
    # Event type names may contain path separators (e.g. "Explosions/Remote violence");
    # replace them so the name cannot point into a non-existent sub-folder.
    # Names without separators produce exactly the same file name as before.
    safe_event_type = str(event_type).replace('/', '_').replace('\\', '_')
    output_filepath_event_type = os.path.join(output_folder_path, f'{safe_event_type}_data.gpkg')
    event_type_gdf.to_file(output_filepath_event_type, driver='GPKG')
    print(f"Saved GeoPackage for event type '{event_type}' to {output_filepath_event_type}")