In [1]:
import os
import math
import numpy as np
import pandas as pd
import folium
from folium.plugins import HeatMapWithTime
import geopandas as gpd
from shapely.geometry import Point
import folium
from folium.plugins import HeatMap

from wsi.mapping.iso_name import ISO_NAME
from wsi.mapping.iso_gw import ISO_GW
from wsi.mapping.iso_iso2 import ISO_ISO2
from wsi.utils import raw_data_path, processed_data_path

In [2]:
import os
import math
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from shapely.prepared import prep

# Constants
# Spherical-Earth radius in kilometres; used as the scale factor of the haversine distance.
EARTH_RADIUS_KM = 6371
# File-name template of the population raster tiles; "{tile}" is filled in with the tile number.
FILE_PATTERN = "gpw_v4_population_count_adjusted_to_2015_unwpp_country_totals_rev11_2020_30_sec_{tile}.asc"

def read_population_count(file_path):
    """Read one ESRI ASCII grid (.asc) file into memory.

    The header is parsed from the same file handle that is then handed to
    ``np.loadtxt``, so the file is opened only once. Header lines are
    recognised by their keyword rather than by a fixed count, which means:

    * a header without the optional ``NODATA_value`` line does not swallow the
      first data row (the conventional default of -9999 is used instead);
    * center-registered grids (``xllcenter`` / ``yllcenter``) are accepted and
      converted to the lower-left *corner* before building the geotransform.

    Parameters
    ----------
    file_path : str
        Path to the ``.asc`` file.

    Returns
    -------
    dict or None
        ``None`` (after printing a message) when the file does not exist.
        Otherwise a dict with keys ``"file"`` (the path), ``"data"`` (2-D
        float array of cell values), ``"geotransform"`` (6-tuple
        ``(x_origin, cell_w, 0, y_top, 0, -cell_h)``) and ``"no_data_value"``.

    Raises
    ------
    ValueError
        If a required header field is missing.
    """
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return None

    header_keys = {
        'ncols', 'nrows', 'xllcorner', 'yllcorner',
        'xllcenter', 'yllcenter', 'cellsize', 'nodata_value',
    }

    with open(file_path, 'r') as f:
        metadata = {}
        while True:
            line_start = f.tell()
            tokens = f.readline().split()
            if len(tokens) != 2 or tokens[0].lower() not in header_keys:
                # First non-header line (or EOF): rewind so it is read as data.
                f.seek(line_start)
                break
            metadata[tokens[0].lower()] = float(tokens[1])

        missing = [key for key in ('nrows', 'cellsize') if key not in metadata]
        if 'xllcorner' not in metadata and 'xllcenter' not in metadata:
            missing.append('xllcorner/xllcenter')
        if 'yllcorner' not in metadata and 'yllcenter' not in metadata:
            missing.append('yllcorner/yllcenter')
        if missing:
            raise ValueError(f"Missing header field(s) {missing} in {file_path}")

        # ndmin=2 keeps a single-row grid two-dimensional.
        data = np.loadtxt(f, ndmin=2)

    cellsize = metadata['cellsize']
    # Center-registered headers give the centre of the lower-left cell; shift
    # by half a cell to obtain the corner the geotransform is based on.
    if 'xllcorner' in metadata:
        x_corner = metadata['xllcorner']
    else:
        x_corner = metadata['xllcenter'] - cellsize / 2
    if 'yllcorner' in metadata:
        y_corner = metadata['yllcorner']
    else:
        y_corner = metadata['yllcenter'] - cellsize / 2

    gt = (
        x_corner,
        cellsize,
        0,
        y_corner + metadata['nrows'] * cellsize,  # top edge of the grid
        0,
        -cellsize  # rows run from north to south
    )
    return {
        "file": file_path,
        "data": data,
        "geotransform": gt,
        "no_data_value": metadata.get('nodata_value', -9999.0)
    }


def prepare_pixel_grid(geotransform, shape):
    """Build per-cell latitude and longitude arrays for a raster.

    Each coordinate is ``origin + index * pixel_size``, i.e. the position of
    the cell's anchor given by the geotransform origin (no half-cell offset is
    applied).

    Parameters
    ----------
    geotransform : tuple
        6-tuple ``(origin_x, pixel_w, _, origin_y, _, pixel_h)``.
    shape : tuple
        ``(rows, cols)`` of the raster.

    Returns
    -------
    (lat_grid, lon_grid)
        Two ``(rows, cols)`` arrays. They are broadcast views, so they are
        cheap to create but read-only.
    """
    x0, dx, _, y0, _, dy = geotransform
    n_rows, n_cols = shape
    # Column vector of row latitudes and row vector of column longitudes.
    lat_column = (y0 + np.arange(n_rows) * dy).reshape(n_rows, 1)
    lon_row = (x0 + np.arange(n_cols) * dx).reshape(1, n_cols)
    full_shape = (n_rows, n_cols)
    return np.broadcast_to(lat_column, full_shape), np.broadcast_to(lon_row, full_shape)

def haversine_distance_vector(lat1, lon1, lat2, lon2):
    """Great-circle distance in km from one point to many points (haversine).

    Parameters
    ----------
    lat1, lon1 : float
        Reference point in degrees (scalars; converted with ``math.radians``).
    lat2, lon2 : array-like
        Target coordinates in degrees.

    Returns
    -------
    Distances scaled by ``EARTH_RADIUS_KM``, with the shape of the
    broadcast of ``lat2`` and ``lon2``.
    """
    phi_ref = math.radians(lat1)
    lam_ref = math.radians(lon1)
    phi = np.radians(lat2)
    lam = np.radians(lon2)

    delta_phi = phi - phi_ref
    delta_lam = lam - lam_ref
    hav = np.sin(delta_phi / 2)**2 + np.cos(phi_ref) * np.cos(phi) * np.sin(delta_lam / 2)**2
    central_angle_half = np.arctan2(np.sqrt(hav), np.sqrt(1 - hav))
    return EARTH_RADIUS_KM * 2 * central_angle_half

def get_population_in_conflict_area(all_data, conflict_coords, radius_km=50):
    """Sum the population living within ``radius_km`` of any conflict location.

    For each raster tile, a boolean mask marks every cell whose coordinate is
    within ``radius_km`` (haversine distance) of at least one conflict point.
    Cells covered by several points are counted once per tile.

    The bounding box of each point is located on the 1-D latitude/longitude
    axes and only that window of the tile is examined, instead of comparing
    and allocating full-tile 2-D arrays for every conflict point. The grid
    axes are linear in the index, so the cells inside the box always form one
    contiguous window; the result is the same as masking the full grid.

    Parameters
    ----------
    all_data : iterable of dict
        Tiles with keys ``"data"``, ``"geotransform"`` and ``"no_data_value"``.
    conflict_coords : iterable of (lat, lon)
        Conflict locations in degrees.
    radius_km : float, default 50
        Search radius in kilometres.

    Returns
    -------
    (total_population, union_grid_points)
        The summed cell values and a list of ``[lat, lon, value]`` for every
        selected cell, excluding no-data cells.
    """
    total_population = 0
    union_grid_points = []
    # Rough degrees-per-km conversion, used only to pre-select a bounding box.
    radius_deg_lat = radius_km / 111.0

    for dataset in all_data:
        data = dataset["data"]
        gt = dataset["geotransform"]
        nodata = dataset["no_data_value"]
        lat_grid, lon_grid = prepare_pixel_grid(gt, data.shape)
        # 1-D coordinate axes (first column / first row of the broadcast grids).
        lat_axis = lat_grid[:, :1].ravel()
        lon_axis = lon_grid[:1, :].ravel()
        mask = np.zeros(data.shape, dtype=bool)

        for lat, lon in conflict_coords:
            radius_deg_lon = radius_km / (111.0 * math.cos(math.radians(lat)))
            lat_min, lat_max = lat - radius_deg_lat, lat + radius_deg_lat
            lon_min, lon_max = lon - radius_deg_lon, lon + radius_deg_lon

            rows = np.flatnonzero((lat_axis >= lat_min) & (lat_axis <= lat_max))
            cols = np.flatnonzero((lon_axis >= lon_min) & (lon_axis <= lon_max))
            if rows.size == 0 or cols.size == 0:
                # Bounding box does not intersect this tile.
                continue

            window = (slice(rows[0], rows[-1] + 1), slice(cols[0], cols[-1] + 1))
            dists = haversine_distance_vector(lat, lon, lat_grid[window], lon_grid[window])
            mask[window] |= dists <= radius_km

        valid_mask = mask & (data != nodata)
        total_population += data[valid_mask].sum()
        if np.any(valid_mask):
            union_grid_points += np.column_stack((lat_grid[valid_mask], lon_grid[valid_mask], data[valid_mask])).tolist()

    return total_population, union_grid_points

def clip_grid_points_to_country(grid_points, country_polygon):
    """Keep only the grid points that lie inside ``country_polygon``.

    Parameters
    ----------
    grid_points : iterable
        Items whose first two entries are ``(lat, lon)``; further entries are
        carried along untouched.
    country_polygon : shapely geometry
        Country outline; it is prepared once to speed up repeated tests.

    Returns
    -------
    list
        The original items for which the polygon contains ``Point(lon, lat)``.
    """
    prepared_country = prep(country_polygon)
    inside = []
    for point in grid_points:
        lat, lon = point[0], point[1]
        # shapely expects (x, y) order, i.e. (longitude, latitude).
        if prepared_country.contains(Point(lon, lat)):
            inside.append(point)
    return inside

def filter_conflicts(df, country_code, year):
    """Return the rows of ``df`` for one country code and one year.

    The country code is matched as a whole number inside the string form of
    ``country_id`` (so ``"90"`` matches ``90`` or ``"200, 90"`` but not ``900``
    or ``290``). A plain substring/regex search would also select every
    country whose code merely contains the requested digits.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have ``year`` and ``country_id`` columns.
    country_code : str or int
        Country code to select.
    year : int
        Year to select.

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``df``.
    """
    import re  # local import so the function is self-contained

    # Digits of the code must not be preceded or followed by another digit.
    code_pattern = rf"(?<!\d){re.escape(str(country_code))}(?!\d)"
    in_year = df['year'] == year
    in_country = df['country_id'].astype(str).str.contains(code_pattern, regex=True)
    return df[in_year & in_country]

def get_conflict_coordinates(df):
    """Extract ``[latitude, longitude]`` pairs from a conflict-event frame.

    Rows where either coordinate is missing are dropped.

    Returns
    -------
    list of [lat, lon]
        One pair per remaining row, in the original row order.
    """
    coord_cols = ['latitude', 'longitude']
    located = df.dropna(subset=coord_cols)
    return located[coord_cols].values.tolist()



In [4]:
import os
import math
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from shapely.prepared import prep
from concurrent.futures import ThreadPoolExecutor

# Constants
# Spherical-Earth radius in kilometres; scale factor of the haversine distance.
EARTH_RADIUS_KM = 6371
# File-name template of the population raster tiles; "{tile}" is the tile number.
FILE_PATTERN = "gpw_v4_population_count_adjusted_to_2015_unwpp_country_totals_rev11_2020_30_sec_{tile}.asc"

# --- All function definitions remain unchanged ---
# (read_population_count, prepare_pixel_grid, haversine_distance_vector, 
# get_population_in_conflict_area, clip_grid_points_to_country, 
# filter_conflicts, get_conflict_coordinates)

# Load shared data (outside parallel scope)
years = [1995, 2008, 2009, 2011]

# Population raster tiles 1..8; tiles whose file is missing are skipped
# (read_population_count returns None for them).
all_data = []
for tile in range(1, 9):
    fp = raw_data_path("shocks", "gpw-v4", FILE_PATTERN.format(tile=tile))
    result = read_population_count(fp)
    if result:
        all_data.append(result)

# National population per country and year; the source column is in thousands.
df_pop = pd.read_excel(
    raw_data_path("shocks", 'WPP2024_GEN_F01_DEMOGRAPHIC_INDICATORS_COMPACT.xlsx'),
    sheet_name="Estimates", skiprows=16
)[['ISO3 Alpha-code', 'Year', 'Total Population, as of 1 January (thousands)']]
df_pop.columns = ['ISO_code', 'Year', 'Population']
df_pop.dropna(inplace=True)
df_pop['Year'] = df_pop['Year'].astype(int)
df_pop['Population'] *= 1000  # thousands -> persons

# Conflict events, restricted to conflicts listed in the UCDP/PRIO conflict table.
UcdpPrioConflict_csv = pd.read_csv(raw_data_path("shocks", "UcdpPrioConflict_v24_1.csv"))
# low_memory=False parses the file in one pass so each column gets a single,
# consistently inferred dtype instead of chunk-wise (possibly mixed) types.
event_csv = pd.read_csv(raw_data_path("shocks", "GEDEvent_v24_1.csv"), low_memory=False)
event_csv = event_csv[event_csv['conflict_new_id'].isin(UcdpPrioConflict_csv['conflict_id'].unique())]

# Country outlines, reprojected to geographic coordinates (EPSG:4326).
countries = gpd.read_file(raw_data_path("shocks", "country_shapefiles", "World_Countries_Generalized.shp")).to_crs("EPSG:4326")

import os

def process_country_code(country_code):
    """Compute yearly conflict-exposure figures for one country and save them as CSV.

    For every year in the module-level ``years`` list, the conflict events of
    the country with at least one fatality (``best > 0``) are located, the
    population cells around them are collected and clipped to the country
    outline, and the clipped population is compared with the national
    population of that year.

    Two files are written via ``processed_data_path``:
    ``conflict_summary_<ISO3>.csv`` (one row per year) and
    ``heatmap_grid_<ISO3>.csv`` (one row per selected grid cell and year).
    Both are always written with a header row, even when they contain no
    data, so they can be read back with ``pd.read_csv``.

    NOTE(review): the raster file names contain "2020" while the national
    population is taken for each analysis year, so ``percent`` presumably
    mixes reference years -- confirm this is intended.

    Parameters
    ----------
    country_code : str or int
        Country code as stored in ``ISO_GW``.

    Returns
    -------
    str or None
        The ISO3 code on success; ``None`` when the code is not present in
        ``ISO_GW`` or no shape is found for the country.
    """
    summary_columns = ['gw_code', 'iso3', 'year', 'pop_in_conflict', 'national_pop', 'percent']
    heatmap_columns = ['year', 'latitude', 'longitude', 'population']
    summary_rows = []
    heatmap_points = []

    # Compare as strings so both "900" and 900 are accepted.
    code_str = str(country_code)
    iso3 = next((iso for iso, code in ISO_GW.items() if str(code) == code_str), None)
    if not iso3:
        return None

    iso2 = ISO_ISO2[iso3]
    country_gdf = countries[countries['ISO'] == iso2]
    if country_gdf.empty:
        return None

    polygon = country_gdf.geometry.iloc[0]

    for yr in years:
        conflict_df = filter_conflicts(event_csv, country_code, yr)
        conflict_df = conflict_df[conflict_df['best'] > 0]
        coords = get_conflict_coordinates(conflict_df)

        if not coords:
            pop_in_conflict = 0
        else:
            # The raw total also covers cells outside the country, so only the
            # grid points are kept and the total is recomputed after clipping.
            _, union_grid_points = get_population_in_conflict_area(all_data, coords)
            union_grid_points = clip_grid_points_to_country(union_grid_points, polygon)
            pop_in_conflict = sum(pt[2] for pt in union_grid_points)
            # store grid with year tag
            heatmap_points.extend(
                {
                    'year': yr,
                    'latitude': pt[0],
                    'longitude': pt[1],
                    'population': pt[2]
                }
                for pt in union_grid_points
            )

        pop_matches = df_pop[(df_pop['ISO_code'] == iso3) & (df_pop['Year'] == yr)]['Population']
        national_pop = pop_matches.iloc[0] if not pop_matches.empty else None
        if national_pop is not None and national_pop > 0:
            pct = (pop_in_conflict / national_pop) * 100
        else:
            pct = None

        summary_rows.append({
            'gw_code': country_code,
            'iso3': iso3,
            'year': yr,
            'pop_in_conflict': pop_in_conflict,
            'national_pop': national_pop,
            'percent': pct
        })

    # Save individual files; explicit columns guarantee a header even when a list is empty.
    pd.DataFrame(summary_rows, columns=summary_columns).to_csv(
        processed_data_path("shocks", "proximity_conflict", f"conflict_summary_{iso3}.csv"), index=False)
    pd.DataFrame(heatmap_points, columns=heatmap_columns).to_csv(
        processed_data_path("shocks", "proximity_conflict", f"heatmap_grid_{iso3}.csv"), index=False)

    return iso3


# Parallel execution
valid_gw_codes = ["811", "840", "850", "900"]  # Cambodia, Philippines, Indonesia, Australia
valid_gw_codes = ["900"]  # overrides the full list above: only Australia is processed

from concurrent.futures import ThreadPoolExecutor, as_completed

# ISO3 codes of the countries whose files were written.
completed = []

with ThreadPoolExecutor() as executor:
    # Map each submitted job to the country code it was started for.
    futures = {}
    for code in valid_gw_codes:
        futures[executor.submit(process_country_code, code)] = code

    # Report countries in the order their jobs finish; .result() re-raises worker errors.
    for future in as_completed(futures):
        result = future.result()
        if not result:
            continue
        completed.append(result)
        print(f"✅ Saved results for {result}")



  event_csv = pd.read_csv(raw_data_path("shocks", "GEDEvent_v24_1.csv"))


✅ Saved results for AUS


In [3]:
import os
import math
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from shapely.prepared import prep
from concurrent.futures import ThreadPoolExecutor

# Constants
# Spherical-Earth radius in kilometres; scale factor of the haversine distance.
EARTH_RADIUS_KM = 6371
# File-name template of the population raster tiles; "{tile}" is the tile number.
FILE_PATTERN = "gpw_v4_population_count_adjusted_to_2015_unwpp_country_totals_rev11_2020_30_sec_{tile}.asc"

# --- All function definitions remain unchanged ---
# (read_population_count, prepare_pixel_grid, haversine_distance_vector, 
# get_population_in_conflict_area, clip_grid_points_to_country, 
# filter_conflicts, get_conflict_coordinates)

# Load shared data (outside parallel scope)
years = [2008, 2009, 2011]

# Population raster tiles 1..8; tiles whose file is missing are skipped
# (read_population_count returns None for them).
all_data = []
for tile in range(1, 9):
    fp = raw_data_path("shocks", "gpw-v4", FILE_PATTERN.format(tile=tile))
    result = read_population_count(fp)
    if result:
        all_data.append(result)

# National population per country and year; the source column is in thousands.
df_pop = pd.read_excel(
    raw_data_path("shocks", 'WPP2024_GEN_F01_DEMOGRAPHIC_INDICATORS_COMPACT.xlsx'),
    sheet_name="Estimates", skiprows=16
)[['ISO3 Alpha-code', 'Year', 'Total Population, as of 1 January (thousands)']]
df_pop.columns = ['ISO_code', 'Year', 'Population']
df_pop.dropna(inplace=True)
df_pop['Year'] = df_pop['Year'].astype(int)
df_pop['Population'] *= 1000  # thousands -> persons

# Conflict events, restricted to conflicts listed in the UCDP/PRIO conflict table.
UcdpPrioConflict_csv = pd.read_csv(raw_data_path("shocks", "UcdpPrioConflict_v24_1.csv"))
# low_memory=False parses the file in one pass so each column gets a single,
# consistently inferred dtype instead of chunk-wise (possibly mixed) types.
event_csv = pd.read_csv(raw_data_path("shocks", "GEDEvent_v24_1.csv"), low_memory=False)
event_csv = event_csv[event_csv['conflict_new_id'].isin(UcdpPrioConflict_csv['conflict_id'].unique())]

# Country outlines, reprojected to geographic coordinates (EPSG:4326).
countries = gpd.read_file(raw_data_path("shocks", "country_shapefiles", "World_Countries_Generalized.shp")).to_crs("EPSG:4326")

def process_country_code(country_code):
    """Print, per year, the population of one country living near conflict events.

    For every year in the module-level ``years`` list, events of the country
    with ``best > 0`` are located, the surrounding population cells are
    clipped to the country outline and summed, and the total is printed
    together with its share of the national population.

    Returns ``None`` in all cases; returns early (printing nothing) when the
    code is not found in ``ISO_GW`` or the country has no shape.
    """
    iso3 = next((iso for iso, code in ISO_GW.items() if str(code) == country_code), None)
    if not iso3:
        return

    iso2 = ISO_ISO2[iso3]
    country_gdf = countries[countries['ISO'] == iso2]
    if country_gdf.empty:
        return
    outline = country_gdf.geometry.iloc[0]

    for yr in years:
        print(f"Processing {iso3} in {yr}...")
        events = filter_conflicts(event_csv, country_code, yr)
        events = events[events['best'] > 0]
        coords = get_conflict_coordinates(events)

        # Only cells inside the country count, so the total is taken after clipping.
        clipped_points = []
        if coords:
            _, raw_points = get_population_in_conflict_area(all_data, coords)
            clipped_points = clip_grid_points_to_country(raw_points, outline)
        pop_in_conflict = sum(pt[2] for pt in clipped_points)

        national_pop = df_pop[(df_pop['ISO_code'] == iso3) & (df_pop['Year'] == yr)]['Population']
        if national_pop.empty or not national_pop.iloc[0] > 0:
            print(f"{yr}: No national population data")
            continue

        pct = (pop_in_conflict / national_pop.iloc[0]) * 100
        print(f"{yr}: {pop_in_conflict:.0f} people in conflict areas ({pct:.2f}% of national pop)")

# Parallel execution
valid_gw_codes = ["811", "840", "850", "900"]  # Cambodia, Philippines, Indonesia, Australia

with ThreadPoolExecutor() as executor:
    # executor.map only re-raises a worker's exception when its result is
    # retrieved; consuming the iterator makes failures visible instead of
    # silently ending that country's output.
    list(executor.map(process_country_code, valid_gw_codes))


  event_csv = pd.read_csv(raw_data_path("shocks", "GEDEvent_v24_1.csv"))


Processing KHM in 2008...Processing PHL in 2008...

Processing IDN in 2008...
Processing AUS in 2008...
2008: 0 people in conflict areas (0.00% of national pop)
Processing AUS in 2009...
2009: 0 people in conflict areas (0.00% of national pop)
Processing AUS in 2011...
2011: 0 people in conflict areas (0.00% of national pop)
2008: 368435 people in conflict areas (2.64% of national pop)
Processing KHM in 2009...
2008: 583008 people in conflict areas (0.24% of national pop)
Processing IDN in 2009...
2009: 0 people in conflict areas (0.00% of national pop)
Processing IDN in 2011...
2011: 0 people in conflict areas (0.00% of national pop)
2009: 368435 people in conflict areas (2.60% of national pop)
Processing KHM in 2011...
2011: 368435 people in conflict areas (2.52% of national pop)
2008: 52137716 people in conflict areas (56.71% of national pop)
Processing PHL in 2009...
2009: 61766880 people in conflict areas (66.09% of national pop)
Processing PHL in 2011...
2011: 59335016 people in 

In [4]:
# Example setup
valid_gw_codes = ["811", "840", "850", "900"]  # Cambodia, Philippines, Indonesia, Australia
years = [2008, 2009, 2011]

# Assume raw_data_path, ISO_GW, ISO_ISO2 are predefined
# Population raster tiles 1..8; tiles whose file is missing are skipped
# (read_population_count returns None for them).
all_data = []
for tile in range(1, 9):
    fp = raw_data_path("shocks", "gpw-v4", FILE_PATTERN.format(tile=tile))
    result = read_population_count(fp)
    if result:
        all_data.append(result)

# National population per country and year; the source column is in thousands.
df_pop = pd.read_excel(raw_data_path("shocks", 'WPP2024_GEN_F01_DEMOGRAPHIC_INDICATORS_COMPACT.xlsx'), 
    sheet_name="Estimates", skiprows=16)[['ISO3 Alpha-code', 'Year', 'Total Population, as of 1 January (thousands)']]
df_pop.columns = ['ISO_code', 'Year', 'Population']
df_pop.dropna(inplace=True)
df_pop['Year'] = df_pop['Year'].astype(int)
df_pop['Population'] *= 1000  # thousands -> persons

# Conflict events, restricted to conflicts listed in the UCDP/PRIO conflict table.
UcdpPrioConflict_csv = pd.read_csv(raw_data_path("shocks", "UcdpPrioConflict_v24_1.csv"))
# low_memory=False parses the file in one pass so each column gets a single,
# consistently inferred dtype instead of chunk-wise (possibly mixed) types.
event_csv = pd.read_csv(raw_data_path("shocks", "GEDEvent_v24_1.csv"), low_memory=False)
event_csv = event_csv[event_csv['conflict_new_id'].isin(UcdpPrioConflict_csv['conflict_id'].unique())]

# Country outlines, reprojected to geographic coordinates (EPSG:4326).
countries = gpd.read_file(raw_data_path("shocks", "country_shapefiles", "World_Countries_Generalized.shp")).to_crs("EPSG:4326")

# Sequential run: for every country and year, print the population living
# near conflict events and its share of the national population.
for country_code in valid_gw_codes:
    # Translate the country code to ISO3, then ISO2 to find the country outline.
    iso3 = next((iso for iso, code in ISO_GW.items() if str(code) == country_code), None)
    if not iso3:
        continue
    iso2 = ISO_ISO2[iso3]

    country_gdf = countries[countries['ISO'] == iso2]
    if country_gdf.empty:
        continue
    polygon = country_gdf.geometry.iloc[0]

    for yr in years:
        print(f"Processing {iso3} in {yr}...")
        conflict_df = filter_conflicts(event_csv, country_code, yr)
        conflict_df = conflict_df[conflict_df['best'] > 0]
        coords = get_conflict_coordinates(conflict_df)

        # Only cells inside the country count, so the total is taken after clipping.
        union_grid_points = []
        if coords:
            union_grid_points = clip_grid_points_to_country(
                get_population_in_conflict_area(all_data, coords)[1], polygon
            )
        pop_in_conflict = sum(pt[2] for pt in union_grid_points)

        national_pop = df_pop[(df_pop['ISO_code'] == iso3) & (df_pop['Year'] == yr)]['Population']
        if national_pop.empty or not national_pop.iloc[0] > 0:
            print(f"{yr}: No national population data")
        else:
            pct = (pop_in_conflict / national_pop.iloc[0]) * 100
            print(f"{yr}: {pop_in_conflict:.0f} people in conflict areas ({pct:.2f}% of national pop)")


  event_csv = pd.read_csv(raw_data_path("shocks", "GEDEvent_v24_1.csv"))


Processing KHM in 2008...
2008: 368435 people in conflict areas (2.64% of national pop)
Processing KHM in 2009...
2009: 368435 people in conflict areas (2.60% of national pop)
Processing KHM in 2011...
2011: 368435 people in conflict areas (2.52% of national pop)
Processing PHL in 2008...
2008: 52137716 people in conflict areas (56.71% of national pop)
Processing PHL in 2009...
2009: 61766880 people in conflict areas (66.09% of national pop)
Processing PHL in 2011...
2011: 59335016 people in conflict areas (60.94% of national pop)
Processing IDN in 2008...
2008: 583008 people in conflict areas (0.24% of national pop)
Processing IDN in 2009...
2009: 0 people in conflict areas (0.00% of national pop)
Processing IDN in 2011...
2011: 0 people in conflict areas (0.00% of national pop)
Processing AUS in 2008...
2008: 0 people in conflict areas (0.00% of national pop)
Processing AUS in 2009...
2009: 0 people in conflict areas (0.00% of national pop)
Processing AUS in 2011...
2011: 0 people in