In [4]:
#Read Hospital open/close dates from (Hospital_OpenCloseoverTime.xlsx) spreadsheet and create timelines for understanding what hospitals are open across what dates (call this availability timeline)


#Read timelines of interest (string input) that we want to analyze attacks and catchment areas against hospitals for (call this data timeline)
#Timelines of interest: Al Shifa Medical Hospital (Oct 7 2023, to Nov 3 2023, European Hospital: Dec 11 2023 to April 28 2024, Nasser Hospital: Nov 11 2024 to Feb 2 2025

#Split data timeline into two week segments, if the last segment is less that 14 days it is okay (call this data aggregation method)

#For each two week segment, compare data timeline (aka the two week time range selected) against availability timeline to determine when hospitals were open during data timeline

#Determining catchment areas for hospitals that were open during data timeline: Based on the locations of the hospitals, find the catchment area based on the Voronoi method and geojson file. Catchment area of each hospital: Area where that hospital is the closest one and is within 5 km absolute distance. Determine relative to other hospitals open during timeline the areas that each hospital is the closest to, within abs dist of 5 km (call this catchment area method)

#Weighted catchment area method: If the hospital list changes during data timeline segment, split segment into smaller segments where hospital list is constant. For each smaller segment, determine catchment areas for hospitals open during that smaller segment. Weight the catchment areas by the number of days each smaller segment represents in the overall data timeline segment. Combine weighted catchment areas to get overall weighted catchment area for data timeline segment (call this weighted catchment area method)

#Attack count method: For each catchment area (only normal, not weighted), count the number of attacks that occurred within the catchment area of interest and during the data timeline segment of interest. This is based on the ACLED xlsx file.

#Total Counts method: Within data timeline segment of interest, count total number of attacks that occurred during that time period (regardless of location)

#Output method: For each data timeline segment of interest, output the following in an excel:
#- Data timeline segment (start date, end date)
#- For each hospital that was open during that data timeline segment:
#  - Hospital name
#  - Catchment area (km^2) (weighted if needed, but if no weighting needed just normal catchment area is also okay)
#  - Number of attacks within catchment area during data timeline segment
#  - Percentage of total attacks during data timeline segment that occurred within catchment area
#  - Total Attacks during data timeline segment

#Output Test HTML Map: For the first two week data timeline segment of Al Nasser, create an HTML map that shows:
#- Hospital locations (markers with hospital names)
#- Catchment areas (polygons with hospital that are open during that time period, based on Hospital open/close spreadsheet)



**Code Structure Summary**
- **Imports & Config:** load required libraries and set file paths.
- **load_hospitals(path):** read `Hospital_OpenCloseoverTime.xlsx`, parse toggle date columns (start at column D), build a GeoDataFrame with `open_intervals` per hospital.
- **load_attacks(path):** read ACLED Excel (date at column B, lat/lon at W/X) and return GeoDataFrame with parsed dates.
- **split_timeline(start,end,freq_days=14):** return list of (seg_start,seg_end) two-week windows.
- **hospitals_open_in_range(hospitals_gdf,seg_start,seg_end):** select hospitals with >=1 day overlap based on `open_intervals`.
- **subsegments_when_availability_changes(...):** split a segment into subsegments where the open-hospital set is constant and provide day-weights.
- **compute_voronoi_catchments(hosp_gdf,boundary_gdf):** compute Voronoi polygons in a metric CRS, clip to boundary and return per-hospital polygons.
- **limit_by_distance(polygons_gdf,hosp_gdf,max_km=5):** intersect polygons with 5 km geodesic buffers around each hospital.
- **count_attacks_in_polygon(attacks_gdf,polygon,date_range):** count attacks inside polygon and within date range.
- **export_results_excel(df,out_path):** write consolidated results to `02_10_Revised_Catchareas_Attackcounts.xlsx`.
- **make_test_map(...):** produce an interactive `folium` HTML map for the Nasser segment (11/11/2024 â†’ +14 days).

This notebook adds the above functions and a runner cell that orchestrates processing for the three timelines you provided.

In [5]:
# Imports and helper functions
import os
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon, shape, mapping
from shapely.ops import unary_union
import numpy as np
from datetime import timedelta
import math
from scipy.spatial import Voronoi
import folium

# File paths (adjust if needed)
HOSPITALS_XLSX = 'Hospitals_OpenCloseoverTime.xlsx'
ACLED_XLSX = 'ACLED_May_09_25_Gaza.xlsx'
GEOJSON_BOUNDARY = 'gaza_boundary.geojson'
OUTPUT_XLSX = '02_10_Revised_Catchareas_Attackcounts.xlsx'
OUTPUT_HTML_NASSER = 'Nasser_2024-11-11_map.html'

# Utility: parse dates robustly
def parse_date(x):
    if pd.isna(x):
        return None
    if hasattr(x, 'date'):
        return pd.Timestamp(x)
    for fmt in ('%m/%d/%Y', '%Y-%m-%d', '%d-%m-%Y'):
        try:
            return pd.to_datetime(str(x), format=fmt, errors='raise')
        except Exception:
            continue
    return pd.to_datetime(x, errors='coerce')

def _find_col(cols, opts):
    for o in opts:
        for c in cols:
            if isinstance(c, str) and c.strip().lower().startswith(o.lower()):
                return c
    return None

def load_hospitals(path):
    """Load hospital data with Open/Closed schedule. Uses column detection and schedule_meta from row 2."""
    raw_head = pd.read_excel(path, header=None, nrows=2)
    full = pd.read_excel(path, header=0)
    cols = list(full.columns)
    hosp_col = _find_col(cols, ['hospital', 'name'])
    lon_col = _find_col(cols, ['longitude (x)', 'longitude', 'lon', 'x'])
    lat_col = _find_col(cols, ['latitude (y)', 'latitude', 'lat', 'y'])
    if hosp_col is None or lon_col is None or lat_col is None:
        raise ValueError(f"Couldn't detect Hospital/lon/lat. Found: {cols}")
    # Detect Open/Closed columns with dates in row 2
    schedule_meta = []
    for c in cols:
        if isinstance(c, str):
            lc = c.strip().lower()
            if lc.startswith('open') or lc.startswith('closed'):
                typ = 'Open' if lc.startswith('open') else 'Closed'
                try:
                    idx = cols.index(c)
                    raw = raw_head.iat[1, idx] if raw_head.shape[0] > 1 else None
                    dt = pd.to_datetime(raw) if pd.notnull(raw) else None
                except Exception:
                    dt = None
                if dt is not None:
                    schedule_meta.append((c, typ, dt))
    schedule_meta = [(c, t, d) for (c, t, d) in schedule_meta if d is not None]
    # Fallback: per-row dates (Open/Closed columns contain dates in each cell)
    open_cols = sorted([c for c in cols if isinstance(c, str) and 'open' in c.lower()], key=lambda x: (x.count('.'), x))
    closed_cols = sorted([c for c in cols if isinstance(c, str) and 'closed' in c.lower()], key=lambda x: (x.count('.'), x))
    records = []
    for _, row in full.iterrows():
        name = str(row[hosp_col]).strip()
        lon = pd.to_numeric(row[lon_col], errors='coerce')
        lat = pd.to_numeric(row[lat_col], errors='coerce')
        if pd.isna(lon) or pd.isna(lat):
            continue
        open_intervals = []
        if schedule_meta:
            events = sorted([(pd.to_datetime(d).to_pydatetime(), col, typ) for (col, typ, d) in schedule_meta], key=lambda x: x[0])
            changes = []
            for dt, col, typ in events:
                val = row.get(col, None) if col in row.index else None
                if pd.notnull(val) and str(val).strip() != '':
                    changes.append((dt, typ))
            if not changes:
                for dt, col, typ in events:
                    changes.append((dt, typ))
            changes = sorted(changes, key=lambda x: x[0])
            compressed = []
            for dt, typ in changes:
                if not compressed or compressed[-1][1] != typ:
                    compressed.append((dt, typ))
            cur_start = None
            for dt, typ in compressed:
                if typ == 'Open':
                    cur_start = pd.Timestamp(dt)
                elif typ == 'Closed' and cur_start is not None:
                    open_intervals.append((cur_start, pd.Timestamp(dt) - pd.Timedelta(days=1)))
                    cur_start = None
            if cur_start is not None:
                open_intervals.append((cur_start, pd.Timestamp('2100-01-01')))
        elif open_cols and closed_cols:
            for oc, cc in zip(open_cols, closed_cols):
                start = parse_date(row.get(oc))
                end = parse_date(row.get(cc))
                if start is not None and end is not None:
                    open_intervals.append((pd.Timestamp(start), pd.Timestamp(end)))
        if not open_intervals:
            open_intervals = [(pd.Timestamp('1900-01-01'), pd.Timestamp('2100-01-01'))]
        records.append({'name': name, 'longitude': float(lon), 'latitude': float(lat), 'open_intervals': open_intervals})
    hg = gpd.GeoDataFrame(records, geometry=[Point(r['longitude'], r['latitude']) for r in records], crs='EPSG:4326')
    return hg

def _find_acled_col(df, opts):
    """Find first column matching any of opts (case-insensitive, allows spaces as underscores)."""
    for col in df.columns:
        norm = str(col).lower().replace(' ', '_').replace('-', '_')
        for opt in opts:
            if norm == opt or norm.startswith(opt + '_') or opt in norm:
                return col
    return None

def load_attacks(path):
    """Load ACLED data with column name detection."""
    df = pd.read_excel(path, header=0)
    date_col = _find_acled_col(df, ['event_date', 'date', 'iso_date', 'eventdate'])
    lat_col = _find_acled_col(df, ['latitude', 'lat', 'y'])
    lon_col = _find_acled_col(df, ['longitude', 'lon', 'long', 'x'])
    if date_col is None or lat_col is None or lon_col is None:
        raise ValueError(f"Could not find date/lat/lon in ACLED. Columns: {list(df.columns)}")
    dates = pd.to_datetime(df[date_col], errors='coerce')
    lats = pd.to_numeric(df[lat_col], errors='coerce')
    lons = pd.to_numeric(df[lon_col], errors='coerce')
    attacks = pd.DataFrame({'date': dates, 'latitude': lats, 'longitude': lons})
    attacks = attacks.dropna(subset=['date', 'latitude', 'longitude']).copy()
    attacks['date'] = pd.to_datetime(attacks['date']).dt.normalize()
    ag = gpd.GeoDataFrame(attacks, geometry=[Point(xy) for xy in zip(attacks.longitude, attacks.latitude)], crs='EPSG:4326')
    return ag

def split_timeline(start, end, freq_days=14):
    start = pd.to_datetime(start)
    end = pd.to_datetime(end)
    segments = []
    cur = start
    while cur <= end:
            seg_end = min(end, cur + pd.Timedelta(days=freq_days-1))
            segments.append((cur, seg_end))
            cur = seg_end + pd.Timedelta(days=1)
    return segments

def intervals_overlap(a_start, a_end, b_start, b_end):
    return (a_start <= b_end) and (b_start <= a_end)

def hospitals_open_in_range(hosp_gdf, seg_start, seg_end):
    seg_start = pd.to_datetime(seg_start).normalize()
    seg_end = pd.to_datetime(seg_end).normalize()
    rows = []
    for idx, row in hosp_gdf.iterrows():
        for (o_start, o_end) in row['open_intervals']:
            o_start = pd.to_datetime(o_start).normalize()
            o_end = pd.to_datetime(o_end).normalize()
            if intervals_overlap(o_start, o_end, seg_start, seg_end):
                # compute overlap days
                os_ = max(o_start, seg_start)
                oe_ = min(o_end, seg_end)
                overlap_days = (oe_ - os_).days + 1
                rows.append((idx, overlap_days))
                break
    if not rows:
        return hosp_gdf.iloc[0:0]  # empty
    idxs = [r[0] for r in rows]
    subset = hosp_gdf.loc[idxs].copy()
    return subset

def voronoi_finite_polygons_2d(vor, radius=None):
    # Credit: adapted from common recipes to construct finite polygons from scipy Voronoi
    if vor.points.shape[1] != 2:
        raise ValueError('Requires 2D input')
    new_regions = []
    new_vertices = vor.vertices.tolist()
    center = vor.points.mean(axis=0)
    if radius is None:
        radius = np.ptp(vor.points, axis=0).max() * 2
    all_ridges = {}
    for (p1, p2), (v1, v2) in zip(vor.ridge_points, vor.ridge_vertices):
        all_ridges.setdefault(p1, []).append((p2, v1, v2))
        all_ridges.setdefault(p2, []).append((p1, v1, v2))
    for p1, region in enumerate(vor.point_region):
        vertices = vor.regions[region]
        if all(v >= 0 for v in vertices):
            new_regions.append(vertices)
            continue
        ridges = all_ridges[p1]
        new_region = [v for v in vertices if v>=0]
        for p2, v1, v2 in ridges:
            if v2 < 0 or v1 < 0:
                v = v1 if v1>=0 else v2
                tangent = vor.points[p2] - vor.points[p1]
                tangent /= np.linalg.norm(tangent)
                normal = np.array([-tangent[1], tangent[0]])
                midpoint = vor.points[[p1,p2]].mean(axis=0)
                direction = np.sign(np.dot(midpoint - center, normal)) * normal
                far_point = vor.vertices[v] + direction * radius
                new_vertices.append(far_point.tolist())
                new_region.append(len(new_vertices)-1)
        new_regions.append(new_region)
    return new_regions, np.asarray(new_vertices)

def compute_voronoi_catchments(hosp_gdf, boundary_gdf=None):
    # Project to metric CRS for Voronoi (Web Mercator)
    hosp_m = hosp_gdf.to_crs(epsg=3857).copy().reset_index(drop=True)
    pts = np.array([[p.x, p.y] for p in hosp_m.geometry])
    if len(pts) < 2:
        # single hospital: entire clipped boundary (or buffer)
        polys = []
        for idx, row in hosp_m.iterrows():
            if boundary_gdf is not None:
                bnd = boundary_gdf if boundary_gdf.crs and boundary_gdf.crs.to_epsg() == 3857 else boundary_gdf.to_crs(epsg=3857)
                poly = bnd.unary_union
            else:
                poly = row.geometry.buffer(50000)
            polys.append(poly)
        poly_gdf = gpd.GeoDataFrame({'geometry': polys, 'name': hosp_gdf['name'].values}, crs='EPSG:3857')
        return poly_gdf, hosp_m
    vor = Voronoi(pts)
    regions, vertices = voronoi_finite_polygons_2d(vor)
    polygons = []
    for region in regions:
        poly = Polygon(vertices[region])
        polygons.append(poly)
    poly_gdf = gpd.GeoDataFrame({'geometry': polygons}, crs='EPSG:3857')
    # Clip to boundary if provided
    if boundary_gdf is not None:
        boundary_m = boundary_gdf.to_crs(epsg=3857)
        clip_poly = boundary_m.unary_union
        poly_gdf['geometry'] = poly_gdf.geometry.intersection(clip_poly)
    # Assign back to hospitals order
    poly_gdf = poly_gdf.reset_index(drop=True)
    poly_gdf['name'] = hosp_gdf['name'].values
    # Return in metric CRS (EPSG:3857) and original hospital points in same CRS for buffering steps
    hosp_m = hosp_m.reset_index(drop=True)
    return poly_gdf, hosp_m

def limit_by_distance(polygons_gdf, hosp_gdf, max_km=5):
    # polygons_gdf and hosp_gdf expected in same metric CRS (e.g., EPSG:3857)
    max_m = max_km * 1000
    clipped = []
    for idx, row in polygons_gdf.iterrows():
        hosp_point = hosp_gdf.loc[idx].geometry
        buf = hosp_point.buffer(max_m)
        inter = row.geometry.intersection(buf)
        clipped.append(inter)
    out = polygons_gdf.copy()
    out['geometry'] = clipped
    return out

def count_attacks_in_polygon(attacks_gdf, poly, start_date, end_date):
    if poly.is_empty:
        return 0
    attacks_clip = attacks_gdf[(attacks_gdf['date'] >= pd.to_datetime(start_date)) & (attacks_gdf['date'] <= pd.to_datetime(end_date))].copy()
    if attacks_clip.empty:
        return 0
    attacks_clip = attacks_clip.to_crs(epsg=3857)
    return int(attacks_clip.within(poly).sum())

def export_results_excel(df, out_path):
    df.to_excel(out_path, index=False)

def make_test_map(hosp_gdf, catch_gdf, out_html, center=None):
    # hosp_gdf and catch_gdf in EPSG:4326 for folium
    if hosp_gdf.empty:
        return
    if center is None:
        center = [float(hosp_gdf.geometry.y.mean()), float(hosp_gdf.geometry.x.mean())]
    m = folium.Map(location=center, zoom_start=11)
    for _, row in catch_gdf.iterrows():
        geom = row.geometry
        if geom is None or geom.is_empty:
            continue
        try:
            geojson = mapping(geom)
            folium.GeoJson(geojson, name=row.get('name', 'catchment')).add_to(m)
        except Exception:
            pass
    for _, row in hosp_gdf.iterrows():
        folium.Marker([float(row.geometry.y), float(row.geometry.x)], popup=str(row['name'])).add_to(m)
    out_path = os.path.abspath(out_html)
    m.save(out_path)

# End of helper definitions

In [6]:
# Runner: orchestrate using the timelines you provided
# Timelines of interest (user-provided):
timelines = {
    'Al Shifa Medical Hospital': ('2023-10-07','2023-11-03'),
    'European Hospital': ('2023-12-11','2024-04-28'),
    'Nasser Hospital': ('2024-11-11','2025-02-02')
}
# Load data
hosp = load_hospitals(HOSPITALS_XLSX)
attacks = load_attacks(ACLED_XLSX)
boundary = None
try:
    if os.path.exists(GEOJSON_BOUNDARY):
        boundary = gpd.read_file(GEOJSON_BOUNDARY)
        if boundary.crs is None:
            boundary = boundary.set_crs('EPSG:4326')
    else:
        boundary = None
except Exception as e:
    print(f"Could not load boundary: {e}")
    boundary = None

results = []
for hosp_name, (tstart, tend) in timelines.items():
    segments = split_timeline(tstart, tend, freq_days=14)
    for seg_start, seg_end in segments:
        open_h = hospitals_open_in_range(hosp, seg_start, seg_end)
        if open_h.empty:
            # record zero results for this segment
            results.append({'hospital_of_interest': hosp_name, 'segment_start': seg_start, 'segment_end': seg_end, 'hospital_name': None, 'catchment_km2': 0.0, 'attacks_in_catchment': 0, 'pct_of_total': 0.0, 'total_attacks_segment': 0})
            continue
        # Compute Voronoi catchments for the open hospitals set
        catch_v, hosp_m = compute_voronoi_catchments(open_h, boundary)
        # Limit by 5 km around each hospital
        catch_limited = limit_by_distance(catch_v, hosp_m, max_km=5)
        # Convert catchment back to 4326 for area calculation and attack counting (we'll compute area in metric CRS)
        catch4326 = catch_limited.to_crs(epsg=4326)
        # total attacks in this segment (anywhere)
        total_attacks = int(((attacks['date'] >= pd.to_datetime(seg_start)) & (attacks['date'] <= pd.to_datetime(seg_end))).sum())
        # For each hospital polygon, compute area and count attacks (non-weighted catchments as requested)
        for idx, row in catch_limited.reset_index(drop=True).iterrows():
            poly_metric = row.geometry
            if poly_metric is None or poly_metric.is_empty:
                area_km2 = 0.0
            else:
                area_km2 = poly_metric.area / 1e6
            # Count attacks (convert polygon to 4326 for spatial test)
            poly_4326 = gpd.GeoSeries([poly_metric], crs='EPSG:3857').to_crs(epsg=4326).iat[0]
            attacks_in = 0
            if not poly_4326.is_empty:
                attacks_in = count_attacks_in_polygon(attacks, poly_metric, seg_start, seg_end)
            pct = (attacks_in / total_attacks * 100.0) if total_attacks>0 else 0.0
            results.append({'hospital_of_interest': hosp_name, 'segment_start': seg_start, 'segment_end': seg_end, 'hospital_name': row.get('name'), 'catchment_km2': area_km2, 'attacks_in_catchment': attacks_in, 'pct_of_total': pct, 'total_attacks_segment': total_attacks})

# Export results as requested: single sheet for all three hospitals
resdf = pd.DataFrame(results)
out_xlsx_path = os.path.abspath(OUTPUT_XLSX)
export_results_excel(resdf, out_xlsx_path)

# Create HTML map for Nasser first two-week segment (11/11/2024 + 14 days)
nasser_start = pd.to_datetime('2024-11-11')
nasser_end = nasser_start + pd.Timedelta(days=13)
open_nasser = hospitals_open_in_range(hosp, nasser_start, nasser_end)
if not open_nasser.empty:
    catch_v, hosp_m = compute_voronoi_catchments(open_nasser, boundary)
    catch_limited = limit_by_distance(catch_v, hosp_m, max_km=5)
    hosp4326 = open_nasser.copy()
    hosp4326 = hosp4326.to_crs(epsg=4326)
    catch4326 = catch_limited.to_crs(epsg=4326)
    if 'name' not in catch4326.columns:
        catch4326['name'] = hosp4326['name'].values
    make_test_map(hosp4326, catch4326, OUTPUT_HTML_NASSER)
    print(f'Nasser map saved to: {os.path.abspath(OUTPUT_HTML_NASSER)}')
else:
    print(f'No hospitals open during Nasser segment {nasser_start.date()} to {nasser_end.date()}; skipping HTML map.')

print('Runner finished. Results written to', out_xlsx_path)

  clip_poly = boundary_m.unary_union


GEOSException: TopologyException: side location conflict at 3810531.8687143112 3716467.8988773813. This can occur if the input geometry is invalid.