In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

In [2]:
# Intervention table; its aed_distance / hospital_distance columns drive the
# coverage computations further down.
itv_aed_csv = '/Users/lye/Downloads/MDA/Github-MDA2024/1_Data/CLEANED/intervention_aed_related_distance.csv'
itv_aed = pd.read_csv(itv_aed_csv, low_memory=False)

itv_aed.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105841 entries, 0 to 105840
Data columns (total 58 columns):
 #   Column                            Non-Null Count   Dtype  
---  ------                            --------------   -----  
 0   mission_id                        105841 non-null  int64  
 1   service_name                      93471 non-null   object 
 2   postalcode_permanence             65151 non-null   float64
 3   cityname_permanence               69218 non-null   object 
 4   streetname_permanence             69623 non-null   object 
 5   housenumber_permanence            2667 non-null    float64
 6   latitude_permanence               97007 non-null   float64
 7   longitude_permanence              97690 non-null   float64
 8   permanence_short_name             105670 non-null  object 
 9   permanence_long_name              93486 non-null   object 
 10  vector_type                       104009 non-null  object 
 11  eventtype_firstcall               65569 non-null   o

In [3]:
# Existing AED locations, one row per device with lat / lon columns.
aed_csv = '/Users/lye/Downloads/MDA/Github-MDA2024/1_Data/CLEANED/aed_location_latlon.csv'
aed = pd.read_csv(aed_csv)

aed.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15775 entries, 0 to 15774
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   id            15775 non-null  float64
 1   type          5661 non-null   object 
 2   address       15775 non-null  object 
 3   number        13577 non-null  float64
 4   postal_code   15775 non-null  int64  
 5   municipality  15775 non-null  object 
 6   province      15775 non-null  object 
 7   location      8962 non-null   object 
 8   public        8656 non-null   object 
 9   available     4739 non-null   object 
 10  hours         1148 non-null   object 
 11  full_address  15775 non-null  object 
 12  lat           15775 non-null  float64
 13  lon           15775 non-null  float64
dtypes: float64(4), int64(1), object(9)
memory usage: 1.7+ MB


In [4]:
# De-duplicate in place, in two passes: first keep one row per mission_id,
# then keep one row per distinct (lat_itv, lon_itv) coordinate pair.
dedup_key_sets = (['mission_id'], ['lat_itv', 'lon_itv'])
for key_columns in dedup_key_sets:
    itv_aed.drop_duplicates(subset=key_columns, keep='first', inplace=True)
itv_aed.info()

<class 'pandas.core.frame.DataFrame'>
Index: 58901 entries, 0 to 105839
Data columns (total 58 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   mission_id                        58901 non-null  int64  
 1   service_name                      51811 non-null  object 
 2   postalcode_permanence             36008 non-null  float64
 3   cityname_permanence               38572 non-null  object 
 4   streetname_permanence             38721 non-null  object 
 5   housenumber_permanence            1535 non-null   float64
 6   latitude_permanence               54192 non-null  float64
 7   longitude_permanence              54547 non-null  float64
 8   permanence_short_name             58772 non-null  object 
 9   permanence_long_name              51821 non-null  object 
 10  vector_type                       57728 non-null  object 
 11  eventtype_firstcall               36166 non-null  object 
 12  eventlev

In [5]:
speed_running = 100  # m/min, i.e. an average running speed of 6 km/h
speed_driving = 500  # m/min, i.e. an average driving speed of 30 km/h (urban area in Belgium)
golden_minutes = 4   # best interval for CPR using an AED

# Distance limits in metres. The running budget is halved -- presumably because
# the AED has to be fetched and brought back; TODO confirm with the author.
max_aed_distance = speed_running * golden_minutes / 2
max_hospital_distance = speed_driving * golden_minutes

# An intervention is covered when either an AED or a hospital is within reach.
near_aed = itv_aed['aed_distance'] <= max_aed_distance
near_hospital = itv_aed['hospital_distance'] <= max_hospital_distance
itv_covered = itv_aed.loc[near_aed | near_hospital]

itv_covered.info()

<class 'pandas.core.frame.DataFrame'>
Index: 27738 entries, 0 to 105836
Data columns (total 58 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   mission_id                        27738 non-null  int64  
 1   service_name                      25047 non-null  object 
 2   postalcode_permanence             18008 non-null  float64
 3   cityname_permanence               20139 non-null  object 
 4   streetname_permanence             20192 non-null  object 
 5   housenumber_permanence            557 non-null    float64
 6   latitude_permanence               25406 non-null  float64
 7   longitude_permanence              25587 non-null  float64
 8   permanence_short_name             27636 non-null  object 
 9   permanence_long_name              25057 non-null  object 
 10  vector_type                       26807 non-null  object 
 11  eventtype_firstcall               18072 non-null  object 
 12  eventlev

In [6]:
## Coverage of AEDs and hospitals

# Share of de-duplicated interventions that fall inside the covered set.
n_covered = itv_covered.shape[0]
n_interventions = itv_aed.shape[0]
coverage = n_covered / n_interventions
coverage

0.47092579073360386

In [7]:
# Complement of the covered set: neither an AED nor a hospital is within reach.
# The masks are written as ~(x <= limit) rather than (x > limit) so that rows
# with a missing distance are still classified as uncovered.
aed_limit = speed_running * golden_minutes / 2
hospital_limit = speed_driving * golden_minutes
beyond_aed = ~(itv_aed['aed_distance'] <= aed_limit)
beyond_hospital = ~(itv_aed['hospital_distance'] <= hospital_limit)
itv_uncovered = itv_aed.loc[beyond_aed & beyond_hospital]

itv_uncovered.info()

<class 'pandas.core.frame.DataFrame'>
Index: 31163 entries, 15 to 105839
Data columns (total 58 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   mission_id                        31163 non-null  int64  
 1   service_name                      26764 non-null  object 
 2   postalcode_permanence             18000 non-null  float64
 3   cityname_permanence               18433 non-null  object 
 4   streetname_permanence             18529 non-null  object 
 5   housenumber_permanence            978 non-null    float64
 6   latitude_permanence               28786 non-null  float64
 7   longitude_permanence              28960 non-null  float64
 8   permanence_short_name             31136 non-null  object 
 9   permanence_long_name              26764 non-null  object 
 10  vector_type                       30921 non-null  object 
 11  eventtype_firstcall               18094 non-null  object 
 12  eventle

In [10]:
## Use KMeans to find the optimal location for AEDs

# One cluster per existing AED, fitted on the intervention coordinates only.
n_new_locations = aed.shape[0]
itv_coordinates = itv_aed[['lat_itv', 'lon_itv']]
kmeans = KMeans(n_clusters=n_new_locations, random_state=0)
kmeans.fit(itv_coordinates)

# Centre i is written onto row i of `aed`; the pairing is purely positional and
# carries no link to that row's existing device, since the model never sees
# the current AED coordinates.
aed[['new_lat', 'new_lon']] = kmeans.cluster_centers_
aed.head()

Unnamed: 0,id,type,address,number,postal_code,municipality,province,location,public,available,hours,full_address,lat,lon,new_lat,new_lon
0,13.0,,Blvd. fr. roosevelt,24.0,7060,Soignies,Hainaut,,Y,,,"Blvd. fr. roosevelt, 7060 Soignies, Hainaut",50.576042,4.06574,51.061387,5.072847
1,70.0,,Ch. de wégimont,76.0,4630,Ayeneux,Liège,,,,,"Ch. de wégimont, 4630 Ayeneux, Liège",50.60768,5.730187,51.038101,3.737019
2,71.0,,Place saint-lambert,,4020,Liège,Liège,,,,,"Place saint-lambert, 4020 Liège, Liège",50.645622,5.57362,50.838041,4.443571
3,72.0,,Rue du doyard,,4990,Lierneux,Liège,,,,,"Rue du doyard, 4990 Lierneux, Liège",50.287416,5.786325,50.596113,5.504008
4,73.0,,Fond saint servais,,4000,Liège,Liège,,,,,"Fond saint servais, 4000 Liège, Liège",50.646806,5.571031,51.059662,2.8932


In [11]:
# Calculate the distance between each intervention and the new AED locations

from pyproj import Geod

# Geodesic calculator on the WGS84 ellipsoid; find_closest_distance below
# calls geod.inv on it to obtain point-to-point distances.
geod = Geod(ellps='WGS84')


def find_closest_distance(df1, df2):
    """Return, for each row of ``df1``, the distance to its nearest ``df2`` point.

    Distances come from the module-level ``geod.inv`` (geodesic distance on the
    configured ellipsoid; pyproj documents the result as metres).

    Parameters
    ----------
    df1 : pandas.DataFrame
        Source points; must provide ``lon_itv`` and ``lat_itv`` columns.
    df2 : pandas.DataFrame
        Candidate points; must provide ``new_lon`` and ``new_lat`` columns.

    Returns
    -------
    list of float
        One minimum distance per row of ``df1``, in row order. The value is
        ``inf`` when ``df2`` is empty or when no comparable (non-NaN) distance
        exists for that row.
    """
    import numpy as np  # local import keeps the function self-contained

    # Pull the candidate coordinates out once instead of rebuilding a Series
    # for every (source, candidate) pair.
    target_lons = df2['new_lon'].to_numpy(dtype=float)
    target_lats = df2['new_lat'].to_numpy(dtype=float)
    n_targets = target_lons.size

    source_lons_all = df1['lon_itv'].to_numpy(dtype=float)
    source_lats_all = df1['lat_itv'].to_numpy(dtype=float)

    if n_targets == 0:
        # Nothing to compare against: every minimum stays at infinity.
        return [float('inf')] * len(source_lons_all)

    # geod.inv expects four equally sized arrays, so the current source point
    # is repeated into these reusable buffers.
    source_lons = np.empty(n_targets, dtype=float)
    source_lats = np.empty(n_targets, dtype=float)

    distances = []
    for lon, lat in zip(source_lons_all, source_lats_all):
        source_lons.fill(lon)
        source_lats.fill(lat)
        # One array call computes the distance to every candidate at once.
        _, _, candidate_distances = geod.inv(source_lons, source_lats,
                                             target_lons, target_lats)
        # np.fmin skips NaN and starts from +inf, matching a running
        # "distance < min_distance" comparison seeded with infinity.
        closest = np.fmin.reduce(np.asarray(candidate_distances, dtype=float),
                                 initial=np.inf)
        distances.append(float(closest))
    return distances

# Distance from every intervention to its closest KMeans-proposed AED location.
new_aed_distances = find_closest_distance(itv_aed, aed)
itv_aed['new_aed_distance'] = new_aed_distances
itv_aed.head()

Unnamed: 0,mission_id,service_name,postalcode_permanence,cityname_permanence,streetname_permanence,housenumber_permanence,latitude_permanence,longitude_permanence,permanence_short_name,permanence_long_name,...,t7_Day,t7_Month,t7_Year,t7_DayName,province,intervention_time_(t1confirmed),departure_time_(t1confirmed),aed_distance,hospital_distance,new_aed_distance
0,20222490015,FB PDS BRUX [PASI CitÈ] SIAMU,1000.0,Brussel (Brussel),Vesaliusstraat,,50.85097,4.36411,ABBRUX03A,AMB HELI 3,...,6.0,9.0,2022.0,Tuesday,Brussels Hoofdstedelijk Gewest,,,61.129528,465.046464,92.988766
1,20222490041,HB UR BRUX CHU St Pierre,1000.0,Brussel (Brussel),Rue Haute,,50.83433,4.34545,ABBRUX08A,AMB HSP 3,...,6.0,9.0,2022.0,Tuesday,Brussels Hoofdstedelijk Gewest,,,71.815838,773.845722,40.126455
3,20222490042,FB PDS BRUX [PASI CitÈ] SIAMU,1000.0,Brussel (Brussel),Vesaliusstraat,,50.85097,4.36411,ABBRUX11A,AMB CITE 2,...,,,,,Brussels Hoofdstedelijk Gewest,,,95.336554,1529.1424,79.125488
4,20222490045,BB FORE Croix Rouge,1190.0,Vorst (Vorst),Rue Marconi,,50.81598,4.341524,ABFORE01A,AMB FOREST 1,...,6.0,9.0,2022.0,Tuesday,Brussels Hoofdstedelijk Gewest,,,165.716925,1306.596211,71.867132
5,20222490053,HB UR BRUX Europe Michel,1040.0,Etterbeek (Etterbeek),Charles Degrouxstraat,,50.84226,4.39925,ABETTE01A,AMB ST MICHEL,...,6.0,9.0,2022.0,Tuesday,Brussels Hoofdstedelijk Gewest,,,149.19511,566.0101,151.673684


In [12]:
## Coverage after KMeans optimization

# Same coverage rule as before, but measured against the relocated AEDs.
new_aed_limit = speed_running * golden_minutes / 2
hospital_reach_limit = speed_driving * golden_minutes
reaches_new_aed = itv_aed['new_aed_distance'] <= new_aed_limit
reaches_hospital = itv_aed['hospital_distance'] <= hospital_reach_limit
kmeans_covered = itv_aed.loc[reaches_new_aed | reaches_hospital]

coverage = kmeans_covered.shape[0] / itv_aed.shape[0]
coverage


0.8376428244002648

In [14]:
## Add province information to the new AED locations

import geopandas as gpd
from shapely import geometry as geo
from shapely.validation import explain_validity

geo_path = '/Users/lye/Downloads/MDA/Github-MDA2024/1_Data/Belgium.provinces.WGS84.geojson'
geo_be = gpd.read_file(geo_path)

# Repair invalid province geometries: report the problem, rebuild the shape
# with a zero-width buffer, then print whether the stored result is valid.
for row_label in range(len(geo_be)):
    shape = geo_be.loc[row_label, 'geometry']
    if shape.is_valid:
        continue
    print(explain_validity(shape))
    geo_be.loc[row_label, 'geometry'] = shape.buffer(0)
    print(geo_be.loc[row_label, 'geometry'].is_valid)

Self-intersection[6.24760990547934 50.640636186645]
True


In [15]:
def get_medical_province(df, geo_df):
    """Attach the province containing each ``(new_lon, new_lat)`` point.

    Rows are visited by position, so the function works for any index on
    ``df`` (for example a frame whose rows were filtered or de-duplicated),
    not only a default 0..n-1 RangeIndex.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``new_lon`` and ``new_lat`` columns. It is modified in
        place: a ``new_province`` column is added (or overwritten).
    geo_df : geopandas.GeoDataFrame
        Must provide a ``geometry`` column supporting ``.contains(point)`` and
        a ``NameDUT`` column holding the label to assign.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``new_province`` set to the ``NameDUT``
        of the first containing geometry, or ``None`` when no geometry
        contains the point.

    Side effects
    ------------
    Prints how many points fell outside every geometry.
    """
    province = []
    missing_province = 0
    # Iterate over raw coordinate values instead of df.loc[i, ...] so the
    # lookup never depends on the index labels of df.
    for lon, lat in zip(df['new_lon'].to_numpy(), df['new_lat'].to_numpy()):
        point = geo.Point(lon, lat)
        contained = geo_df.loc[geo_df['geometry'].contains(point),
                               'NameDUT'].values
        if contained.size > 0:
            # If several geometries contain the point, the first match wins.
            province.append(contained[0])
        else:
            province.append(None)
            missing_province += 1

    df['new_province'] = province
    print(f'{missing_province} coordinates are not located in any province')

    return df


# Label every proposed AED location with its province, then count per province.
aed = get_medical_province(aed, geo_be)
new_province_counts = aed['new_province'].value_counts()
new_province_counts

0 coordinates are not located in any province


new_province
Provincie Antwerpen               2245
Provincie Oost-Vlaanderen         2216
Provincie Henegouwen              1952
Provincie West-Vlaanderen         1890
Provincie Vlaams-Brabant          1738
Provincie Luik                    1501
Provincie Limburg                 1369
Provincie Namen                    885
Brussels Hoofdstedelijk Gewest     743
Provincie Luxemburg                663
Provincie Waals-Brabant            573
Name: count, dtype: int64

In [16]:
# Per-province count of the existing AEDs, using the `province` column loaded
# with the data, for comparison with the `new_province` counts.
current_province_counts = aed['province'].value_counts()
current_province_counts

province
Antwerpen            2355
Bruxelles-brussel    2117
Hainaut              1950
Liège                1771
Oost-vlaanderen      1664
West-vlaanderen      1374
Vlaams-brabant       1282
Limburg               998
Namur                 900
Luxembourg            683
Brabant wallon        681
Name: count, dtype: int64

In [17]:
# Persist the three result tables; the row index is not written.
csv_exports = [
    (itv_uncovered, '/Users/lye/Downloads/MDA/Github-MDA2024/1_Data/CLEANED/intervention_uncovered.csv'),
    (aed, '/Users/lye/Downloads/MDA/Github-MDA2024/1_Data/CLEANED/aed_with_KmeansLocation.csv'),
    (itv_aed, '/Users/lye/Downloads/MDA/Github-MDA2024/1_Data/CLEANED/intervention_aed_kmeans_distance.csv'),
]
for frame, csv_path in csv_exports:
    frame.to_csv(csv_path, index=False)