In [1]:
import pandas as pd
from sklearn.cluster import DBSCAN
import numpy as np
from tqdm import tqdm, trange
from geopy.distance import geodesic


import warnings
warnings.filterwarnings("ignore")

## dbscan

In [4]:
interventions = pd.read_csv('intervention_without_aed_hospital.csv',encoding='utf-8')
interventions[(interventions['lat'].isna()) | (interventions['lon'].isna())]
interventions = interventions.drop_duplicates(subset=['mission_id'])

In [6]:
interventions = interventions[-interventions['lat'].isna()]
interventions = interventions[-interventions['lon'].isna()]

In [7]:
interventions_bra = interventions[(interventions['province']=='Province du Brabant flamand')]

In [9]:
latlon = interventions_bra.loc[:,['lat','lon']]

In [11]:
def convert_cartesian(lat, lon):
    lat = np.deg2rad(lat)
    lon = np.deg2rad(lon)
    R = 6371 
    x = R * np.cos(lat) * np.cos(lon)
    y = R * np.cos(lat) * np.sin(lon)
    z = R * np.sin(lat)
    return x, y, z

def convert_geodetic(x, y, z):
    R = 6371
    lon = np.arctan2(y, x)
    lat = np.arcsin(z / R)
    return np.rad2deg(lat), np.rad2deg(lon)

In [None]:
arr_latlon = np.array(latlon)
distance_matrix = np.zeros((len(arr_latlon), len(arr_latlon)))

# create distance matrix
for i in trange(len(arr_latlon)):
    for j in range(len(arr_latlon)):
        distance_matrix[i, j] = geodesic(arr_latlon[i], arr_latlon[j]).km

In [None]:
dbscan = DBSCAN(eps=2/6371., min_samples=5, algorithm='auto', metric='precomputed')
# clusters = dbscan.fit_predict(np.radians(arr_latlon))
clusters = dbscan.fit_predict(distance_matrix)

latlon['Cluster'] = clusters

print("DBSCAN: ", clusters)
centers = []

for i in np.unique(clusters):
    cluster_data = latlon[latlon['Cluster'] == i]
    if i == -1:  
        for index, row in cluster_data.iterrows():
            centers.append([row['lat'], row['lon']])
    else:  
        x, y, z = convert_cartesian(cluster_data['lat'], cluster_data['lon'])
        center_x, center_y, center_z = np.mean(x), np.mean(y), np.mean(z)
        center_lat, center_lon = convert_geodetic(center_x, center_y, center_z)
        centers.append([center_lat, center_lon])

print(centers)

In [15]:
print(len(centers))

405


## cover

In [13]:
df_intervention = pd.read_csv('interventions_ca_geoloc_with_province.csv')
interventions_bra_all = df_intervention[(df_intervention['province']=='Province du Brabant flamand')]
interventions_bra_all = interventions_bra_all[-interventions_bra_all['lat'].isna()]
interventions_bra_all = interventions_bra_all[-interventions_bra_all['lon'].isna()]
interventions_bra_all = interventions_bra_all.drop_duplicates(subset=['mission_id'])
interventions_latlon = interventions_bra_all[['lat', 'lon']]

In [14]:
df_hospital = pd.read_csv('hospital_with_province.csv')
hospital_bra = df_hospital[(df_hospital['province']=='Province du Brabant flamand')]
hospital_bra = hospital_bra[-hospital_bra['Latitude'].isna()]
hospital_bra = hospital_bra[-hospital_bra['Longitude'].isna()]

In [15]:
df_aed = pd.read_csv('aed_df_with_province.csv')
aed_bra = df_aed[(df_aed['new_province']=='Province du Brabant flamand')]
aed_bra = aed_bra[-aed_bra['lat'].isna()]
aed_bra = aed_bra[-aed_bra['lon'].isna()]

In [None]:
aed_latlon = aed_bra[['lat','lon']]

In [20]:
hospital_latlon = hospital_bra[['Latitude','Longitude']]

cover without new aed

In [24]:
driving_speed = 1 
walking_speed = 0.1 

list_near = []
list_far = []

for i, row_intervention in tqdm(interventions_latlon.iterrows(), total=interventions_latlon.shape[0]):
    is_near = False
    intervention_point = (row_intervention['lat'],row_intervention['lon'])

    # for j, row_hospital in hospital_latlon.iterrows():
    #     hospital_point = (row_hospital['Latitude'],row_hospital['Longitude'])
    #     if geodesic(intervention_point, hospital_point).km <= 4 * driving_speed:  #
    #         is_near = True
    #         break

    if not is_near:
        for k, row_aed in aed_latlon.iterrows():
            aed_point = (row_aed['lat'], row_aed['lon'])
            if geodesic(intervention_point, aed_point).km <= 2 * walking_speed:  #
                is_near = True
                break

    if is_near:
        # df_near = pd.concat(df_near, latlon.loc[i])
        list_near.append(i)
    else:
        # df_far = pd.concat(df_far, latlon.loc[i])
        list_far.append(i)

print(f'cover count: {len(list_near)}')
print(f'uncover count: {len(list_far)}')
print(f'cover rate: {len(list_near)/len(interventions_latlon)}')

100%|██████████| 740/740 [01:35<00:00,  7.72it/s]

cover count: 173
uncover count: 567
cover rate: 0.23378378378378378





add new aed

In [26]:
aed_new = pd.DataFrame(centers, columns=['lat', 'lon'])

In [None]:
aed_latlon_new = pd.concat([aed_latlon, aed_new])

In [28]:
driving_speed = 1 
walking_speed = 0.1

list_near = []
list_far = []

for i, row_intervention in tqdm(interventions_latlon.iterrows(), total=interventions_latlon.shape[0]):
    is_near = False
    intervention_point = (row_intervention['lat'],row_intervention['lon'])

    # for j, row_hospital in hospital_latlon.iterrows():
    #     hospital_point = (row_hospital['Latitude'],row_hospital['Longitude'])
    #     if geodesic(intervention_point, hospital_point).km <= 4 * driving_speed:  #
    #         is_near = True
    #         break

    if not is_near:
        for k, row_aed in aed_latlon_new.iterrows():
            aed_point = (row_aed['lat'], row_aed['lon'])
            if geodesic(intervention_point, aed_point).km <= 2 * walking_speed:  #
                is_near = True
                break

    if is_near:
        list_near.append(i)
    else:
        list_far.append(i)

print(f'cover count: {len(list_near)}')
print(f'uncover count: {len(list_far)}')
print(f'cover rate: {len(list_near)/len(interventions_latlon)}')

100%|██████████| 740/740 [01:54<00:00,  6.46it/s]

cover count: 538
uncover count: 202
cover rate: 0.727027027027027





## dbscan all interventions

In [None]:
all_latlon = np.array(interventions_latlon)
dist_matrix_all = np.zeros((len(all_latlon), len(all_latlon)))

for i in trange(len(all_latlon)):
    for j in range(len(all_latlon)):
        dist_matrix_all[i, j] = geodesic(all_latlon[i], all_latlon[j]).km


In [None]:
dbscan = DBSCAN(eps=2/6371., min_samples=5, algorithm='auto', metric='precomputed')
# clusters = dbscan.fit_predict(np.radians(arr_latlon))
clusters = dbscan.fit_predict(dist_matrix_all)

interventions_latlon['Cluster'] = clusters

# 打印聚类结果
print("DBSCAN: ", clusters)
centers_all = []

for i in np.unique(clusters):
    cluster_data = interventions_latlon[interventions_latlon['Cluster'] == i]
    if i == -1:  
        for index, row in cluster_data.iterrows():
            centers_all.append([row['lat'], row['lon']])
    else: 
        x, y, z = convert_cartesian(cluster_data['lat'], cluster_data['lon'])
        center_x, center_y, center_z = np.mean(x), np.mean(y), np.mean(z)
        center_lat, center_lon = convert_geodetic(center_x, center_y, center_z)
        centers_all.append([center_lat, center_lon])

In [21]:
len(centers_all)

735

In [22]:
aed_loc_new = pd.DataFrame(centers_all, columns=['lat', 'lon'])

In [23]:
driving_speed = 1 
walking_speed = 0.1

# df_near = pd.DataFrame()
# df_far = pd.DataFrame()
list_near = []
list_far = []

# 遍历df_intervention
for i, row_intervention in tqdm(interventions_latlon.iterrows(), total=interventions_latlon.shape[0]):
    is_near = False
    intervention_point = (row_intervention['lat'],row_intervention['lon'])

    # for j, row_hospital in hospital_latlon.iterrows():
    #     hospital_point = (row_hospital['Latitude'],row_hospital['Longitude'])
    #     if geodesic(intervention_point, hospital_point).km <= 4 * driving_speed:  #
    #         is_near = True
    #         break

    if not is_near:
        for k, row_aed in aed_loc_new.iterrows():
            aed_point = (row_aed['lat'], row_aed['lon'])
            if geodesic(intervention_point, aed_point).km <= 2 * walking_speed:  #
                is_near = True
                break

    if is_near:
        # df_near = pd.concat(df_near, latlon.loc[i])
        list_near.append(i)
    else:
        # df_far = pd.concat(df_far, latlon.loc[i])
        list_far.append(i)

# 打印近和远的intervention的数量
print(f'cover count: {len(list_near)}')
print(f'uncover count: {len(list_far)}')
print(f'cover rate: {len(list_near)/len(interventions_latlon)}')

100%|██████████| 740/740 [00:29<00:00, 24.99it/s] 

cover count: 740
uncover count: 0
cover rate: 1.0



