In [410]:
import pandas as pd
import numpy as np

import os

# Build a {base name: DataFrame} mapping from every file in the current working
# directory whose name ends in '.parquet.gzip'.
path = os.getcwd()
dataset = {}
parquet_files = [entry for entry in os.listdir(path) if entry.endswith('.parquet.gzip')]
for file_name in parquet_files:
    # The key is the text before the first dot of the file name.
    name = file_name.partition('.')[0]
    dataset[name] = pd.read_parquet(file_name)

In [411]:
import time
from datetime import datetime

# Harmonise column names: the snake_case columns of interventions_bxl and the
# differently spelled columns of interventions_bxl2 are mapped onto the
# title-style names used in the rest of the notebook.
bxl_column_names = {
    "mission_id": "Mission ID",
    "longitude_intervention": "Longitude intervention",
    "latitude_intervention": "Latitude intervention",
    "postalcode_intervention": "PostalCode intervention",
    "cityname_intervention": "CityName intervention",
    "waiting_time": "Waiting time",
    # t0 .. t7 -> T0 .. T7
    **{f"t{step}": f"T{step}" for step in range(8)},
    "t1confirmed": "T1confirmed",
    "intervention_time_t1reported": "Intervention time (T1Reported)",
    "intervention_duration": "Intervention duration",
    "departure_time_t1reported": "Departure time (T1Reported)",
    "name_destination_hospital": "Name destination hospital",
    "postalcode_destination_hospital": "PostalCode destination hospital",
    "cityname_destination_hospital": "CityName destination hospital",
    "streetname_destination_hospital": "StreetName destination hospital",
    # The trailing underscore in the source column name is intentional here.
    "calculated_distance_destination_": "Calculated Distance destination",
    "eventtype_firstcall": "EventType Firstcall",
    "vector_type": "Vector type",
    # NOTE(review): unlike every other key this one has a capital "L" -- confirm
    # it matches the actual column name, otherwise the rename is silently skipped.
    "eventLevel_firstcall": "EventLevel Firstcall",
    "abandon_reason": "Abandon reason",
}
bxl2_column_names = {
    "Cityname Intervention": "CityName intervention",
    "EventType and EventLevel": "EventType Firstcall",
    "Vector type NL": "Vector type",
}

dataset["interventions_bxl"] = dataset["interventions_bxl"].rename(columns=bxl_column_names)
dataset["interventions_bxl2"] = dataset["interventions_bxl2"].rename(columns=bxl2_column_names)

# Create "Waiting time" for interventions_bxl2: minutes elapsed between T0 and T3,
# rounded to one decimal. Timestamps are strings in "%d%b%y:%H:%M:%S" form
# (e.g. 08APR23:08:31:39); note that %b month parsing depends on the locale.
timestamp_format = "%d%b%y:%H:%M:%S"
bxl2 = dataset["interventions_bxl2"]
waiting_time = []
# Iterate over values rather than .loc[i] so the result does not depend on the
# frame having 0..n-1 index labels.
for t0, t3 in zip(bxl2['T0'], bxl2['T3']):
    # A waiting time needs both endpoints; pd.isna covers None as well as NaN.
    if pd.isna(t0) or pd.isna(t3):
        waiting_time.append(None)
        continue
    elapsed = datetime.strptime(t3, timestamp_format) - datetime.strptime(t0, timestamp_format)
    waiting_time.append(round(elapsed.total_seconds() / 60, 1))
dataset["interventions_bxl2"]["Waiting time"] = waiting_time

In [412]:
# Get coordinates for interventions_bxl, interventions1, interventions2 and interventions3.
# Some of the recorded lat/lon values are incorrect, so coordinates are derived from "PostalCode intervention" instead.
import pgeocode

# pgeocode postal-code lookup for country code 'be'; PostCode2xy below reads this global.
nomi = pgeocode.Nominatim('be')

def PostCode2xy(df,code):
    """Fill the intervention coordinate columns from a postal-code column.

    The frame is modified in place: rows whose ``code`` value is missing are
    dropped, float postal codes are rounded and converted to strings, and
    'Latitude intervention' / 'Longitude intervention' are overwritten with the
    values returned by the module-level ``nomi`` lookup.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to update in place.
    code : str
        Name of the column holding the postal codes.

    Returns
    -------
    None
    """
    df.dropna(subset=[code], inplace=True)
    if pd.api.types.is_float_dtype(df[code]):
        # Floats such as 1000.0 must become "1000", not "1000.0".
        df[code] = df[code].round().astype(int).astype(str)

    # One lookup serves both coordinates.
    lookup = nomi.query_postal_code(df[code].tolist())

    # Assign by position: the lookup was built from a plain list and therefore
    # does not share df's index (df keeps its original labels after dropna), so
    # an index-aligned Series assignment would misplace or blank out values.
    df['Latitude intervention'] = lookup['latitude'].to_numpy()
    df['Longitude intervention'] = lookup['longitude'].to_numpy()



In [413]:
# concat data frame and select Cardiac Arrest event type
def map_call(x):
    """Return 1 if ``x`` is a label mentioning 'cardiac arrest', otherwise 0.

    The match is case-insensitive. Any non-string value (None, NaN, numbers)
    yields 0 instead of raising, so the function is safe to map over columns
    that contain missing values.
    """
    if not isinstance(x, str):
        return 0
    return 1 if 'cardiac arrest' in x.lower() else 0
        
intervention_name = ['interventions_bxl', 'interventions_bxl2', 'interventions1', 'interventions2', 'interventions3']
variables = ["Mission ID","CityName intervention", "Latitude intervention", "Longitude intervention", 'T0', 'T1',
       'T1confirmed', 'T2', 'T3', 'T4', 'T5', 'T6', 'T7', 'Intervention time (T1Reported)', 'Waiting time',
       'Intervention duration', 'Departure time (T1Reported)','Name destination hospital',
       'PostalCode destination hospital', 'CityName destination hospital',
       'StreetName destination hospital','Calculated Distance destination','Vector type','EventLevel Firstcall',"Abandon reason"]

# Flag cardiac-arrest calls in every source table and collect the matching rows.
# The pieces are concatenated once at the end instead of growing a DataFrame
# inside the loop (which re-copies all rows each time and starts from an empty
# frame).
cardiac_frames = []
for name in intervention_name:
    # The flag column is also kept on the source table itself.
    dataset[name]['Cardiac Call'] = dataset[name]["EventType Firstcall"].map(map_call)
    temp = dataset[name][dataset[name]["Cardiac Call"]==1]
    if len(temp) == 0:
        continue
    cardiac_frames.append(temp[variables])

# Fall back to an empty frame with the expected columns when nothing matched,
# so the clean-up below still works.
info = pd.concat(cardiac_frames) if cardiac_frames else pd.DataFrame(columns=variables)

# Keep only rows with usable coordinates and a waiting time, one row per
# (mission, city), and renumber the index 0..n-1 for the cells that follow.
info = (
    info
    .dropna(subset=["Latitude intervention", "Longitude intervention", "Waiting time"])
    .drop_duplicates(subset=["Mission ID","CityName intervention"])
    .reset_index(drop=True)
)

  info = pd.concat([info, temp[variables]])
  info = pd.concat([info, temp[variables]])
  info = pd.concat([info, temp[variables]])


In [414]:
# Repair garbled character sequences (presumably mis-decoded accented letters)
# in the free-text name columns by replacing each with a plain ASCII letter.

modifier = {"√©":"e","√™":"e","√¥":"o","√®":"e", "√¢":"a", "√´":"e", "√º":"u", "√†":"a", '√ª':"u"}
text_columns = ['CityName intervention', 'Name destination hospital',
                'CityName destination hospital', 'StreetName destination hospital']


def _repair_text(value):
    """Apply every replacement in ``modifier`` to a string containing '√'.

    Non-string values (None, NaN) and strings without '√' are returned unchanged,
    so missing entries never reach the ``in`` / ``replace`` calls.
    """
    if not isinstance(value, str) or '√' not in value:
        return value
    for garbled, plain in modifier.items():
        value = value.replace(garbled, plain)
    return value


# Column-wise mapping: no dependence on the index labels of `info`.
for column in text_columns:
    info[column] = info[column].map(_repair_text)
 

In [79]:
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

# Obtain GPS coordinates for every distinct intervention city name.
citynames = np.unique(info['CityName intervention']).tolist()

geolocator = Nominatim(user_agent="MyApp")
# Throttle to one request per second. With its default settings RateLimiter
# also retries failed requests and finally returns None instead of raising, so
# a single timeout no longer aborts the whole loop.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

locations = []
for city in citynames:
    location = geocode(city)
    # Names that could not be resolved are skipped.
    if location:
        locations.append([city,location.latitude,location.longitude])

# Naming the columns up front keeps the CSV header intact even when no city
# could be resolved.
locations = pd.DataFrame(locations, columns=["CityName intervention", "Latitude intervention", "Longitude intervention"])
locations.to_csv('modified_city_name.csv', index = False)

In [374]:
# Obtain GPS coordinates for every distinct destination-hospital street name.
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

street_names = np.unique(info[info['StreetName destination hospital'].notna()]['StreetName destination hospital']).tolist()

geolocator = Nominatim(user_agent="MyApp")
# Throttle to one request per second. With its default settings RateLimiter
# also retries failed requests and finally returns None instead of raising, so
# a single timeout no longer aborts the whole loop.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

locations = []
for street in street_names:
    location = geocode(street)
    # Names that could not be resolved are skipped.
    if location:
        locations.append([street,location.latitude,location.longitude])

# Naming the columns up front keeps the CSV header intact even when no street
# could be resolved.
locations = pd.DataFrame(locations, columns=["StreetName destination hospital", "Latitude hospital", "Longitude hospital"])
locations.to_csv('StreetInfo_hospital.csv', index = False)

In [415]:
# GPS coordinates for every intervention city, keyed by 'CityName intervention'.
# NOTE(review): the city geocoding cell writes 'modified_city_name.csv', not this
# file -- presumably 'CityInfo_intervention.csv' is a curated copy; confirm its origin.
CityInfo = pd.read_csv('CityInfo_intervention.csv')

In [416]:
# Hospital street coordinates, read back from the CSV written by the street geocoding cell.
StreetInfo = pd.read_csv('StreetInfo_hospital.csv')

In [417]:
def latlon2xy(df,lat,lon,lat_digits=2,lon_digits=1):
    """Turn integer-encoded coordinates into decimal degrees, in place.

    Each value is truncated to an integer and a decimal point is inserted after
    its first ``lat_digits`` (latitude) or ``lon_digits`` (longitude) digits,
    e.g. 5083808 -> 50.83808 and 430484 -> 4.30484 with the defaults. The sign is
    kept out of the digit count, so negative values are converted symmetrically.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are overwritten.
    lat, lon : str
        Names of the latitude and longitude columns.
    lat_digits, lon_digits : int, optional
        Number of digits forming the whole-degree part (defaults 2 and 1).

    Returns
    -------
    None

    Notes
    -----
    Not idempotent: values that already are decimal degrees get truncated to
    whole degrees. Missing values make the integer conversion raise.
    """
    def _insert_decimal_point(value, int_digits):
        # Work on the magnitude so a leading '-' is never counted as a digit.
        digits = str(abs(value))
        degrees = float(digits[:int_digits] + '.' + digits[int_digits:])
        return -degrees if value < 0 else degrees

    df[lat] = df[lat].astype(int).map(lambda v: _insert_decimal_point(v, lat_digits))
    df[lon] = df[lon].astype(int).map(lambda v: _insert_decimal_point(v, lon_digits))
# Convert the integer-encoded coordinates of `info` in place. Not idempotent:
# running this cell a second time truncates the converted degrees to whole numbers.
latlon2xy(info,'Latitude intervention','Longitude intervention')
info

Unnamed: 0,Mission ID,CityName intervention,Latitude intervention,Longitude intervention,T0,T1,T1confirmed,T2,T3,T4,...,Intervention duration,Departure time (T1Reported),Name destination hospital,PostalCode destination hospital,CityName destination hospital,StreetName destination hospital,Calculated Distance destination,Vector type,EventLevel Firstcall,Abandon reason
0,20222490200,Anderlecht (Anderlecht),50.83808,4.30484,2022-09-06 20:31:42.6662526 +02:00,2022-09-06 18:35:43.1039112 +00:00,,2022-09-06 18:38:18.1305717 +00:00,2022-09-06 18:44:39.2860546 +00:00,,...,31.0,3.0,,,,,,MUG,N1,
1,20222500029,Ixelles (Ixelles),50.82480,4.38185,2022-09-07 02:36:15.8297078 +02:00,2022-09-07 00:38:49.8131263 +00:00,,2022-09-07 00:41:59.5629437 +00:00,2022-09-07 00:42:25.6551236 +00:00,,...,,3.0,,,,,,MUG,N1,
2,20222500165,Woluwe-Saint-Pierre (Woluwe-Saint-Pierre),50.83848,4.44441,2022-09-07 12:32:13.5105864 +02:00,2022-09-07 10:35:15.3074814 +00:00,,2022-09-07 10:38:37.7597092 +00:00,2022-09-07 10:49:43.6539502 +00:00,,...,41.0,3.0,,,,,,MUG,N1,
3,20222510199,Molenbeek-Saint-Jean (Molenbeek-Saint-Jean),50.84948,4.32034,2022-09-08 13:40:50.1678147 +02:00,2022-09-08 11:44:34.9786660 +00:00,,2022-09-08 11:47:11.3325823 +00:00,2022-09-08 11:52:58.2204484 +00:00,,...,,3.0,,,,,,MUG,N1,
4,20222530229,Forest (Forest),50.80868,4.30701,2022-09-10 17:18:45.0032241 +02:00,2022-09-10 15:25:43.5555381 +00:00,,2022-09-10 15:28:24.1267088 +00:00,2022-09-10 15:53:33.2198959 +00:00,,...,38.0,3.0,,,,,,MUG,N1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3710,90230980042,Namur (Namur),50.40000,4.83000,08APR23:08:31:39,08APR23:08:34:57,,,,,...,28.0,1.0,,,,,,Ambulance,N1,Overleden
3711,90231120108,Profondeville (Lesve),50.30000,4.70000,22APR23:14:53:28,22APR23:14:57:34,,,,,...,49.0,,HN UR YVOI CHU,,Yvoir (Godinne),Avenue du Docteur Gaston ThÈrasse,12469.0,Ambulance,N5,
3712,90231170055,Dinant (Dinant),50.20000,4.92000,27APR23:10:43:33,27APR23:10:45:17,,,,,...,14.0,2.0,HN UR DINA CHU,,Dinant (Dinant),Rue Saint-Jacques,0.0,Ambulance,N0,
3713,90231200099,EghezÈe (Aische-En-Refail),50.50000,4.80000,30APR23:14:31:51,30APR23:14:33:03,,,,,...,40.0,6.0,HN UR NAMU CHR,,Namur (Namur),Avenue Albert 1er,0.0,Ambulance,N0,


In [426]:
# Enrich every intervention with the reference coordinates of its city and of
# its destination hospital's street. Both `info` and `CityInfo` carry
# 'Latitude intervention' / 'Longitude intervention', so after the first merge
# those columns appear with the suffixes _x (from info) and _y (from CityInfo).
# Left joins keep the interventions in their original order and never add rows
# for lookup entries that match no intervention (an outer join would emit such
# rows with every intervention field empty). This assumes names are unique in
# the lookup tables.
temp = info.merge(CityInfo,on='CityName intervention',how='left')
df = temp.merge(StreetInfo,on='StreetName destination hospital',how='left')

In [427]:
# Renumber rows 0..n-1.
df = df.reset_index(drop=True)

# Where the recorded coordinates (_x) are more than 0.2 degrees away from the
# city reference coordinates (_y) in latitude or longitude, take the reference
# pair instead. Rows with a missing value on either side compare as False and
# are left untouched.
lat_gap = (df['Latitude intervention_y'] - df['Latitude intervention_x']).abs()
lon_gap = (df['Longitude intervention_y'] - df['Longitude intervention_x']).abs()
needs_fix = (lat_gap > 0.2) | (lon_gap > 0.2)
df.loc[needs_fix, 'Latitude intervention_x'] = df.loc[needs_fix, 'Latitude intervention_y']
df.loc[needs_fix, 'Longitude intervention_x'] = df.loc[needs_fix, 'Longitude intervention_y']

# Final column selection and order for the exported file.
output_columns = [
    'Mission ID', 'CityName intervention',
    'Latitude intervention_x', 'Longitude intervention_x',
    'T0', 'T1', 'T1confirmed', 'T2', 'T3', 'T4', 'T5', 'T6', 'T7',
    'Intervention time (T1Reported)', 'Waiting time',
    'Intervention duration', 'Departure time (T1Reported)',
    'Name destination hospital', 'PostalCode destination hospital',
    'CityName destination hospital', 'StreetName destination hospital',
    'Latitude hospital', 'Longitude hospital',
    'Calculated Distance destination', 'Vector type',
    'EventLevel Firstcall', "Abandon reason",
]
suffix_free_names = {
    'Latitude intervention_x': 'Latitude intervention',
    'Longitude intervention_x': 'Longitude intervention',
}
df = df[output_columns]
df = df.rename(columns=suffix_free_names)
df.to_csv('intervention.csv',index=False)