In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the cleaned intervention/AED distance table.
# low_memory=False asks pandas to parse the file in one pass so that column dtypes
# are inferred consistently (see the pandas read_csv documentation).
itv_aed = pd.read_csv(
    filepath_or_buffer='/Users/lye/Downloads/MDA/Github-MDA2024/1_Data/CLEANED/intervention_aed_related_distance.csv',
    low_memory=False,
)

# Print the column names, non-null counts and dtypes of the loaded frame.
itv_aed.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105841 entries, 0 to 105840
Data columns (total 58 columns):
 #   Column                            Non-Null Count   Dtype  
---  ------                            --------------   -----  
 0   mission_id                        105841 non-null  int64  
 1   service_name                      93471 non-null   object 
 2   postalcode_permanence             65151 non-null   float64
 3   cityname_permanence               69218 non-null   object 
 4   streetname_permanence             69623 non-null   object 
 5   housenumber_permanence            2667 non-null    float64
 6   latitude_permanence               97007 non-null   float64
 7   longitude_permanence              97690 non-null   float64
 8   permanence_short_name             105670 non-null  object 
 9   permanence_long_name              93486 non-null   object 
 10  vector_type                       104009 non-null  object 
 11  eventtype_firstcall               65569 non-null   o

In [3]:
## Datetime validation
# Derive three durations in minutes from the t0/t3/t5/t7 timestamps, replace
# implausible values with NaN, and leave the cleaned results in the columns
# `waiting_time`, `time_to_hospital` and `total_time`.

# Parse the four timestamp columns used below.
for ts_col in ['t0', 't3', 't5', 't7']:
    itv_aed[ts_col] = pd.to_datetime(itv_aed[ts_col])

# Minutes elapsed since t0 at t3, t5 and t7 respectively.
itv_aed['t3-t0'] = (itv_aed['t3'] - itv_aed['t0']).dt.total_seconds() / 60
itv_aed['t5-t0'] = (itv_aed['t5'] - itv_aed['t0']).dt.total_seconds() / 60
itv_aed['total_time'] = (itv_aed['t7'] - itv_aed['t0']).dt.total_seconds() / 60

# t3-t0 must be positive and at most 6 hours (360 minutes).
itv_aed.loc[(itv_aed['t3-t0'] <= 0) | (itv_aed['t3-t0'] > 6 * 60), 't3-t0'] = np.nan
# t5-t0 must be positive, at most 12 hours (720 minutes) and strictly greater than t3-t0.
# Comparisons against NaN evaluate to False, so a row whose t3-t0 was just blanked
# is not rejected by the last condition.
itv_aed.loc[(itv_aed['t5-t0'] <= 0) | (itv_aed['t5-t0'] > 12 * 60) | (itv_aed['t5-t0'] <= itv_aed['t3-t0']), 't5-t0'] = np.nan
# Apply the same 0-6 hour bounds to the waiting_time column that came with the CSV
# (assumes it is recorded in minutes like t3-t0 — TODO confirm).
itv_aed.loc[(itv_aed['waiting_time'] <= 0) | (itv_aed['waiting_time'] > 6 * 60), 'waiting_time'] = np.nan

# Backfill a missing t3-t0 with the recorded waiting_time where one exists.
# fillna is vectorised: it avoids a Python-level pass over every row and also
# works on an empty frame.
# NOTE(review): this backfill runs after the `t5-t0 <= t3-t0` check above, so
# backfilled values are never compared with t5-t0 — confirm that is intended.
itv_aed['t3-t0'] = itv_aed['t3-t0'].fillna(itv_aed['waiting_time'])

# total_time must be positive and strictly greater than t5-t0 (no upper bound is applied).
itv_aed.loc[(itv_aed['total_time']<=0) | (itv_aed['total_time'] <= itv_aed['t5-t0']), 'total_time'] = np.nan
# The raw waiting_time is superseded by the validated/backfilled t3-t0; drop it
# first so the rename below does not create a duplicate column name.
itv_aed.drop(columns=['waiting_time'], inplace=True)
itv_aed.rename(columns={'t3-t0': 'waiting_time', 't5-t0': 'time_to_hospital'}, inplace=True) ## rename to waiting_time & time_to_hospital

itv_aed.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105841 entries, 0 to 105840
Data columns (total 60 columns):
 #   Column                            Non-Null Count   Dtype         
---  ------                            --------------   -----         
 0   mission_id                        105841 non-null  int64         
 1   service_name                      93471 non-null   object        
 2   postalcode_permanence             65151 non-null   float64       
 3   cityname_permanence               69218 non-null   object        
 4   streetname_permanence             69623 non-null   object        
 5   housenumber_permanence            2667 non-null    float64       
 6   latitude_permanence               97007 non-null   float64       
 7   longitude_permanence              97690 non-null   float64       
 8   permanence_short_name             105670 non-null  object        
 9   permanence_long_name              93486 non-null   object        
 10  vector_type                     

In [4]:
## drop duplicates and missing values
# Keep a single row per mission_id, then discard every row that lacks one of the
# fields needed downstream.
# NOTE(review): de-duplicating before dropna means a mission whose retained row is
# incomplete is removed entirely, even if another row for it was complete —
# confirm this order is intended.
required_columns = [
    'waiting_time', 'total_time', 'time_to_hospital', 'aed_distance', 'hospital_distance',
    'province', 'vector_type', 'eventlevel_trip'
]
itv_aed.drop_duplicates(subset=['mission_id'], inplace=True)
itv_aed.dropna(subset=required_columns, inplace=True)

## validate vector_type
def _canonical_vector_type(raw_label):
    """Collapse a free-text vector type onto AMBULANCE / MUG / PIT.

    The match is a case-insensitive substring test, checked in that order;
    labels matching none of the three are returned unchanged.
    """
    lowered = raw_label.lower()
    if "ambulance" in lowered or "amb" in lowered:
        return "AMBULANCE"
    if 'mug' in lowered:
        return 'MUG'
    if 'pit' in lowered:
        return 'PIT'
    return raw_label

# vector_type has no missing values at this point (it is in required_columns),
# so calling .lower() on each value is safe.
itv_aed['vector_type'] = itv_aed['vector_type'].apply(_canonical_vector_type)

itv_aed.info()

<class 'pandas.core.frame.DataFrame'>
Index: 22651 entries, 5 to 105258
Data columns (total 60 columns):
 #   Column                            Non-Null Count  Dtype         
---  ------                            --------------  -----         
 0   mission_id                        22651 non-null  int64         
 1   service_name                      22651 non-null  object        
 2   postalcode_permanence             21667 non-null  float64       
 3   cityname_permanence               22598 non-null  object        
 4   streetname_permanence             22647 non-null  object        
 5   housenumber_permanence            1027 non-null   float64       
 6   latitude_permanence               22629 non-null  float64       
 7   longitude_permanence              22545 non-null  float64       
 8   permanence_short_name             22651 non-null  object        
 9   permanence_long_name              22651 non-null  object        
 10  vector_type                       22651 non-null  

In [5]:
## Delete 'provincie' in province column
# Remove every literal "Provincie " substring so only the bare province name is left.
# regex=False keeps the pattern a plain string match.
province_names = itv_aed['province'].str.replace('Provincie ', '', regex=False)
itv_aed['province'] = province_names
itv_aed['province'].value_counts()

province
Antwerpen                         5977
Brussels Hoofdstedelijk Gewest    4413
Henegouwen                        3623
Luik                              2739
Limburg                           2540
Namen                             1252
Waals-Brabant                     1026
Luxemburg                          892
Vlaams-Brabant                     174
Oost-Vlaanderen                      8
West-Vlaanderen                      7
Name: count, dtype: int64

In [6]:
# Persist the cleaned frame for later analysis; index=False leaves the row index
# out of the written file.
itv_aed.to_csv(
    path_or_buf='/Users/lye/Downloads/MDA/Github-MDA2024/1_Data/CLEANED/clean_itv_aed_for_analysis.csv',
    index=False,
)

In [7]:
# Columns carried into the regression frame: two distances, the three time
# measures, and four categorical descriptors.
columns_to_keep = [
    'aed_distance',
    'hospital_distance',
    'waiting_time',
    'time_to_hospital',
    'total_time',
    'province',
    'vector_type',
    'eventlevel_trip',
    'eventtype_trip',
]

# Work on an independent copy so later changes to itv_reg do not touch itv_aed.
itv_reg = itv_aed.loc[:, columns_to_keep].copy()
itv_reg.info()

<class 'pandas.core.frame.DataFrame'>
Index: 22651 entries, 5 to 105258
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   aed_distance       22651 non-null  float64
 1   hospital_distance  22651 non-null  float64
 2   waiting_time       22651 non-null  float64
 3   time_to_hospital   22651 non-null  float64
 4   total_time         22651 non-null  float64
 5   province           22651 non-null  object 
 6   vector_type        22651 non-null  object 
 7   eventlevel_trip    22651 non-null  object 
 8   eventtype_trip     22651 non-null  object 
dtypes: float64(5), object(4)
memory usage: 1.7+ MB


In [9]:
# One-hot encode the categorical predictors. drop_first=True omits the first level
# of each variable, so that level acts as the reference category in the regressions.
categorical_columns = ['province', 'vector_type', 'eventlevel_trip', 'eventtype_trip']
itv_reg = pd.get_dummies(
    itv_reg,
    columns=categorical_columns,
    drop_first=True,
)
itv_reg.info()

<class 'pandas.core.frame.DataFrame'>
Index: 22651 entries, 5 to 105258
Data columns (total 26 columns):
 #   Column                                                            Non-Null Count  Dtype  
---  ------                                                            --------------  -----  
 0   aed_distance                                                      22651 non-null  float64
 1   hospital_distance                                                 22651 non-null  float64
 2   waiting_time                                                      22651 non-null  float64
 3   time_to_hospital                                                  22651 non-null  float64
 4   total_time                                                        22651 non-null  float64
 5   province_Brussels Hoofdstedelijk Gewest                           22651 non-null  bool   
 6   province_Henegouwen                                               22651 non-null  bool   
 7   province_Limburg                   

In [10]:
# Display the column labels of the encoded regression frame (numeric columns plus
# the generated dummy columns).
itv_reg.columns

Index(['aed_distance', 'hospital_distance', 'waiting_time', 'time_to_hospital',
       'total_time', 'province_Brussels Hoofdstedelijk Gewest',
       'province_Henegouwen', 'province_Limburg', 'province_Luik',
       'province_Luxemburg', 'province_Namen', 'province_Oost-Vlaanderen',
       'province_Vlaams-Brabant', 'province_Waals-Brabant',
       'province_West-Vlaanderen', 'vector_type_MUG', 'vector_type_PIT',
       'eventlevel_trip_N1', 'eventlevel_trip_N2', 'eventlevel_trip_N3',
       'eventlevel_trip_N4', 'eventlevel_trip_N5', 'eventlevel_trip_N6',
       'eventlevel_trip_N7A', 'eventtype_trip_P019 - Unconscious - syncope',
       'eventtype_trip_P039 - Cardiac problem (other than thoracic pain)'],
      dtype='object')

In [12]:
## OLS regression on total_time

import statsmodels.api as sm

# Regressors shared by all three models: both distances plus every dummy column.
# The reference levels are the categories omitted by drop_first=True.
X_var = [
    # distances
    'aed_distance', 'hospital_distance',
    # province dummies
    'province_Brussels Hoofdstedelijk Gewest',
    'province_Henegouwen', 'province_Limburg', 'province_Luik',
    'province_Luxemburg', 'province_Namen', 'province_Oost-Vlaanderen',
    'province_Vlaams-Brabant', 'province_Waals-Brabant',
    'province_West-Vlaanderen',
    # vector type dummies
    'vector_type_MUG', 'vector_type_PIT',
    # event level dummies
    'eventlevel_trip_N1', 'eventlevel_trip_N2', 'eventlevel_trip_N3',
    'eventlevel_trip_N4', 'eventlevel_trip_N5', 'eventlevel_trip_N6',
    'eventlevel_trip_N7A',
    # event type dummies
    'eventtype_trip_P019 - Unconscious - syncope',
    'eventtype_trip_P039 - Cardiac problem (other than thoracic pain)',
]

y_var = 'total_time'

# Response, and the design matrix with an added `const` (intercept) column.
y = itv_reg[y_var]
X = sm.add_constant(itv_reg[X_var])

# The dummy columns are bool; cast the design matrix to float for the fit.
model = sm.OLS(y, X.astype(float)).fit()
model.summary()


0,1,2,3
Dep. Variable:,total_time,R-squared:,0.217
Model:,OLS,Adj. R-squared:,0.216
Method:,Least Squares,F-statistic:,272.9
Date:,"Sat, 04 May 2024",Prob (F-statistic):,0.0
Time:,19:15:42,Log-Likelihood:,-105800.0
No. Observations:,22651,AIC:,211600.0
Df Residuals:,22627,BIC:,211800.0
Df Model:,23,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,72.7558,2.056,35.380,0.000,68.725,76.786
aed_distance,0.0009,0.000,2.807,0.005,0.000,0.002
hospital_distance,0.0027,4.67e-05,58.423,0.000,0.003,0.003
province_Brussels Hoofdstedelijk Gewest,2.8917,0.544,5.317,0.000,1.826,3.958
province_Henegouwen,0.3658,0.558,0.655,0.512,-0.729,1.460
province_Limburg,2.1334,0.629,3.394,0.001,0.901,3.366
province_Luik,-1.9358,0.601,-3.219,0.001,-3.115,-0.757
province_Luxemburg,-0.9236,0.975,-0.947,0.343,-2.835,0.987
province_Namen,-0.3952,0.837,-0.472,0.637,-2.036,1.245

0,1,2,3
Omnibus:,18468.946,Durbin-Watson:,1.995
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1591241.931
Skew:,3.377,Prob(JB):,0.0
Kurtosis:,43.502,Cond. No.,574000.0


In [13]:
## OLS regression on waiting_time
# Same regressors (X_var) as the total_time model; only the response changes.

y = itv_reg['waiting_time']
X = sm.add_constant(itv_reg[X_var])  # adds the `const` intercept column

# The dummy columns are bool; cast the design matrix to float for the fit.
model = sm.OLS(y, X.astype(float)).fit()
model.summary()

0,1,2,3
Dep. Variable:,waiting_time,R-squared:,0.052
Model:,OLS,Adj. R-squared:,0.051
Method:,Least Squares,F-statistic:,54.16
Date:,"Sat, 04 May 2024",Prob (F-statistic):,5.259999999999999e-242
Time:,19:16:07,Log-Likelihood:,-81071.0
No. Observations:,22651,AIC:,162200.0
Df Residuals:,22627,BIC:,162400.0
Df Model:,23,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,9.8046,0.690,14.206,0.000,8.452,11.157
aed_distance,0.0009,0.000,7.622,0.000,0.001,0.001
hospital_distance,0.0003,1.57e-05,17.733,0.000,0.000,0.000
province_Brussels Hoofdstedelijk Gewest,1.9930,0.183,10.918,0.000,1.635,2.351
province_Henegouwen,1.2902,0.187,6.884,0.000,0.923,1.657
province_Limburg,-0.0686,0.211,-0.325,0.745,-0.482,0.345
province_Luik,-0.1581,0.202,-0.783,0.433,-0.554,0.238
province_Luxemburg,-0.4420,0.327,-1.351,0.177,-1.083,0.199
province_Namen,-0.4676,0.281,-1.665,0.096,-1.018,0.083

0,1,2,3
Omnibus:,25730.922,Durbin-Watson:,1.982
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5315224.48
Skew:,5.639,Prob(JB):,0.0
Kurtosis:,77.193,Cond. No.,574000.0


In [14]:
## OLS regression on time_to_hospital
# Same regressors (X_var) as the total_time model; only the response changes.

y = itv_reg['time_to_hospital']
X = sm.add_constant(itv_reg[X_var])  # adds the `const` intercept column

# The dummy columns are bool; cast the design matrix to float for the fit.
model = sm.OLS(y, X.astype(float)).fit()
model.summary()

0,1,2,3
Dep. Variable:,time_to_hospital,R-squared:,0.24
Model:,OLS,Adj. R-squared:,0.24
Method:,Least Squares,F-statistic:,311.2
Date:,"Sat, 04 May 2024",Prob (F-statistic):,0.0
Time:,19:16:30,Log-Likelihood:,-95054.0
No. Observations:,22651,AIC:,190200.0
Df Residuals:,22627,BIC:,190300.0
Df Model:,23,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,42.3833,1.280,33.123,0.000,39.875,44.891
aed_distance,0.0013,0.000,5.978,0.000,0.001,0.002
hospital_distance,0.0016,2.91e-05,54.707,0.000,0.002,0.002
province_Brussels Hoofdstedelijk Gewest,3.3672,0.338,9.949,0.000,2.704,4.031
province_Henegouwen,2.3711,0.347,6.824,0.000,1.690,3.052
province_Limburg,1.6421,0.391,4.198,0.000,0.875,2.409
province_Luik,0.2130,0.374,0.569,0.569,-0.520,0.947
province_Luxemburg,-0.9284,0.607,-1.530,0.126,-2.118,0.261
province_Namen,1.3772,0.521,2.645,0.008,0.357,2.398

0,1,2,3
Omnibus:,27054.748,Durbin-Watson:,2.006
Prob(Omnibus):,0.0,Jarque-Bera (JB):,19996807.952
Skew:,5.694,Prob(JB):,0.0
Kurtosis:,148.114,Cond. No.,574000.0


In [15]:
# Frequency of each recorded abandon reason among the cleaned interventions.
# NOTE(review): the counts show near-duplicate labels ("Weigering van vervoer" /
# "Weigering vervoer", "Vervoerd door derden" / "Vervoer door derden") — consider
# normalising them before using this column.
itv_aed['abandon_reason'].value_counts()

abandon_reason
Weigering van vervoer    19
Overleden                19
Error                     7
Verzorgd ter plaatse      6
Weigering vervoer         4
Geannuleerd               2
Vervoerd door politie     2
Vervoerd door derden      1
Zonder patient            1
Vervoer door derden       1
Name: count, dtype: int64