In [1]:
import os

import numpy as np
import pandas as pd
import scipy as sp
from IPython.display import display, clear_output
from ipywidgets import interact, interactive, Dropdown, IntRangeSlider

In [2]:
import pip

In [3]:
!pip install plotly



In [4]:
import plotly.express as px
import plotly.io as pio
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

In [5]:
# Make 'iframe_connected' the default renderer used when Plotly figures are shown.
pio.renderers.default = 'iframe_connected'

In [42]:
# Location of the raw EMS dispatch CSV. Set the EMS_DATA_PATH environment
# variable to point at a different copy of the file; when it is unset the
# original local path is used, so existing runs behave as before.
raw_data_file_path = os.environ.get(
    "EMS_DATA_PATH",
    "C:/Users/linda/Downloads/EMS_Incident_Dispatch_Data.csv",
)

In [49]:
def import_and_clean_EMS_data(file=raw_data_file_path, skip_until_row=27000000, z_threshold=3):
    """Load the EMS dispatch CSV and return a cleaned DataFrame.

    Steps, in order (the shape is printed after each one):
      1. Read the CSV, keeping the header and skipping data rows
         1 .. skip_until_row - 1.
      2. Drop district/indicator columns that are not used downstream.
      3. Drop rows missing any of the assignment / activation / on-scene
         timestamps or the dispatch area.
      4. Parse the timestamp columns ("%m/%d/%Y %I:%M:%S %p"); values that
         do not match become NaT.
      5. Rename the two *_SECONDS_QY columns and derive the per-stage
         durations (in seconds) from the parsed timestamps.
      6. Keep only rows where all three stage durations are > 0.
      7. Keep only rows whose three stage durations all have |z-score|
         below z_threshold.

    Parameters
    ----------
    file : str or path-like
        CSV file to read. Defaults to the module-level raw_data_file_path.
    skip_until_row : int
        First data row (1-based, header is row 0) that is kept. The default
        reproduces the original hard-coded skip; pass 1 to read every row.
    z_threshold : float
        Absolute z-score cutoff used for outlier removal.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the cleaned rows, so callers can add columns
        without triggering SettingWithCopyWarning.
    """
    df = pd.read_csv(file, skiprows=range(1, skip_until_row))
    print('read_csv: ', df.shape)

    df.drop(['POLICEPRECINCT', 'ZIPCODE', 'CITYCOUNCILDISTRICT', 'COMMUNITYDISTRICT', 'COMMUNITYSCHOOLDISTRICT',
             'CONGRESSIONALDISTRICT', 'REOPEN_INDICATOR', 'STANDBY_INDICATOR', 'SPECIAL_EVENT_INDICATOR',
             'TRANSFER_INDICATOR', 'VALID_INCIDENT_RSPNS_TIME_INDC'], axis=1, inplace=True)
    print('drop columns: ', df.shape)

    df.dropna(subset=['FIRST_ASSIGNMENT_DATETIME', 'FIRST_ACTIVATION_DATETIME',
                      'FIRST_ON_SCENE_DATETIME', 'INCIDENT_DISPATCH_AREA'], inplace=True)
    print('drop na: ', df.shape)

    time_columns = ['INCIDENT_DATETIME', 'FIRST_ASSIGNMENT_DATETIME', 'FIRST_ACTIVATION_DATETIME',
                    'FIRST_ON_SCENE_DATETIME', 'FIRST_TO_HOSP_DATETIME', 'FIRST_HOSP_ARRIVAL_DATETIME',
                    'INCIDENT_CLOSE_DATETIME']
    for column in time_columns:
        df[column] = pd.to_datetime(df[column], format="%m/%d/%Y %I:%M:%S %p", errors='coerce')
    print('change to datetime: ', df.shape)

    df.rename(columns={'DISPATCH_RESPONSE_SECONDS_QY': 'INCIDENT_TO_ASSIGNMENT_SEC',
                       'INCIDENT_RESPONSE_SECONDS_QY': 'INCIDENT_TO_ON_SCENE_SEC'}, inplace=True)

    # total_seconds() already yields floats, so no extra cast is needed.
    df['ASSIGNMENT_TO_ACTIVATION_SEC'] = (df['FIRST_ACTIVATION_DATETIME'] -
                                          df['FIRST_ASSIGNMENT_DATETIME']).dt.total_seconds()
    df['ACTIVATION_TO_ON_SCENE_SEC'] = (df['FIRST_ON_SCENE_DATETIME'] -
                                        df['FIRST_ACTIVATION_DATETIME']).dt.total_seconds()
    df['INCIDENT_TO_ACTIVATION_SEC'] = df['INCIDENT_TO_ASSIGNMENT_SEC'] + df['ASSIGNMENT_TO_ACTIVATION_SEC']
    # Zero baseline so the cumulative line chart has a starting point.
    df['INCIDENT_SEC'] = 0
    print('add time columns: ', df.shape)

    stage_columns = ['INCIDENT_TO_ASSIGNMENT_SEC', 'ASSIGNMENT_TO_ACTIVATION_SEC', 'ACTIVATION_TO_ON_SCENE_SEC']

    # Rows with a NaN duration compare False here and are dropped as well.
    positive = np.ones(len(df), dtype=bool)
    for column in stage_columns:
        positive &= (df[column] > 0).to_numpy()
    df = df[positive]
    print('filter for >0: ', df.shape)

    # All three z-scores are computed on the same (positive-only) frame before
    # any outlier row is removed. np.abs on an array is used instead of the
    # Series-only .abs() method so the code does not depend on whether
    # scipy.stats.zscore returns a Series or a plain ndarray.
    within_threshold = np.ones(len(df), dtype=bool)
    for column in stage_columns:
        z_scores = np.asarray(sp.stats.zscore(df[column], nan_policy='omit'), dtype=float)
        within_threshold &= np.abs(z_scores) < z_threshold
    df = df[within_threshold].copy()
    print('exclude outliers: ', df.shape)

    return df

In [50]:
# Run the import/cleaning pipeline with its default arguments and display
# summary statistics for the resulting frame.
cleaned_df = import_and_clean_EMS_data()
cleaned_df.describe()

read_csv:  (1140888, 31)
drop columns:  (1140888, 20)
drop na:  (1079714, 20)
change to datetime:  (1079714, 20)
add time columns:  (1079714, 24)
filter for >0:  (1060007, 24)
exclude outliers:  (1031670, 24)


Unnamed: 0,CAD_INCIDENT_ID,INCIDENT_DATETIME,INITIAL_SEVERITY_LEVEL_CODE,FINAL_SEVERITY_LEVEL_CODE,FIRST_ASSIGNMENT_DATETIME,INCIDENT_TO_ASSIGNMENT_SEC,FIRST_ACTIVATION_DATETIME,FIRST_ON_SCENE_DATETIME,INCIDENT_TO_ON_SCENE_SEC,INCIDENT_TRAVEL_TM_SECONDS_QY,FIRST_TO_HOSP_DATETIME,FIRST_HOSP_ARRIVAL_DATETIME,INCIDENT_CLOSE_DATETIME,ASSIGNMENT_TO_ACTIVATION_SEC,ACTIVATION_TO_ON_SCENE_SEC,INCIDENT_TO_ACTIVATION_SEC,INCIDENT_SEC
count,1031670.0,1031670,1031670.0,1031670.0,1031670,1031670.0,1031670,1031670,1031670.0,1031670.0,706805,703224,1031535,1031670.0,1031670.0,1031670.0,1031670.0
mean,216326800.0,2021-12-28 14:04:18.045415168,4.116335,4.102795,2021-12-28 14:06:16.769287936,118.7239,2021-12-28 14:06:42.974023168,2021-12-28 14:15:23.453364992,665.408,546.6841,2021-09-24 21:50:46.995140096,2021-09-29 02:20:28.666271488,2021-12-28 18:21:39.000597504,26.20474,520.4793,144.9286,0.0
min,102962200.0,2010-10-23 15:56:28,1.0,1.0,2010-10-23 15:56:58,1.0,2010-10-23 15:57:02,2010-10-23 16:00:25,10.0,4.0,2010-10-23 16:10:48,2010-10-23 16:14:33,2010-10-23 16:12:03,1.0,1.0,5.0,0.0
25%,242864700.0,2024-10-12 20:32:38,3.0,3.0,2024-10-12 20:34:36.249999872,14.0,2024-10-12 20:35:20,2024-10-12 20:44:16.500000,353.0,314.0,2024-10-08 10:10:51,2024-10-08 13:47:56,2024-10-12 21:59:04.500000,10.0,292.0,32.0,0.0
50%,243525800.0,2024-12-17 23:17:51,4.0,4.0,2024-12-17 23:18:43,27.0,2024-12-17 23:19:00,2024-12-17 23:27:38.500000,509.0,455.0,2024-12-15 15:30:25,2024-12-15 19:00:21,2024-12-18 00:45:16,17.0,429.0,52.0,0.0
75%,250542500.0,2025-02-23 13:27:18.750000128,5.0,5.0,2025-02-23 13:27:43.249999872,61.0,2025-02-23 13:28:04.750000128,2025-02-23 13:36:09,768.0,665.0,2025-02-20 18:38:57,2025-02-20 20:14:45.750000128,2025-02-23 14:41:49,30.0,635.0,98.0,0.0
max,251205800.0,2025-04-30 23:59:53,9.0,8.0,2025-05-01 00:03:04,3122.0,2025-05-01 00:03:29,2025-05-01 00:14:17,5620.0,2782.0,2025-05-01 01:44:30,2025-05-01 02:08:00,2025-05-01 02:37:26,319.0,2504.0,3341.0,0.0
std,59385890.0,,1.67193,1.673536,,336.705,,,530.6656,355.7224,,,,28.91789,352.1203,340.1997,0.0


In [26]:
def create_EMS_line_chart(df=cleaned_df, line_col='BOROUGH'):
    """Build a line chart of mean cumulative response time per incident stage.

    One line is drawn per distinct value of ``line_col``. The x-axis lists the
    four incident stages; the y-axis is the mean elapsed time since the
    incident was reported, converted from seconds to minutes and rounded to
    two decimals.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the four cumulative ``*_SEC`` columns and ``line_col``.
    line_col : str
        Column whose values define the individual lines.

    Returns
    -------
    plotly figure produced by ``px.line``.
    """
    # Source column -> x-axis label; dict order is the stage order on the chart.
    stage_labels = {
        'INCIDENT_SEC': 'Incident is<br>Reported',
        'INCIDENT_TO_ASSIGNMENT_SEC': 'Incident Assigned to<br>Response Unit',
        'INCIDENT_TO_ACTIVATION_SEC': 'Response Unit<br>Enroute',
        'INCIDENT_TO_ON_SCENE_SEC': 'Response Unit<br>Arrives',
    }

    pivot = pd.pivot_table(df, values=list(stage_labels), columns=line_col,
                           aggfunc='mean', sort=False)
    pivot = (pivot / 60).round(2).rename(index=stage_labels)

    # Derive the title from line_col instead of always saying "by Borough";
    # for the default 'BOROUGH' this yields the same title as before.
    group_label = str(line_col).replace('_', ' ').title()
    EMS_line_chart = px.line(pivot, color=line_col,
                             title='Cumulative EMS Response Time by {0}'.format(group_label),
                             markers=True)

    EMS_line_chart.update_layout(
        xaxis_title='EMS Incident Stage',
        yaxis_title='Time Elapsed in Minutes',
        legend=dict(font=dict(size=9))
    )

    return EMS_line_chart

In [27]:
def create_EMS_box_plot_chart(df=cleaned_df, box_x='BOROUGH', box_y='INCIDENT_TO_ON_SCENE_SEC', box_root=3):
    """Build a box plot of a root-transformed response time, grouped by ``box_x``.

    The plotted value is ``df[box_y] ** (1 / box_root)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``box_x`` and ``box_y``. It is not modified.
    box_x : str
        Column used for the x-axis groups and the box colours.
    box_y : str
        Response-time column to transform and plot.
    box_root : int or float
        Root applied to ``box_y`` (3 -> cube root). Must be non-zero.

    Returns
    -------
    plotly figure produced by ``px.box``.
    """
    # Build a small private frame with only the plotted columns so the
    # caller's DataFrame (by default the shared cleaned_df) does not gain a
    # 'box_root_trans' column as a side effect.
    plot_df = pd.DataFrame({
        box_x: df[box_x],
        'box_root_trans': np.power(df[box_y], 1 / box_root),
    })

    EMS_box_plot_chart = px.box(plot_df, x=box_x, y='box_root_trans', color=box_x,
                                title='Response Time by<br>{0}'.format(box_x))

    EMS_box_plot_chart.update_layout(
        yaxis_title='1/{0} Power Transformation of<br>{1}'.format(box_root, box_y),
        legend=dict(font=dict(size=9))
    )

    return EMS_box_plot_chart

In [28]:
def create_dispatch_area_scatter_plot(df=cleaned_df, scatter_y='INCIDENT_TO_ON_SCENE_SEC'):
    """Scatter each dispatch area's incident count against a mean response time.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'INCIDENT_DISPATCH_AREA' and the four response-time columns
        aggregated below.
    scatter_y : str
        Which of the aggregated mean columns to place on the y-axis.

    Returns
    -------
    plotly figure produced by ``px.scatter``; one point per dispatch area,
    with the area name available on hover.
    """
    # Number of incidents per dispatch area, as a one-column frame indexed by area.
    incident_counts = (
        df['INCIDENT_DISPATCH_AREA']
        .value_counts()
        .rename('Count')
        .rename_axis('Dispatch_Area')
        .to_frame()
    )

    # Mean of each response-time column per dispatch area.
    mean_times = pd.pivot_table(
        df,
        values=['INCIDENT_TO_ASSIGNMENT_SEC', 'ASSIGNMENT_TO_ACTIVATION_SEC',
                'ACTIVATION_TO_ON_SCENE_SEC', 'INCIDENT_TO_ON_SCENE_SEC'],
        index='INCIDENT_DISPATCH_AREA',
        aggfunc='mean',
        sort=False,
    )

    # Align counts and means on the area index, then expose the area as a column.
    per_area = pd.concat([incident_counts, mean_times], axis=1)
    per_area = per_area.reset_index(names='Dispatch_Area')

    figure = px.scatter(
        per_area,
        x='Count',
        y=scatter_y,
        hover_data=['Dispatch_Area'],
        title='Number of Incidents vs. Mean Response Time<br>for NYC Dispatch Areas',
    )
    figure.update_layout(
        xaxis_title='Number of Incidents',
        yaxis_title='Mean Response Time',
        legend=dict(font=dict(size=9)),
    )

    return figure

In [29]:
def create_hist_kde(df=cleaned_df, hist_borough='MANHATTAN', hist_x='INCIDENT_TO_ON_SCENE_SEC', hist_root=3):
    """Build a histogram + KDE of a root-transformed response time for one borough.

    The plotted value is ``df[hist_x] ** (1 / hist_root)`` restricted to rows
    whose 'BOROUGH' equals ``hist_borough``. The histogram uses 25 equal-width
    bins spanning the transformed values.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing 'BOROUGH' and ``hist_x``. It is not modified.
    hist_borough : str
        Borough to select.
    hist_x : str
        Response-time column to transform and plot.
    hist_root : int or float
        Root applied to ``hist_x`` (3 -> cube root). Must be non-zero.

    Returns
    -------
    plotly figure produced by ``ff.create_distplot``.

    Raises
    ------
    ValueError
        If no row of ``df`` belongs to ``hist_borough``.
    """
    # Select the single column with .loc and transform the resulting Series,
    # rather than assigning a new column onto a filtered slice of df (which
    # triggers pandas' SettingWithCopyWarning).
    borough_values = df.loc[df['BOROUGH'] == hist_borough, hist_x]
    if borough_values.empty:
        raise ValueError("No rows found for borough {0!r}".format(hist_borough))

    transformed = np.power(borough_values, 1 / hist_root).rename('hist_root_trans')

    num_bins = 25
    bin_size = (transformed.max() - transformed.min()) / num_bins

    hist_kde = ff.create_distplot([transformed], group_labels=[hist_borough], bin_size=bin_size)

    return hist_kde

In [30]:
# Quick visual check: build the dispatch-area scatter plot with its default
# arguments and render it.
test_chart = create_dispatch_area_scatter_plot()
test_chart.show()

In [36]:
def create_static_dashboard(df=cleaned_df, line_col='BOROUGH', box_x='BOROUGH', box_y='INCIDENT_TO_ON_SCENE_SEC', box_root=3,
                            scatter_y='INCIDENT_TO_ON_SCENE_SEC',hist_borough='MANHATTAN', hist_x='INCIDENT_TO_ON_SCENE_SEC', hist_root=3):
    """Assemble the four EMS charts into a single 2x2 Plotly dashboard.

    Layout:
      (1, 1) cumulative response-time line chart, one line per ``line_col`` value
      (1, 2) box plot of root-transformed ``box_y`` grouped by ``box_x``
      (2, 1) dispatch-area scatter of incident count vs. mean ``scatter_y``
      (2, 2) histogram + KDE of root-transformed ``hist_x`` for ``hist_borough``

    Each panel is built by the corresponding ``create_*`` function in this
    notebook; only the traces of those figures are copied into the dashboard,
    so panel titles and axis titles are set here.

    Parameters
    ----------
    df : pandas.DataFrame
        Cleaned EMS frame passed through to every chart builder.
    line_col, box_x, box_y, box_root, scatter_y, hist_borough, hist_x, hist_root
        Forwarded unchanged to the chart builder of the same panel.

    Returns
    -------
    plotly figure created by ``make_subplots`` with all traces added.
    """
    # Empty 2x2 canvas.
    subplot_titles = ['Cumulative EMS Response Time', 'Response Time by<br>{0}'.format(box_x),
                      'Number of Incidents vs. Mean Response Time<br>for NYC Dispatch Areas',
                      'Histogram of Response Times for {0}'.format(hist_borough)]
    dashboard = make_subplots(rows=2, cols=2, vertical_spacing=0.3, horizontal_spacing=0.3,
                              subplot_titles=subplot_titles)
    dashboard.update_layout(width=1200, height=800, title_text='EMS Response Times<br>in New York City', title_x=0.4)

    # Build each panel with the dedicated chart function instead of repeating
    # its body here, so the standalone charts and the dashboard cannot drift apart.
    EMS_line_chart = create_EMS_line_chart(df, line_col=line_col)
    EMS_box_plot_chart = create_EMS_box_plot_chart(df, box_x=box_x, box_y=box_y, box_root=box_root)
    da_scatter_plot = create_dispatch_area_scatter_plot(df, scatter_y=scatter_y)
    hist_kde = create_hist_kde(df, hist_borough=hist_borough, hist_x=hist_x, hist_root=hist_root)

    # Axis titles per panel.
    dashboard.update_xaxes(title_text="EMS Incident Stage", row=1, col=1)
    dashboard.update_yaxes(title_text="Time Elapsed in Minutes", row=1, col=1)

    dashboard.update_xaxes(title_text="{0}".format(box_x), row=1, col=2)
    dashboard.update_yaxes(title_text='1/{0} Power Transformation of<br>{1}'.format(box_root, box_y), row=1, col=2)

    dashboard.update_xaxes(title_text='Number of Incidents', row=2, col=1)
    dashboard.update_yaxes(title_text='Mean Response Time of<br>{0}'.format(scatter_y), row=2, col=1)

    dashboard.update_xaxes(title_text='1/{0} Power Transformation of<br>{1}'.format(hist_root, hist_x), row=2, col=2)
    # create_distplot normalises the histogram as a probability density by
    # default, so the y-axis is a density rather than a raw frequency count.
    dashboard.update_yaxes(title_text='Probability Density', row=2, col=2)

    # (figure, row, col, keep_legend): only the box-plot traces keep their
    # legend entries; every other panel is hidden from the shared legend.
    panels = [
        (EMS_line_chart, 1, 1, False),
        (EMS_box_plot_chart, 1, 2, True),
        (da_scatter_plot, 2, 1, False),
        (hist_kde, 2, 2, False),
    ]
    for figure, row, col, keep_legend in panels:
        for trace in figure.data:
            if not keep_legend:
                trace.showlegend = False
            dashboard.add_trace(trace, row=row, col=col)

    return dashboard

# Build the dashboard with all default arguments; as the last expression of
# the cell, the returned figure is displayed.
create_static_dashboard()

In [37]:
def create_interactive_dashboard():
    """Wire dropdown widgets to the static dashboard for interactive exploration.

    Creates one dropdown per dashboard argument and registers a callback with
    ``interact`` that rebuilds and shows the dashboard from the module-level
    ``cleaned_df`` whenever a selection changes. Returns None.
    """
    grouping_choices = ['BOROUGH', 'INITIAL_SEVERITY_LEVEL_CODE']
    duration_choices = ['INCIDENT_TO_ASSIGNMENT_SEC','ASSIGNMENT_TO_ACTIVATION_SEC',
                        'ACTIVATION_TO_ON_SCENE_SEC', 'INCIDENT_TO_ON_SCENE_SEC']
    borough_choices = cleaned_df['BOROUGH'].unique().tolist()
    root_choices = [1,2,3,4]

    def _dropdown(label, choices, initial):
        # Every selector shares the same look; only label, options and the
        # initial value differ.
        return Dropdown(
            options=choices,
            value=initial,
            description=label,
            style={'description_width':'initial'})

    # Keyed by the dashboard argument each widget controls.
    controls = {
        'line_col': _dropdown('Line Chart:', grouping_choices, 'BOROUGH'),
        'box_x': _dropdown('Box Chart x:', grouping_choices, 'BOROUGH'),
        'box_y': _dropdown('Box Chart y:', duration_choices, 'INCIDENT_TO_ON_SCENE_SEC'),
        'box_root': _dropdown('Box Plot Root Transformation, 1/_', root_choices, 3),
        'scatter_y': _dropdown('Scatter Plot y:', duration_choices, 'INCIDENT_TO_ON_SCENE_SEC'),
        'hist_borough': _dropdown('Histogram Borough:', borough_choices, 'MANHATTAN'),
        'hist_x': _dropdown('Histogram x:', duration_choices, 'INCIDENT_TO_ON_SCENE_SEC'),
        'hist_root': _dropdown('Histogram Root Transformation, 1/_', root_choices, 3),
    }

    def update(line_col, box_x, box_y, box_root, scatter_y, hist_borough, hist_x, hist_root):
        # Replace the previously rendered dashboard instead of stacking a new one below it.
        clear_output(wait=True)

        refreshed = create_static_dashboard(
            cleaned_df,
            line_col=line_col,
            box_x=box_x,
            box_y=box_y,
            box_root=box_root,
            scatter_y=scatter_y,
            hist_borough=hist_borough,
            hist_x=hist_x,
            hist_root=hist_root
        )
        refreshed.show()

    interact(update, **controls)

In [38]:
# Create the dropdown controls and show the interactive dashboard.
create_interactive_dashboard()

interactive(children=(Dropdown(description='Line Chart:', options=('BOROUGH', 'INITIAL_SEVERITY_LEVEL_CODE'), …

In [18]:
#!pip freeze > requirements.txt