In [1]:
import os

import numpy as np
import pandas as pd
import scipy as sp
from IPython.display import display, clear_output
from ipywidgets import interact, interactive, Dropdown, IntRangeSlider

In [2]:
import pip

In [3]:
!pip install plotly



In [4]:
import plotly.express as px
import plotly.io as pio
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

In [5]:
# Make 'iframe_connected' the default plotly renderer used when figures are shown.
pio.renderers.default = 'iframe_connected'

In [6]:
# Location of the EMS incident dispatch CSV.
# Set the EMS_DATA_FILE environment variable to point at a different copy of the file;
# when it is not set, the original local Downloads path is used.
raw_data_file_path = os.environ.get(
    "EMS_DATA_FILE",
    "C:/Users/linda/Downloads/EMS_Incident_Dispatch_Data.csv",
)

In [7]:
def import_and_clean_EMS_data(file=raw_data_file_path):
    """Read the EMS dispatch CSV and return a cleaned frame with response-interval columns.

    The frame's shape is printed after every step so the effect of each filter is visible.

    Steps:
      1. Read at most 1,000,000 rows from ``file``.
      2. Drop district / indicator columns that are not used afterwards.
      3. Drop rows with a missing assignment, activation or on-scene timestamp,
         dispatch area, or borough.
      4. Drop rows whose borough is 'UNKNOWN'.
      5. Parse the timestamp columns; values that do not match the expected
         format become NaT.
      6. Rename the two ``*_SECONDS_QY`` columns and derive
         ASSIGNMENT_TO_ACTIVATION_SEC, ACTIVATION_TO_ON_SCENE_SEC,
         INCIDENT_TO_ACTIVATION_SEC and a constant-zero INCIDENT_SEC.
      7. Keep rows where all three stage intervals are strictly positive.
      8. Keep rows where every stage interval has an absolute z-score below 3.

    Parameters
    ----------
    file : str or path-like
        CSV file to read. Defaults to the notebook-level ``raw_data_file_path``.

    Returns
    -------
    pandas.DataFrame
        The cleaned rows as an independent frame; the original row index is preserved.
    """
    unused_columns = ['POLICEPRECINCT', 'ZIPCODE', 'CITYCOUNCILDISTRICT', 'COMMUNITYDISTRICT',
                      'COMMUNITYSCHOOLDISTRICT', 'CONGRESSIONALDISTRICT', 'REOPEN_INDICATOR',
                      'STANDBY_INDICATOR', 'SPECIAL_EVENT_INDICATOR', 'TRANSFER_INDICATOR',
                      'VALID_INCIDENT_RSPNS_TIME_INDC']
    required_columns = ['FIRST_ASSIGNMENT_DATETIME', 'FIRST_ACTIVATION_DATETIME',
                        'FIRST_ON_SCENE_DATETIME', 'INCIDENT_DISPATCH_AREA', 'BOROUGH']
    time_columns = ['INCIDENT_DATETIME', 'FIRST_ASSIGNMENT_DATETIME', 'FIRST_ACTIVATION_DATETIME',
                    'FIRST_ON_SCENE_DATETIME', 'FIRST_TO_HOSP_DATETIME', 'FIRST_HOSP_ARRIVAL_DATETIME',
                    'INCIDENT_CLOSE_DATETIME']
    stage_columns = ['INCIDENT_TO_ASSIGNMENT_SEC', 'ASSIGNMENT_TO_ACTIVATION_SEC',
                     'ACTIVATION_TO_ON_SCENE_SEC']

    df = pd.read_csv(file, nrows=1000000)
    print('read_csv: ', df.shape)

    df = df.drop(columns=unused_columns)
    print('drop columns: ', df.shape)

    df = df.dropna(subset=required_columns)
    print('drop na: ', df.shape)

    # .copy() gives an independent frame, so the column assignments below do not
    # write into a slice of the previous frame (avoids SettingWithCopyWarning).
    df = df[df['BOROUGH'] != 'UNKNOWN'].copy()
    print('remove unknown boroughs: ', df.shape)

    for column in time_columns:
        df[column] = pd.to_datetime(df[column], format="%m/%d/%Y %I:%M:%S %p", errors='coerce')
    print('change to datetime: ', df.shape)

    df = df.rename(columns={'DISPATCH_RESPONSE_SECONDS_QY': 'INCIDENT_TO_ASSIGNMENT_SEC',
                            'INCIDENT_RESPONSE_SECONDS_QY': 'INCIDENT_TO_ON_SCENE_SEC'})

    # total_seconds() already yields floats, so no further cast is needed.
    df['ASSIGNMENT_TO_ACTIVATION_SEC'] = (df['FIRST_ACTIVATION_DATETIME'] -
                                          df['FIRST_ASSIGNMENT_DATETIME']).dt.total_seconds()
    df['ACTIVATION_TO_ON_SCENE_SEC'] = (df['FIRST_ON_SCENE_DATETIME'] -
                                        df['FIRST_ACTIVATION_DATETIME']).dt.total_seconds()
    df['INCIDENT_TO_ACTIVATION_SEC'] = df['INCIDENT_TO_ASSIGNMENT_SEC'] + df['ASSIGNMENT_TO_ACTIVATION_SEC']
    # Zero baseline column: the starting point of the cumulative timeline.
    df['INCIDENT_SEC'] = 0
    print('add time columns: ', df.shape)

    # NaN intervals compare False here, so rows with unparseable timestamps are dropped too.
    df = df[(df[stage_columns] > 0).all(axis=1)]
    print('filter for >0: ', df.shape)

    # All three z-scores are computed on the same (positive-filtered) frame.
    # np.asarray / np.abs work whether zscore hands back a Series or a plain ndarray.
    within_three_sd = np.ones(len(df), dtype=bool)
    for column in stage_columns:
        z_scores = np.asarray(sp.stats.zscore(df[column], nan_policy='omit'))
        within_three_sd &= np.abs(z_scores) < 3
    # Return an independent frame so callers can add columns without warnings.
    df = df[within_three_sd].copy()
    print('exclude outliers: ', df.shape)

    return df

In [8]:
# Run the cleaning pipeline on the default CSV path and preview the first rows.
cleaned_df = import_and_clean_EMS_data()
cleaned_df.head()

read_csv:  (1000000, 31)
drop columns:  (1000000, 20)
drop na:  (957731, 20)
remove unknown boroughs:  (957729, 20)
change to datetime:  (957729, 20)
add time columns:  (957729, 24)
filter for >0:  (942017, 24)
exclude outliers:  (913354, 24)


Unnamed: 0,CAD_INCIDENT_ID,INCIDENT_DATETIME,INITIAL_CALL_TYPE,INITIAL_SEVERITY_LEVEL_CODE,FINAL_CALL_TYPE,FINAL_SEVERITY_LEVEL_CODE,FIRST_ASSIGNMENT_DATETIME,VALID_DISPATCH_RSPNS_TIME_INDC,INCIDENT_TO_ASSIGNMENT_SEC,FIRST_ACTIVATION_DATETIME,...,FIRST_HOSP_ARRIVAL_DATETIME,INCIDENT_CLOSE_DATETIME,HELD_INDICATOR,INCIDENT_DISPOSITION_CODE,BOROUGH,INCIDENT_DISPATCH_AREA,ASSIGNMENT_TO_ACTIVATION_SEC,ACTIVATION_TO_ON_SCENE_SEC,INCIDENT_TO_ACTIVATION_SEC,INCIDENT_SEC
0,230010001,2023-01-01 00:00:02,UNKNOW,4,UNKNOW,4,2023-01-01 00:23:32,Y,1410,2023-01-01 00:23:47,...,NaT,2023-01-01 02:11:29,N,90,MANHATTAN,M5,15.0,462.0,1425.0,0
2,230010003,2023-01-01 00:00:30,DIFFFC,2,DIFFFC,2,2023-01-01 00:02:24,Y,114,2023-01-01 00:02:33,...,2023-01-01 00:37:31,2023-01-01 01:27:09,N,82,BRONX,B5,9.0,356.0,123.0,0
4,230010007,2023-01-01 00:01:55,DRUG,4,DRUG,4,2023-01-01 00:23:42,Y,1307,2023-01-01 00:23:55,...,NaT,2023-01-01 01:56:31,Y,93,MANHATTAN,M9,13.0,788.0,1320.0,0
5,230010008,2023-01-01 00:02:05,PD13C,3,PD13C,3,2023-01-01 00:10:17,Y,492,2023-01-01 00:10:25,...,NaT,2023-01-01 00:20:49,Y,91,BROOKLYN,K6,8.0,373.0,500.0,0
7,230010011,2023-01-01 00:03:22,UNC,2,UNC,2,2023-01-01 00:08:20,Y,298,2023-01-01 00:08:32,...,2023-01-01 00:47:46,2023-01-01 01:39:16,N,82,MANHATTAN,M1,12.0,785.0,310.0,0


In [9]:
def create_EMS_line_chart(df=cleaned_df, line_col='BOROUGH'):
    """Plot the mean cumulative response timeline, one line per value of ``line_col``.

    For every group in ``line_col`` the mean of each cumulative-seconds column is
    taken, converted to minutes and rounded to two decimals. The four stages are
    placed on the x-axis and the elapsed minutes on the y-axis.

    Parameters
    ----------
    df : pandas.DataFrame
        Cleaned EMS data containing the cumulative ``*_SEC`` columns.
    line_col : str
        Column whose distinct values each get their own line.

    Returns
    -------
    plotly figure
        The line chart with markers.
    """
    # Source column -> axis label; insertion order is the stage order on the x-axis.
    stage_labels = {
        'INCIDENT_SEC': 'Incident is<br>Reported',
        'INCIDENT_TO_ASSIGNMENT_SEC': 'Incident Assigned to<br>Response Unit',
        'INCIDENT_TO_ACTIVATION_SEC': 'Response Unit<br>Enroute',
        'INCIDENT_TO_ON_SCENE_SEC': 'Response Unit<br>Arrives',
    }

    mean_seconds = pd.pivot_table(df, values=list(stage_labels), columns=line_col,
                                  aggfunc='mean', sort=False)
    mean_minutes = (mean_seconds / 60).round(2).rename(index=stage_labels)

    chart_title = 'Cumulative EMS Response Time by<br>{0}'.format(line_col)
    figure = px.line(mean_minutes, color=line_col, title=chart_title, markers=True)
    figure.update_layout(
        xaxis_title='EMS Incident Stage',
        yaxis_title='Time Elapsed in Minutes',
        legend={'font': {'size': 9}},
    )

    return figure

In [10]:
def create_EMS_box_plot_chart(df=cleaned_df, box_x='BOROUGH', box_y='INCIDENT_TO_ON_SCENE_SEC', box_root=3):
    """Box plot of a root-transformed response-time column, grouped by ``box_x``.

    The y values are ``df[box_y] ** (1 / box_root)``. ``df`` is only read: the
    transformed values live in a separate plotting frame, so no column is added
    to the caller's DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Cleaned EMS data.
    box_x : str
        Categorical column used for both the x-axis groups and the colours.
    box_y : str
        Numeric column to transform and plot.
    box_root : int or float
        Root of the power transformation (3 means cube root).

    Returns
    -------
    plotly figure
        The box plot.
    """
    # Build a two-column plotting frame rather than writing 'box_root_trans' into `df`
    # (which by default is the shared, notebook-level cleaned frame).
    plot_df = df[[box_x]].assign(box_root_trans=np.power(df[box_y], 1/box_root))

    EMS_box_plot_chart = px.box(plot_df, x=box_x, y='box_root_trans', color=box_x,
                                title='Response Time by<br>{0}'.format(box_x))

    EMS_box_plot_chart.update_layout(
        yaxis_title='1/{0} Power Transformation of<br>{1}'.format(box_root, box_y),
        legend=dict(font=dict(size=9))
    )

    return EMS_box_plot_chart

In [11]:
def assign_borough(dispatch_area):
    """Return the borough name for a dispatch-area code, based on its first character.

    'B' -> BRONX, 'K' -> BROOKLYN, 'M' -> MANHATTAN, 'Q' -> QUEENS.
    Every other first character is reported as 'RICHMOND / STATEN ISLAND'.
    """
    prefix = dispatch_area[0]
    known_prefixes = (
        ('B', 'BRONX'),
        ('K', 'BROOKLYN'),
        ('M', 'MANHATTAN'),
        ('Q', 'QUEENS'),
    )
    for letter, borough in known_prefixes:
        if prefix == letter:
            return borough
    # Fallback for any prefix not listed above.
    return 'RICHMOND / STATEN ISLAND'

In [12]:
def create_dispatch_area_scatter_plot(df=cleaned_df, scatter_y='INCIDENT_TO_ON_SCENE_SEC'):
    """Scatter plot of incident count against mean response time, one point per dispatch area.

    Dispatch areas with 10 or fewer incidents are left out. Points are coloured
    by the borough derived from the dispatch-area code via ``assign_borough``.

    Parameters
    ----------
    df : pandas.DataFrame
        Cleaned EMS data.
    scatter_y : str
        Which mean interval column to put on the y-axis.

    Returns
    -------
    plotly figure
        The scatter plot.
    """
    # Incidents per dispatch area, as a one-column frame indexed by area code.
    incident_counts = (
        df['INCIDENT_DISPATCH_AREA']
        .value_counts()
        .rename_axis('Dispatch_Area')
        .to_frame('Count')
    )

    # Mean of every interval column per dispatch area.
    interval_columns = ['INCIDENT_TO_ASSIGNMENT_SEC', 'ASSIGNMENT_TO_ACTIVATION_SEC',
                        'ACTIVATION_TO_ON_SCENE_SEC', 'INCIDENT_TO_ON_SCENE_SEC']
    mean_intervals = pd.pivot_table(df, values=interval_columns,
                                    index='INCIDENT_DISPATCH_AREA', aggfunc='mean', sort=False)

    area_stats = pd.concat([incident_counts, mean_intervals], axis=1)
    busy_areas = area_stats[area_stats['Count'] > 10].reset_index(names='Dispatch_Area')
    busy_areas['Borough'] = busy_areas['Dispatch_Area'].apply(assign_borough)

    figure = px.scatter(
        busy_areas, x='Count', y=scatter_y, color='Borough', hover_data=['Dispatch_Area'],
        title='Number of Incidents vs. Mean Response Time<br>for NYC Dispatch Areas',
    )
    figure.update_layout(
        xaxis_title='Number of Incidents',
        yaxis_title='Mean Response Time',
        legend={'font': {'size': 9}},
    )

    return figure

In [13]:
def create_hist_kde(df=cleaned_df, hist_borough='MANHATTAN', hist_x='INCIDENT_TO_ON_SCENE_SEC', hist_root=3):
    """Histogram with KDE curve of a root-transformed response-time column for one borough.

    The plotted values are ``df[hist_x] ** (1 / hist_root)`` for the rows whose
    BOROUGH equals ``hist_borough``. The histogram uses 25 equal-width bins
    spanning the transformed values. ``df`` is only read.

    Parameters
    ----------
    df : pandas.DataFrame
        Cleaned EMS data.
    hist_borough : str
        Borough whose rows are plotted.
    hist_x : str
        Numeric column to transform and plot.
    hist_root : int or float
        Root of the power transformation (3 means cube root).

    Returns
    -------
    plotly figure
        The figure produced by ``ff.create_distplot``.

    Raises
    ------
    ValueError
        If ``df`` has no rows for ``hist_borough``.
    """
    # Select the column for the borough in one .loc call and transform the resulting
    # Series directly; nothing is assigned onto a filtered slice of `df`.
    borough_values = df.loc[df['BOROUGH'] == hist_borough, hist_x]
    if borough_values.empty:
        raise ValueError('No rows found for borough {0!r}'.format(hist_borough))

    transformed = np.power(borough_values, 1/hist_root).rename('hist_root_trans')

    num_bins = 25
    bin_size = (transformed.max() - transformed.min()) / num_bins

    hist_kde = ff.create_distplot([transformed], group_labels=[hist_borough], bin_size=bin_size)

    return hist_kde

In [14]:
# Quick visual check of the dispatch-area scatter plot using its default arguments.
test_chart = create_dispatch_area_scatter_plot()
test_chart.show()

In [15]:
def create_static_dashboard(df=cleaned_df, line_col='BOROUGH', box_x='BOROUGH', box_y='INCIDENT_TO_ON_SCENE_SEC', box_root=3,
                            scatter_y='INCIDENT_TO_ON_SCENE_SEC',hist_borough='MANHATTAN', hist_x='INCIDENT_TO_ON_SCENE_SEC', hist_root=3):
    """Assemble a 2x2 dashboard of EMS response-time charts and return it.

    Panels:
      * row 1, col 1 - mean cumulative response timeline, one line per ``line_col`` value;
      * row 1, col 2 - box plot of ``df[box_y] ** (1 / box_root)`` grouped by ``box_x``;
      * row 2, col 1 - incidents vs. mean ``scatter_y`` per dispatch area
        (areas with 10 or fewer incidents are left out);
      * row 2, col 2 - histogram + KDE of ``df[hist_x] ** (1 / hist_root)`` for ``hist_borough``.

    Each panel is first built as its own figure and only its traces are copied
    into the dashboard. ``df`` is only read; no columns are added to it.

    Returns
    -------
    plotly figure
        The combined 1200x800 dashboard.

    Raises
    ------
    ValueError
        If ``df`` has no rows for ``hist_borough``.
    """

    #CREATE EMPTY DASHBOARD
    subplot_titles = ['Cumulative EMS Response Time', 'Response Time by<br>{0}'.format(box_x),
                      'Number of Incidents vs. Mean Response Time<br>for NYC Dispatch Areas',
                      'Histogram of Response Times for<br>{0}'.format(hist_borough)]
    dashboard = make_subplots(rows=2, cols=2, vertical_spacing=0.3, horizontal_spacing=0.3,
                             subplot_titles=subplot_titles)
    # Legend styling is applied to the dashboard itself: only traces are copied from the
    # per-panel figures, so layout settings made on those figures would be discarded.
    dashboard.update_layout(width=1200, height=800, title_text='EMS Response Times<br>in New York City', title_x=0.4,
                            legend=dict(font=dict(size=9)))

    #LINE CHART
    pivot = pd.pivot_table(df, values=['INCIDENT_SEC','INCIDENT_TO_ASSIGNMENT_SEC', 'INCIDENT_TO_ACTIVATION_SEC','INCIDENT_TO_ON_SCENE_SEC'],
                                   columns=line_col, aggfunc='mean', sort=False)
    pivot = round(pivot / 60, 2)  # seconds -> minutes
    pivot = pivot.rename(index={'INCIDENT_SEC':'Incident is<br>Reported',
                                'INCIDENT_TO_ASSIGNMENT_SEC':'Incident Assigned to<br>Response Unit',
                                'INCIDENT_TO_ACTIVATION_SEC':'Response Unit<br>Enroute',
                                'INCIDENT_TO_ON_SCENE_SEC':'Response Unit<br>Arrives'})

    EMS_line_chart=px.line(pivot, color=line_col, markers=True)

    dashboard.update_xaxes(title_text="EMS Incident Stage", row=1, col=1)
    dashboard.update_yaxes(title_text="Time Elapsed in Minutes", row=1, col=1)


    #BOX PLOT CHART
    # Two-column plotting frame; the transformed values are not written into `df`.
    box_df = df[[box_x]].assign(box_root_trans=np.power(df[box_y], 1/box_root))

    EMS_box_plot_chart = px.box(box_df, x=box_x, y='box_root_trans', color=box_x)

    dashboard.update_xaxes(title_text="{0}".format(box_x), row=1, col=2)
    dashboard.update_yaxes(title_text='1/{0} Power Transformation of<br>{1}'.format(box_root, box_y), row=1, col=2)


    #SCATTER PLOT
    da_counts = df['INCIDENT_DISPATCH_AREA'].value_counts()
    counts_df = da_counts.reset_index()
    counts_df.columns=['Dispatch_Area', 'Count']
    counts_df = counts_df.set_index('Dispatch_Area')

    da_pivot = pd.pivot_table(df, values=['INCIDENT_TO_ASSIGNMENT_SEC','ASSIGNMENT_TO_ACTIVATION_SEC',
                                                'ACTIVATION_TO_ON_SCENE_SEC', 'INCIDENT_TO_ON_SCENE_SEC'],
                                    index='INCIDENT_DISPATCH_AREA', aggfunc='mean', sort=False)

    da_df = pd.concat([counts_df, da_pivot], axis=1)
    da_df = da_df[da_df['Count'] > 10].reset_index(names='Dispatch_Area')
    da_df['Borough'] = da_df['Dispatch_Area'].apply(assign_borough)

    da_scatter_plot = px.scatter(da_df, x='Count', y=scatter_y, color='Borough', hover_data=['Dispatch_Area'])

    dashboard.update_xaxes(title_text='Number of Incidents', row=2, col=1)
    dashboard.update_yaxes(title_text='Mean Response Time of<br>{0}'.format(scatter_y), row=2, col=1)


    #HISTOGRAM
    group_label = [hist_borough]

    # Select and transform the borough's values as a Series; nothing is assigned
    # onto a filtered slice of `df`.
    borough_values = df.loc[df['BOROUGH']==hist_borough, hist_x]
    if borough_values.empty:
        raise ValueError('No rows found for borough {0!r}'.format(hist_borough))
    hist_values = np.power(borough_values, 1/hist_root).rename('hist_root_trans')

    num_bins = 25
    bin_size = (hist_values.max() - hist_values.min()) / num_bins

    hist_kde = ff.create_distplot([hist_values], group_labels=group_label, bin_size=bin_size)

    dashboard.update_xaxes(title_text='1/{0} Power Transformation of<br>{1}'.format(hist_root, hist_x), row=2, col=2)
    # NOTE(review): create_distplot appears to normalise the histogram to a probability
    # density by default, so 'Frequency' may be a mislabel - confirm before relabelling.
    dashboard.update_yaxes(title_text='Frequency', row=2, col=2)


    #ADD CHARTS TO DASHBOARD
    # Only the box-plot traces keep their legend entries; the other panels hide theirs.
    for trace in EMS_line_chart.data:
        trace.showlegend=False
        dashboard.add_trace(trace, row=1, col=1)

    for trace in EMS_box_plot_chart.data:
        dashboard.add_trace(trace, row=1, col=2)

    for trace in da_scatter_plot.data:
        trace.showlegend=False
        dashboard.add_trace(trace, row=2, col=1)

    for trace in hist_kde.data:
        trace.showlegend=False
        dashboard.add_trace(trace, row=2, col=2)

    return dashboard

# Build the dashboard with all default arguments; as the cell's last expression
# the returned figure is displayed.
create_static_dashboard()

In [16]:
def create_interactive_dashboard():
    """Show dropdown controls that rebuild and redisplay the static dashboard.

    One dropdown is created per dashboard parameter. Whenever a selection
    changes, the previous output is cleared and ``create_static_dashboard`` is
    called on the notebook-level ``cleaned_df`` with the current selections.
    """
    categorical_cols = ['BOROUGH', 'INITIAL_SEVERITY_LEVEL_CODE']
    response_time_cols = ['INCIDENT_TO_ASSIGNMENT_SEC','ASSIGNMENT_TO_ACTIVATION_SEC',
                          'ACTIVATION_TO_ON_SCENE_SEC', 'INCIDENT_TO_ON_SCENE_SEC']
    boroughs = cleaned_df['BOROUGH'].unique().tolist()
    root_choices = [1,2,3,4]

    def make_dropdown(options, value, description):
        # Every control shares the same style so long descriptions are not truncated.
        return Dropdown(options=options, value=value, description=description,
                        style={'description_width':'initial'})

    # Keys match the parameter names of `update` (and of create_static_dashboard).
    controls = {
        'line_col': make_dropdown(categorical_cols, 'BOROUGH', 'Line Chart:'),
        'box_x': make_dropdown(categorical_cols, 'BOROUGH', 'Box Chart x:'),
        'box_y': make_dropdown(response_time_cols, 'INCIDENT_TO_ON_SCENE_SEC', 'Box Chart y:'),
        'box_root': make_dropdown(root_choices, 3, 'Box Plot Root Transformation, 1/_'),
        'scatter_y': make_dropdown(response_time_cols, 'INCIDENT_TO_ON_SCENE_SEC', 'Scatter Plot y:'),
        'hist_borough': make_dropdown(boroughs, 'MANHATTAN', 'Histogram Borough:'),
        'hist_x': make_dropdown(response_time_cols, 'INCIDENT_TO_ON_SCENE_SEC', 'Histogram x:'),
        'hist_root': make_dropdown(root_choices, 3, 'Histogram Root Transformation, 1/_'),
    }

    def update(line_col, box_x, box_y, box_root, scatter_y, hist_borough, hist_x, hist_root):
        # Replace the previously rendered dashboard instead of stacking a new one below it.
        clear_output(wait=True)
        create_static_dashboard(
            cleaned_df,
            line_col=line_col,
            box_x=box_x,
            box_y=box_y,
            box_root=box_root,
            scatter_y=scatter_y,
            hist_borough=hist_borough,
            hist_x=hist_x,
            hist_root=hist_root,
        ).show()

    interact(update, **controls)

In [17]:
# Display the dropdown controls together with the dashboard they drive.
create_interactive_dashboard()

interactive(children=(Dropdown(description='Line Chart:', options=('BOROUGH', 'INITIAL_SEVERITY_LEVEL_CODE'), …

In [18]:
#!pip freeze > requirements.txt