# EMS Data Analysis

In [4]:
import os.path
import pandas as pd
from bokeh.core.properties import value
from bokeh.io import show, output_file, output_notebook
from bokeh.plotting import figure
from bokeh.io import export_png

output_notebook()

def plot_incidents(data, name):
    """Show a stacked bar chart of incident counts, one bar per index entry.

    ``data`` must provide ``count``, ``geom`` and ``bi_count`` columns. Its
    index values are stringified and used as the categorical x axis.

    Side effect: two derived columns are written onto ``data`` itself --
    ``no-geo`` (``count - geom``) and ``bi`` (``count - bi_count``).

    ``name`` becomes the figure title.
    """
    # Derived series are stored on the caller's frame, not on a copy.
    data['no-geo'] = data['count'] - data['geom']
    data['bi'] = data['count'] - data['bi_count']

    stack_keys = ["geom", "no-geo", "bi"]
    palette = ["#5CC9F5", "#e84d60", "#cccccc"]
    labels = list(map(str, data.index.values))

    # Column source: the x categories first, then one series per stacked layer.
    parsed = {'years': labels}
    for key in stack_keys:
        parsed[key] = data[key]

    p = figure(
        x_range=labels,
        plot_height=250,
        title=name,
        toolbar_location=None,
    )
    p.vbar_stack(
        stack_keys,
        x='years',
        width=0.9,
        color=palette,
        source=parsed,
        legend=[value(key) for key in stack_keys],
    )

    # Axis / grid cosmetics.
    p.y_range.start = 0
    p.x_range.range_padding = 0.1
    p.xgrid.grid_line_color = None
    p.axis.minor_tick_line_color = None
    p.outline_line_color = None

    # Legend placement.
    p.legend.location = "top_left"
    p.legend.orientation = "horizontal"

    show(p)
#     export_png(p, filename="figures/%s.png" % (name,))

def plot_state_incidents(name):
    """Plot the frame stored under ``name`` in the module-level ``states`` dict."""
    plot_incidents(states[name], name)
    


## Incidents by year

# SQL Queries

```sql
select count(1), sum(case when exp_no = 0 then 1 else 0 end), extract(year from inc_date) as year
from ems.ems group by year;
```

```sql
SELECT count(1), extract(year from alarm) as year
FROM ems.basicincident a
WHERE a.state = 'DC'
GROUP by year
```

## Incidents by state

In [6]:
# TOTALS
# State codes to look for; one CSV per code is expected in each data directory.
state_codes = ['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 
         'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 
         'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ',
         'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 
         'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY',
         'DC']

states = {}  # Updated query data, keyed by state code, indexed by year
basicincident = {}  # Basic incident totals by year, keyed by state code

# Load every state in `state_codes` for which both CSV exports are present.
for code in state_codes:
    filename = 'data/updated/%s.csv' % (code,)
    basicincident_filename = 'data/basicincident/%s.csv' % (code,)
    if os.path.isfile(filename) and os.path.isfile(basicincident_filename):
        state_df = pd.read_csv(filename)
        basic_df = pd.read_csv(basicincident_filename)
        # Keep the per-state frame instead of overwriting the dict itself.
        basicincident[code] = basic_df

        # Merge the basic-incident count into the state frame.
        # NOTE(review): this assignment happens before 'year' becomes the
        # index, so rows are matched by position, not by year -- confirm both
        # CSVs list the same years in the same order.
        state_df['bi_count'] = basic_df['count']

        # Store in the dict with the year as index.
        states[code] = state_df.set_index('year')
    else:
        print("Did not load: %s" % (code,))

# Plot only the states that actually loaded; iterating over `state_codes`
# would raise KeyError for any code that was skipped above.
for code in states:
    plot_state_incidents(code)

In [3]:
total = 0
with_geom = 0
no_geom = 0
bi_count = 0

# Calculate joined entry totals across every loaded state.
for state in states.values():
    total += state['count'].sum()
    with_geom += state['geom'].sum()
    # Derived here as count - geom rather than read from the 'no-geo' column,
    # which only exists once plot_incidents() has been run on the frame.
    no_geom += (state['count'] - state['geom']).sum()
    bi_count += state['bi_count'].sum()

# Guard the ratios so the cell still runs when no state was loaded.
geom_ratio = with_geom / total if total else float('nan')
updated_ratio = bi_count / total if total else float('nan')

print("Total %s" % (total,))
print("With %s" % (with_geom,))
print("Without %s" % (no_geom,))
print("Ratio: %s" % (geom_ratio,))
print("BasicIncidents: %s" % (bi_count,))
print("Ratio of entries Updated: %s" % (updated_ratio,))

# Chart: a single stacked bar with the same three layers as the per-state plots.
x_axis = ["Entries",]
y_axis = ["geom", "no-geo", "bi"]
colors = ["#5CC9F5", "#e84d60", "#cccccc"]
parsed = { 'total': x_axis,
           'geom': [with_geom],
           'no-geo': [no_geom],
           'bi': [(total-bi_count)]}

p = figure(x_range=x_axis, plot_height=250, title="Total Entries",
           toolbar_location=None)

p.vbar_stack(y_axis, x='total', width=0.9, color=colors, source=parsed,
             legend=[value(x) for x in y_axis])

p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xgrid.grid_line_color = None
p.axis.minor_tick_line_color = None
p.outline_line_color = None
p.legend.location = "top_left"
p.legend.orientation = "horizontal"

show(p)



Total 117383071
With 75929729
Without 41453342
Ratio: 0.6468541703087662
BasicIncidents: 117333294
Ratio of entries Updated: 0.9995759439621409


In [7]:
# Row-count totals for the three per-state summary exports.
total_ems = total_basic = total_address = 0

# EMS table
ems = pd.read_csv('data/ems_totals_by_state.csv')
total_ems = sum(ems['count'])
print("Total EMS entries: %s" % total_ems)

# Basic incident table
# NOTE(review): the label below says "basicaddress" although the file read is
# basicincident_totals_by_state.csv -- confirm the intended wording.
basic = pd.read_csv('data/basicincident_totals_by_state.csv')
total_basic = sum(basic['count'])
print("Total basicaddress entries: %s" % total_basic)

# Incident address table
address = pd.read_csv('data/incidentaddress_total_by_state.csv')
total_address = sum(address['count'])
print("Total incidentaddress entries: %s" % total_address)

Total EMS entries: 8131224
Total basicaddress entries: 117353939
Total incidentaddress entries: 90015396


In [8]:
# Year-indexed totals over all states: element-wise sum of the per-state
# frames. fill_value=0 lets the first addition onto the empty frame (and any
# cell present in only one operand) count as zero instead of NaN.
allStates = pd.DataFrame()
for state in states.values():
    allStates = allStates.add(state, fill_value=0)

# Every per-state frame stacked into one; the dict keys (state codes) become
# the outer index level.
joined = pd.concat(states)

# Display only: the result is not assigned, so `joined` itself keeps its
# 'bi' and 'bi_count' columns for later cells.
joined.drop(columns=['bi', 'bi_count'])


Unnamed: 0_level_0,Unnamed: 1_level_0,count,geom,no-geo
Unnamed: 0_level_1,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AK,2012,58644,58597,47
AK,2013,61502,61501,1
AK,2014,60638,0,60638
AK,2015,64571,64571,0
AK,2016,67415,0,67415
AL,2012,249674,0,249674
AL,2013,209036,0,209036
AL,2014,260470,0,260470
AL,2015,322581,322579,2
AL,2016,344077,0,344077


In [9]:
# All-state 'no-geo' total for 2012, fetched with a single label-based lookup
# rather than chained indexing.
allStates.loc[2012, 'no-geo']

7307606.0

In [10]:
from math import pi

import pandas as pd

from bokeh.io import output_file, show
from bokeh.palettes import Colorblind
from bokeh.plotting import figure
from bokeh.transform import cumsum

geom = allStates.loc[2012]['geom']
non = allStates.loc[2012]['no-geo']

# 'no-geo' total for each year 2012-2016, keyed by the year as a string.
x = {str(year): allStates.loc[year]['no-geo'] for year in range(2012, 2017)}

# One row per wedge. The label column is called 'country' and the wedge
# legend / tooltip below refer to it by that name.
data = (
    pd.Series(x)
    .reset_index(name='value')
    .rename(columns={'index': 'country'})
)
data['angle'] = data['value'] / data['value'].sum() * 2 * pi
data['color'] = Colorblind[len(x)]

p = figure(
    plot_height=350,
    title="Pie Chart",
    toolbar_location=None,
    tools="hover",
    tooltips="@country: @value",
    x_range=(-0.5, 1.0),
)

# Each wedge runs from the running angle total before it to the total after it.
p.wedge(
    x=0,
    y=1,
    radius=0.4,
    start_angle=cumsum('angle', include_zero=True),
    end_angle=cumsum('angle'),
    line_color="white",
    fill_color='color',
    legend='country',
    source=data,
)

# Axes and grid are hidden for the pie.
p.axis.axis_label = None
p.axis.visible = False
p.grid.grid_line_color = None
show(p)
    

In [11]:
from math import pi

import pandas as pd

from bokeh.io import output_file, show
from bokeh.palettes import Colorblind
from bokeh.plotting import figure
from bokeh.transform import cumsum

# 2012 all-state totals with and without geometry.
geom = allStates.loc[2012]['geom']
non = allStates.loc[2012]['no-geo']

# NOTE(review): 'Not updated' is a literal 0 here, not a value read from the
# data -- confirm that is intended.
x = dict([
    ('Geom', geom),
    ('No Geom', non),
    ('Not updated', 0),
])

# One row per wedge. The label column is called 'country' and the wedge
# legend / tooltip below refer to it by that name.
data = (
    pd.Series(x)
    .reset_index(name='value')
    .rename(columns={'index': 'country'})
)
data['angle'] = data['value'] / data['value'].sum() * 2 * pi
data['color'] = Colorblind[len(x)]

p = figure(
    plot_height=350,
    title="Pie Chart",
    toolbar_location=None,
    tools="hover",
    tooltips="@country: @value",
    x_range=(-0.5, 1.0),
)

# Each wedge runs from the running angle total before it to the total after it.
p.wedge(
    x=0,
    y=1,
    radius=0.4,
    start_angle=cumsum('angle', include_zero=True),
    end_angle=cumsum('angle'),
    line_color="white",
    fill_color='color',
    legend='country',
    source=data,
)

# Axes and grid are hidden for the pie.
p.axis.axis_label = None
p.axis.visible = False
p.grid.grid_line_color = None
show(p)

In [12]:
# Column-wise totals over every row of the concatenated per-state frame.
joined.sum()

count       117383071
geom         75929729
bi_count    117333294
no-geo       41453342
bi              49777
dtype: int64

In [13]:
# Share of all joined entries that have no geometry, as a percentage.
# The column totals are computed once and reused for both operands.
column_totals = joined.sum()
percentage_without_geom = column_totals['no-geo'] * 100 / column_totals['count']
# The percent sign follows the number (the old format string printed "%35.3...").
print("without geom %s%%" % (percentage_without_geom,))

without geom %35.31458296912338


In [14]:
# Stacked-bar chart of the all-state totals, titled "Totals".
# plot_incidents() rewrites the 'no-geo' and 'bi' columns of allStates in place.
plot_incidents(allStates, "Totals")

In [15]:
# Row-count totals for the three per-state summary exports.
# NOTE(review): this cell repeats the earlier "Calculate other table totals"
# cell verbatim -- consider keeping only one of them.
total_ems = total_basic = total_address = 0

# EMS table
ems = pd.read_csv('data/ems_totals_by_state.csv')
total_ems = sum(ems['count'])
print("Total EMS entries: %s" % total_ems)

# Basic incident table
# NOTE(review): the label below says "basicaddress" although the file read is
# basicincident_totals_by_state.csv -- confirm the intended wording.
basic = pd.read_csv('data/basicincident_totals_by_state.csv')
total_basic = sum(basic['count'])
print("Total basicaddress entries: %s" % total_basic)

# Incident address table
address = pd.read_csv('data/incidentaddress_total_by_state.csv')
total_address = sum(address['count'])
print("Total incidentaddress entries: %s" % total_address)

Total EMS entries: 8131224
Total basicaddress entries: 117353939
Total incidentaddress entries: 90015396
