### NYC interactive map showing the total number of fire incidents and the top 4 incident types occurring in each NYC neighborhood.

In [652]:
import pandas as pd
import numpy as np
import os
import time

# Timestamp captured when this cell runs.
start_time = time.time()

# The incidents CSV is looked up in the current working directory.
filename = 'Incidents_Responded_to_by_Fire_Companies.csv'
path = os.getcwd()
filename_full = os.path.join(path, filename)

In [653]:
# ZIP code -> coordinates lookup table; the first CSV column becomes the index.
df_zip = pd.read_csv('Zip_lat_long.csv', index_col=0)

# Read the incidents file in 10,000-row chunks, loading only the three columns
# used by this notebook, then stitch the chunks into a single frame.
tmp_df = pd.read_csv(
    filename_full,
    index_col=0,
    chunksize=10000,
    usecols=['IM_INCIDENT_KEY', 'ZIP_CODE', 'INCIDENT_TYPE_DESC'],
    dtype={'IM_INCIDENT_KEY': 'Int64',
           'ZIP_CODE': 'category',
           'INCIDENT_TYPE_DESC': 'category'},
)

df = pd.concat(tmp_df)

# Clean the two ZIP_CODE values that are stored in an incorrect format.
# The result is assigned back to the column instead of calling
# `df.ZIP_CODE.replace(..., inplace=True)`: that form mutates an intermediate
# Series (chained assignment), which recent pandas warns about and which leaves
# `df` unchanged when Copy-on-Write is enabled.
df['ZIP_CODE'] = df['ZIP_CODE'].replace({'11209-0000': '11209',
                                         '11005.0': '11005'})
df.head()

Unnamed: 0_level_0,INCIDENT_TYPE_DESC,ZIP_CODE
IM_INCIDENT_KEY,Unnamed: 1_level_1,Unnamed: 2_level_1
55672688,"300 - Rescue, EMS incident, other",10454
55672692,735A - Unwarranted alarm/defective condition o...,10036
55672693,"300 - Rescue, EMS incident, other",11418
55672695,412 - Gas leak (natural gas or LPG),11103
55672697,735A - Unwarranted alarm/defective condition o...,11385


In [654]:
# Per-ZIP tally of incidents. count() counts the non-null entries of every
# remaining column per group; the result is then relabelled with a single
# column name.
zip_groups = df.groupby('ZIP_CODE')
dd = zip_groups.count()

made_new_df = pd.DataFrame(dd)
made_new_df.columns = ['Total_incident_number']

made_new_df.head()

Unnamed: 0_level_0,Total_incident_number
ZIP_CODE,Unnamed: 1_level_1
10001,18517
10002,31242
10003,21994
10004,3700
10005,2794


In [655]:
# For every ZIP code, tally how often each incident type occurs. The result is
# indexed by (ZIP_CODE, INCIDENT_TYPE_DESC) and is turned into a one-column frame.
incident_types_by_zip = df.groupby('ZIP_CODE')['INCIDENT_TYPE_DESC']
dd = incident_types_by_zip.value_counts()
made_new_df_multi = dd.to_frame()

# Show the names of the two index levels.
made_new_df_multi.index.names

FrozenList(['ZIP_CODE', 'INCIDENT_TYPE_DESC'])

In [656]:
# Positional preview of rows 1..19 (note: the very first row, position 0, is not shown).
made_new_df_multi.iloc[1:20]

Unnamed: 0_level_0,Unnamed: 1_level_0,INCIDENT_TYPE_DESC
ZIP_CODE,INCIDENT_TYPE_DESC,Unnamed: 2_level_1
10001,735A - Unwarranted alarm/defective condition of alarm system,2050
10001,353 - Removal of victim(s) from stalled elevator,1421
10001,"651 - Smoke scare, odor of smoke",1109
10001,"710 - Malicious, mischievous false call, other",804
10001,740A - Unnecessary alarm/construction activities,664
10001,"555 - Defective elevator, no occupants",547
10001,522 - Water or steam leak,501
10001,412 - Gas leak (natural gas or LPG),409
10001,"151 - Outside rubbish, trash or waste fire",393
10001,735B - Unnecessary alarm/alarm system testing or servicing,324


In [657]:
# How many of the most frequent incident types to keep for each ZIP code.
TOP_N_INCIDENT_TYPES = 4

# Build one [zip_code, [incident descriptions]] record per ZIP code.
# made_new_df_multi is indexed by (ZIP_CODE, INCIDENT_TYPE_DESC) and holds the
# per-ZIP value counts, so the leading rows of each group are taken as that
# ZIP code's top incident types.
listaa = []
for zip_code, new_sub_df in made_new_df_multi.groupby(level=0):
    # Select the single counts column by position: its label can differ between
    # pandas versions, so looking it up by name is fragile.
    counts = new_sub_df.iloc[:, 0]
    # Series.items() is used because Series.iteritems() was removed in pandas 2.0.
    # Each index key is a (ZIP_CODE, INCIDENT_TYPE_DESC) tuple; keep the description.
    a = [index_key[1].strip()
         for index_key, _ in counts.iloc[:TOP_N_INCIDENT_TYPES].items()]
    listaa.append([zip_code, a])

df_top_4_discription = pd.DataFrame(listaa, columns=['ZIP_CODE', 'Discription_top_4'])  #TODO:dtype={'ZIP_CODE': 'Int64', 'Discription_top_4': 'np.object_'}
df_top_4_discription.set_index('ZIP_CODE', inplace=True)
# Nullable-integer ZIP index, matching the dtype given to the other ZIP-indexed frames.
df_top_4_discription.index = df_top_4_discription.index.astype('Int64')

In [658]:
# Preview the first rows of the per-ZIP list of top incident descriptions.
df_top_4_discription.head()

Unnamed: 0_level_0,Discription_top_4
ZIP_CODE,Unnamed: 1_level_1
10001,"[300 - Rescue, EMS incident, other, 735A - Unw..."
10002,"[300 - Rescue, EMS incident, other, 353 - Remo..."
10003,"[300 - Rescue, EMS incident, other, 735A - Unw..."
10004,"[300 - Rescue, EMS incident, other, 735A - Unw..."
10005,"[300 - Rescue, EMS incident, other, 735A - Unw..."


In [659]:
# Give both frames a nullable-integer ZIP index so the join keys share a dtype.
df_zip.index = df_zip.index.astype('Int64')
made_new_df.index = made_new_df.index.astype('Int64')

# Left join on the two indexes: every ZIP code with incidents is kept and the
# coordinate columns of df_zip are attached where that ZIP code is present.
# Only the index flags are passed; combining `left_on` with `left_index=True`
# is rejected by current pandas with a MergeError, and it produced a spurious
# 'key_0' column that then had to be dropped.
new_df = made_new_df.merge(df_zip, how='left', left_index=True, right_index=True)

# dropna() returns a new frame, so the result must be assigned to take effect.
# Rows whose ZIP code has no entry in df_zip are removed here.
new_df = new_df.dropna()
new_df.head()

Unnamed: 0_level_0,Total_incident_number,LAT,LNG
ZIP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10001,18517,40.750633,-73.997177
10002,31242,40.715775,-73.986212
10003,21994,40.731829,-73.989181
10004,3700,40.68863,-74.018244
10005,2794,40.706027,-74.008835


In [660]:
# Attach the list of top incident descriptions to each ZIP code by joining on
# the two ZIP indexes. Only the index flags are passed; combining `on=` with
# `left_index=True` is rejected by current pandas with a MergeError, and it
# produced a spurious 'key_0' column that then had to be dropped.
# Rows left with a missing value in any column are removed, and the index keeps
# the 'ZIP_CODE' label.
final_df = (
    new_df
    .merge(df_top_4_discription, how='left', left_index=True, right_index=True)
    .dropna()
    .rename_axis('ZIP_CODE')
)
final_df.head()

Unnamed: 0_level_0,Total_incident_number,LAT,LNG,Discription_top_4
ZIP_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10001,18517,40.750633,-73.997177,"[300 - Rescue, EMS incident, other, 735A - Unw..."
10002,31242,40.715775,-73.986212,"[300 - Rescue, EMS incident, other, 353 - Remo..."
10003,21994,40.731829,-73.989181,"[300 - Rescue, EMS incident, other, 735A - Unw..."
10004,3700,40.68863,-74.018244,"[300 - Rescue, EMS incident, other, 735A - Unw..."
10005,2794,40.706027,-74.008835,"[300 - Rescue, EMS incident, other, 735A - Unw..."


In [661]:
import folium
import pandas as pd
 
# Centre point of the initial map view.
NYC_COORDINATE = (40.71, -73.90)

# for speed purposes: upper bound on the number of rows drawn
MAX_RECORDS = 260

# A marker's radius is its total incident count divided by this value.
INCIDENTS_PER_RADIUS_UNIT = 1500

map_1 = folium.Map(location=NYC_COORDINATE, zoom_start=11)

# Draw one red circle per row; the popup lists that row's incident
# descriptions, one per line.
for row in final_df.iloc[:MAX_RECORDS].itertuples():
    marker = folium.CircleMarker(
        location=[row.LAT, row.LNG],
        radius=row.Total_incident_number / INCIDENTS_PER_RADIUS_UNIT,
        popup='<br>'.join(row.Discription_top_4),
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.7,
    )
    marker.add_to(map_1)

# Render the map inline, then write it out as a standalone HTML file.
display(map_1)
map_1.save('map_1.html')