<a href="https://colab.research.google.com/github/JimenaBaripatti/FeatureEngineering/blob/main/Incident_Hydrants_distance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pandas==1.3.4

Collecting pandas==1.3.4
  Downloading pandas-1.3.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.3 MB)
[K     |████████████████████████████████| 11.3 MB 8.7 MB/s 
Installing collected packages: pandas
  Attempting uninstall: pandas
    Found existing installation: pandas 1.1.5
    Uninstalling pandas-1.1.5:
      Successfully uninstalled pandas-1.1.5
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires pandas~=1.1.0; python_version >= "3.0", but you have pandas 1.3.4 which is incompatible.[0m
Successfully installed pandas-1.3.4


In [1]:
# setting up libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math

# Show every row and column, and never truncate cell contents, when a frame is displayed.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
# None is the supported "no limit" value; -1 is deprecated since pandas 1.0
# and rejected by newer releases.
pd.set_option('display.max_colwidth', None)

%matplotlib inline

  if sys.path[0] == '':


# Incident - Hydrant distance calculation

Fire hydrants can typically supply a large volume of water. This water is pumped through hoses to the fire truck, where it is pressurized and divided into several streams to supply water to multiple fire hoses at once.

<b>Fire hoses</b>

Supply and relay hoses

Supply and relay hoses are large-diameter, fabric-covered, flexible hoses used to bring water from a distant hydrant to the fire pumper, or to relay water from one pumper to another over a long distance. These hoses range in nominal inside diameter from 3.5 to 5.0 in (89 to 127 mm). They are designed to operate at pressures up to about 300 psi (2,070 kPa) for the smaller diameters and up to 200 psi (1,380 kPa) for the larger diameters. The standard length is 100 ft (30.48 m).

Attack

Attack hoses are fabric-covered, flexible hoses used to bring water from the fire pumper to the nozzle. These hoses range in nominal inside diameter from 1.5 to 3 in (38 to 76 mm) and are designed to operate at pressures up to about 400 psi (2,760 kPa). The standard length is 50 ft (15.24 m).

https://en.wikipedia.org/wiki/Fire_hose#Types


In [2]:
# Read the pre-processed incident table and the cleaned hydrant table
# directly from the main repository (incidents first, hydrants second).
df, df_hyd = (
    pd.read_csv(csv_url)
    for csv_url in (
        'https://raw.githubusercontent.com/JimenaBaripatti/FeatureEngineering/main/data/current_dataset/fire_incident_station_weather_demo_combined.csv',
        'https://raw.githubusercontent.com/JimenaBaripatti/FeatureEngineering/main/data/fire_hydrants_clean.csv',
    )
)

In [None]:
# Number of rows a full incident x hydrant cross join would produce (~735M)
len(df) * len(df_hyd)

734811008

## Test functions

In [3]:
# Small fixtures for trying out the distance code:
# the first two incidents and all hydrants, key/coordinate columns only.
df_test = df.loc[:, ['Incident_Numberdemo', 'Latitude', 'Longitude']].iloc[:2].copy()
df_hyd_test = df_hyd.loc[:, ['h_latitude', 'h_longitude']].copy()

In [None]:
# Pair every test incident with every hydrant (cartesian product)
df_cross_test = pd.merge(df_test, df_hyd_test, how='cross')

print(df_cross_test.shape)
df_cross_test.head(1)

(83806, 5)


Unnamed: 0,Incident_Numberdemo,Latitude,Longitude,h_latitude,h_longitude
0,F18020956,43.686558,-79.599419,43.71809,-79.515645


In [None]:
# Distance Feature - Incident to hydrant

def haversine_distance(row):
    """Great-circle (haversine) distance in metres between an incident and a hydrant.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series)
        Provides 'Latitude' / 'Longitude' for the incident and
        'h_latitude' / 'h_longitude' for the hydrant, in decimal degrees.

    Returns
    -------
    Distance in metres on a sphere of radius 6,371,000 m.
    """
    earth_radius_m = 6371000  # m

    inc_lat, inc_lon = row['Latitude'], row['Longitude']
    hyd_lat, hyd_lon = row['h_latitude'], row['h_longitude']

    # Sines of half the coordinate differences (degrees converted to radians first)
    sin_half_dlat = np.sin(np.radians(hyd_lat - inc_lat) / 2)
    sin_half_dlon = np.sin(np.radians(hyd_lon - inc_lon) / 2)

    # Haversine term, then the central angle through the atan2 form
    hav = (sin_half_dlat * sin_half_dlat
           + np.cos(np.radians(inc_lat)) * np.cos(np.radians(hyd_lat)) * sin_half_dlon * sin_half_dlon)
    central_angle = 2 * np.arctan2(np.sqrt(hav), np.sqrt(1 - hav))

    return earth_radius_m * central_angle

# haversine_distance only uses numpy ufuncs and column look-ups, so it can be
# called once on the whole frame (column-wise) instead of once per row via
# apply(axis=1); the result is a Series aligned on df_cross_test's index.
df_cross_test['distance_m'] = haversine_distance(df_cross_test)
df_cross_test.head()

Unnamed: 0,Incident_Numberdemo,Latitude,Longitude,h_latitude,h_longitude,distance_m
0,F18020956,43.686558,-79.599419,43.71809,-79.515645,7592.440514
1,F18020956,43.686558,-79.599419,43.736525,-79.318938,23218.214594
2,F18020956,43.686558,-79.599419,43.703694,-79.412081,15181.420595
3,F18020956,43.686558,-79.599419,43.792068,-79.418892,18654.333564
4,F18020956,43.686558,-79.599419,43.652879,-79.342184,21025.839954


In [None]:
#option 1 = distance from each incident to its nearest hydrant
df_hyd_min_dist = (
    df_cross_test
    .groupby('Incident_Numberdemo')
    .agg(hyd_min_dist_m=('distance_m', 'min'))
)
df_hyd_min_dist.head()


Unnamed: 0_level_0,hyd_min_dist_m
Incident_Numberdemo,Unnamed: 1_level_1
F18020956,23.474074
F18020969,57.051513


In [None]:
#option 2 = get min distance and number of hydrants in a 100 m radius (related to hose lengths)
HYDRANT_RADIUS_M = 100

# Incident/hydrant pairs that fall inside the radius (kept for inspection).
within_radius = df_cross_test['distance_m'] <= HYDRANT_RADIUS_M
df_cross_test_100m = df_cross_test[within_radius]

# Aggregate over ALL pairs rather than the filtered frame, so an incident with
# no hydrant inside the radius is kept (count 0, true nearest distance) instead
# of being dropped from the result.
df_hyd_variables = (
    df_cross_test
    .assign(_within_radius=within_radius)
    .groupby('Incident_Numberdemo')
    .agg(hyd_min_dist_m=('distance_m', 'min'), hyd_count_r100m=('_within_radius', 'sum'))
)
df_hyd_variables.head()


Unnamed: 0_level_0,hyd_min_dist_m,hyd_count_r100m
Incident_Numberdemo,Unnamed: 1_level_1,Unnamed: 2_level_1
F18020956,23.474074,4
F18020969,57.051513,3


In [None]:
# Preview the new hydrant features attached to the incident table.
# Inner join on the incident number: only incidents present in
# df_hyd_variables appear; the result is displayed, not assigned.
df.join(df_hyd_variables, on='Incident_Numberdemo', how='inner')

Unnamed: 0.1,Unnamed: 0,_id_x,Area_of_Origin,Building_Status,Business_Impact,Civilian_Casualties,Count_of_Persons_Rescued,Estimated_Dollar_Loss,Estimated_Number_Of_Persons_Displaced,Exposures,Ext_agent_app_or_defer_time,Extent_Of_Fire,Final_Incident_Type,Fire_Alarm_System_Impact_on_Evacuation,Fire_Alarm_System_Operation,Fire_Alarm_System_Presence,Fire_Under_Control_Time,Ignition_Source,Incident_Numberinc_,Incident_Station_Area,Incident_Ward,Initial_CAD_Event_Type,Intersection,Last_TFS_Unit_Clear_Time,Level_Of_Origin,Material_First_Ignited,Method_Of_Fire_Control,Number_of_responding_apparatus,Number_of_responding_personnel,Possible_Cause,Property_Use,Smoke_Alarm_at_Fire_Origin,Smoke_Alarm_at_Fire_Origin_Alarm_Failure,Smoke_Alarm_at_Fire_Origin_Alarm_Type,Smoke_Alarm_Impact_on_Persons_Evacuating_Impact_on_Evacuation,Smoke_Spread,Sprinkler_System_Operation,Sprinkler_System_Presence,Status_of_Fire_On_Arrival,TFS_Alarm_Timeinc_,TFS_Arrival_Time,TFS_Firefighter_Casualties,_id_y,ID,NAME,ADDRESS,ADDRESS_POINT_ID,ADDRESS_ID,CENTRELINE_ID,MAINT_STAGE,ADDRESS_NUMBER,LINEAR_NAME_FULL,POSTAL_CODE,GENERAL_USE,CLASS_FAMILY_DESC,ADDRESS_ID_LINK,PLACE_NAME,X,Y,LATITUDE,LONGITUDE,WARD_NAME,MUNICIPALITY_NAME,OBJECTID,geometry,fs_Longitude,fs_Latitude,Station_Area,LABEL,DISTANCE_INCIDENT_FIRESTATION,INCIDENT_DATE,DOW,IS_WEEKEND,YEAR,MONTH,MINUTES_TO_ARRIVE,TFS_ARR_DAY,TFS_ALM_DAY,TFS_ARR_HOUR,TFS_ALM_HOUR,MINUTES_TO_LEAVE,INCIDENT_PERIOD_NUM,INCIDENT_PERIOD_CAT,IS_HOLIDAY,IS_HOLIDAY_LAG1,IS_HOLIDAY_LEAD1,IS_HOLIDAY_SEASON,date,rain_v,snow_v,snow_on_ground_v,temperature_avg,relative_humidity_avg,wind_speed_avg,Incident_Numberdemo,TFS_Alarm_Timedemo,Latitude,Longitude,location,FSA,fsa_imputed,density,poor_building_condition,condo_dwelling,Average_Age,Median_Age,male,hyd_min_dist_m,hyd_count_r100m
0,0,1946929,81 - Engine Area,,,0,0,15000.0,,,2018-02-25 02:12:00,,01 - Fire,,,,2018-02-25 02:15:40,999 - Undetermined,F18020956,441,1.0,Vehicle Fire,Dixon Rd / 427 N Dixon Ramp,2018-02-25 02:38:31,,47 - Vehicle,1 - Extinguished by fire department,1,4,99 - Undetermined,"896 - Sidewalk, street, roadway, highway, hwy (do not use for fire incidents)",,,,,,,,"7 - Fully involved (total structure, vehicle, spreading outdoor fire)",2018-02-25 02:04:29,2018-02-25 02:10:11,0,17,80,FIRE STATION 441,947 MARTIN GROVE RD,6044964,22707,6044958,REGULAR,947,Martin Grove Rd,,Fire Station,"Land, Structure, Structure Entrance",,Fire Station 441,,,,,Etobicoke North (1),Etobicoke,1959089,"{u'type': u'Point', u'coordinates': (-79.5719196489, 43.6949604664)}",-79.57192,43.69496,441,0,13705.717515,2018-02-25 00:00:00,6,1,2018,2,5.7,25,25,2,2,28.333,1,Late Night,0,0,0,0,2018-02-25,0.96,0.77,5.1,-3.415,71.47,18.39,F18020956,2018-02-25 02:04:29,43.686558,-79.599419,M9W 5N4,M9W,0,1411.27,0.066034,0.175848,39.6,38.3,0.490844,23.474074,4
1,1,1946930,"75 - Trash, rubbish area (outside)",,,0,0,50.0,,,2018-02-25 02:29:42,,01 - Fire,,,,2018-02-25 02:32:24,999 - Undetermined,F18020969,116,18.0,Fire - Grass/Rubbish,Sheppard Ave E / Clairtrell Rd,2018-02-25 02:35:58,,97 - Other,1 - Extinguished by fire department,1,4,03 - Suspected Vandalism,"896 - Sidewalk, street, roadway, highway, hwy (do not use for fire incidents)",,,,,,,,2 - Fire with no evidence from street,2018-02-25 02:24:43,2018-02-25 02:29:31,0,46,6,FIRE STATION 116,255 ESTHER SHINER BLVD,8731578,1464220,30005958,REGULAR,255,Esther Shiner Blvd,,Fire Station,"Structure, Structure Entrance",484876.0,Fire Station 116,,,,,Don Valley North (17),North York,2607109,"{u'type': u'Point', u'coordinates': (-79.3650636555, 43.7691459436)}",-79.365064,43.769146,116,0,13691.564674,2018-02-25 00:00:00,6,1,2018,2,4.8,25,25,2,2,6.45,1,Late Night,0,0,0,0,2018-02-25,0.96,0.77,5.1,-3.415,71.47,18.39,F18020969,2018-02-25 02:24:43,43.766135,-79.390039,M2N 3B1,M2N,0,7581.15,0.036021,0.682775,39.9,37.0,0.469236,57.051513,3


## Improve performance

**Ideas**

⬛ Pre-filter on the latitude and longitude differences between incident and hydrant to reduce the size of the cross merge. For example, if either difference is greater than xxx, the two points are more than 1 km apart and the pair can be skipped.

⬛ Divide the city into quadrants and do a cross merge within each quadrant.

⬛ Find a faster distance function, perhaps a less accurate one for a first-pass calculation.


In [3]:
# Hydrant coordinates as a 2-column numpy array: one [latitude, longitude] row per hydrant
hyd = df_hyd.loc[:, ['h_latitude', 'h_longitude']].to_numpy()

In [4]:
#better performing function

from math import radians, cos, sin, asin, sqrt

def haversine2(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance in metres between two points
    on the earth (specified in decimal degrees).

    Arguments are scalars in the order (lon1, lat1, lon2, lat2); the earth
    is treated as a sphere of radius 6,371,000 m.
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    # Rounding can push sqrt(a) marginally above 1 for near-antipodal points,
    # which would make asin raise ValueError. Clamp to asin's domain; the
    # argument order keeps a NaN input as NaN.
    c = 2 * asin(min(sqrt(a), 1.0))
    # Radius of earth in meters is 6371000
    m = 6371000 * c
    return m

In [5]:
def get_min_distance(df_test, hyd):
  """
  Build a dict with the incident number as key and the distance in metres
  to the nearest hydrant as value.

  df_test : DataFrame with 'Incident_Numberdemo', 'Latitude' and 'Longitude'
            columns (coordinates in decimal degrees).
  hyd     : array-like of [latitude, longitude] pairs in decimal degrees.

  The haversine formula (sphere of radius 6,371,000 m) is evaluated against
  all hydrants at once with numpy for each incident, instead of one Python
  call per incident/hydrant pair. If an incident number occurs more than
  once, the last occurrence wins. Raises ValueError when there are incidents
  but no hydrants.
  """
  earth_radius_m = 6371000

  # Hydrant coordinates are converted to radians once, outside the loop.
  hyd_rad = np.radians(np.asarray(hyd, dtype=float).reshape(-1, 2))
  hyd_lat, hyd_lon = hyd_rad[:, 0], hyd_rad[:, 1]
  cos_hyd_lat = np.cos(hyd_lat)

  min_dist = {}

  incidents = zip(df_test['Incident_Numberdemo'], df_test['Latitude'], df_test['Longitude'])
  for incident, lat_deg, lon_deg in incidents:
    lat, lon = np.radians(lat_deg), np.radians(lon_deg)

    # Haversine term for this incident against every hydrant.
    a = np.sin((hyd_lat - lat) / 2) ** 2 + np.cos(lat) * cos_hyd_lat * np.sin((hyd_lon - lon) / 2) ** 2

    # Distance grows monotonically with `a`, so the nearest hydrant is the
    # one with the smallest term; only that one needs the sqrt/arcsin.
    # np.minimum keeps arcsin inside its domain if rounding overshoots 1.
    nearest = 2 * earth_radius_m * np.arcsin(np.minimum(np.sqrt(a.min()), 1.0))
    min_dist[incident] = float(nearest)

  return min_dist

# Nearest-hydrant distance for every incident in the full incident table
result = get_min_distance(df, hyd)  

# Number of distinct incident numbers that received a distance
print(len(result))

17536


In [17]:
# Turn the {incident number: distance} dict into a one-column frame indexed by incident number
df_min_dist = pd.DataFrame.from_dict(result, orient="index", columns=['min_dist_to_hydrant']) 

In [18]:
# Preview: the index holds the incident number, the column the distance computed above
df_min_dist.head()

Unnamed: 0,min_dist_to_hydrant
F18020956,23.474074
F18020969,57.051513
F18021182,17.296318
F18021192,30.40174
F18021271,7.317947


In [19]:
# Left join on the incident number keeps every row of df and adds min_dist_to_hydrant
df_final = df.join(df_min_dist, on='Incident_Numberdemo', how='left')

In [20]:
# Preview the incident table with the new min_dist_to_hydrant column appended last
df_final.head()

Unnamed: 0.1,Unnamed: 0,_id_x,Area_of_Origin,Building_Status,Business_Impact,Civilian_Casualties,Count_of_Persons_Rescued,Estimated_Dollar_Loss,Estimated_Number_Of_Persons_Displaced,Exposures,Ext_agent_app_or_defer_time,Extent_Of_Fire,Final_Incident_Type,Fire_Alarm_System_Impact_on_Evacuation,Fire_Alarm_System_Operation,Fire_Alarm_System_Presence,Fire_Under_Control_Time,Ignition_Source,Incident_Numberinc_,Incident_Station_Area,Incident_Ward,Initial_CAD_Event_Type,Intersection,Last_TFS_Unit_Clear_Time,Level_Of_Origin,Material_First_Ignited,Method_Of_Fire_Control,Number_of_responding_apparatus,Number_of_responding_personnel,Possible_Cause,Property_Use,Smoke_Alarm_at_Fire_Origin,Smoke_Alarm_at_Fire_Origin_Alarm_Failure,Smoke_Alarm_at_Fire_Origin_Alarm_Type,Smoke_Alarm_Impact_on_Persons_Evacuating_Impact_on_Evacuation,Smoke_Spread,Sprinkler_System_Operation,Sprinkler_System_Presence,Status_of_Fire_On_Arrival,TFS_Alarm_Timeinc_,TFS_Arrival_Time,TFS_Firefighter_Casualties,_id_y,ID,NAME,ADDRESS,ADDRESS_POINT_ID,ADDRESS_ID,CENTRELINE_ID,MAINT_STAGE,ADDRESS_NUMBER,LINEAR_NAME_FULL,POSTAL_CODE,GENERAL_USE,CLASS_FAMILY_DESC,ADDRESS_ID_LINK,PLACE_NAME,X,Y,LATITUDE,LONGITUDE,WARD_NAME,MUNICIPALITY_NAME,OBJECTID,geometry,fs_Longitude,fs_Latitude,Station_Area,LABEL,DISTANCE_INCIDENT_FIRESTATION,INCIDENT_DATE,DOW,IS_WEEKEND,YEAR,MONTH,MINUTES_TO_ARRIVE,TFS_ARR_DAY,TFS_ALM_DAY,TFS_ARR_HOUR,TFS_ALM_HOUR,MINUTES_TO_LEAVE,INCIDENT_PERIOD_NUM,INCIDENT_PERIOD_CAT,IS_HOLIDAY,IS_HOLIDAY_LAG1,IS_HOLIDAY_LEAD1,IS_HOLIDAY_SEASON,date,rain_v,snow_v,snow_on_ground_v,temperature_avg,relative_humidity_avg,wind_speed_avg,Incident_Numberdemo,TFS_Alarm_Timedemo,Latitude,Longitude,location,FSA,fsa_imputed,density,poor_building_condition,condo_dwelling,Average_Age,Median_Age,male,min_dist_to_hydrant
0,0,1946929,81 - Engine Area,,,0,0,15000.0,,,2018-02-25 02:12:00,,01 - Fire,,,,2018-02-25 02:15:40,999 - Undetermined,F18020956,441,1.0,Vehicle Fire,Dixon Rd / 427 N Dixon Ramp,2018-02-25 02:38:31,,47 - Vehicle,1 - Extinguished by fire department,1,4,99 - Undetermined,"896 - Sidewalk, street, roadway, highway, hwy (do not use for fire incidents)",,,,,,,,"7 - Fully involved (total structure, vehicle, spreading outdoor fire)",2018-02-25 02:04:29,2018-02-25 02:10:11,0,17,80,FIRE STATION 441,947 MARTIN GROVE RD,6044964,22707,6044958,REGULAR,947,Martin Grove Rd,,Fire Station,"Land, Structure, Structure Entrance",,Fire Station 441,,,,,Etobicoke North (1),Etobicoke,1959089,"{u'type': u'Point', u'coordinates': (-79.5719196489, 43.6949604664)}",-79.57192,43.69496,441,0,13705.717515,2018-02-25 00:00:00,6,1,2018,2,5.7,25,25,2,2,28.333,1,Late Night,0,0,0,0,2018-02-25,0.96,0.77,5.1,-3.415,71.47,18.39,F18020956,2018-02-25 02:04:29,43.686558,-79.599419,M9W 5N4,M9W,0,1411.27,0.066034,0.175848,39.6,38.3,0.490844,23.474074
1,1,1946930,"75 - Trash, rubbish area (outside)",,,0,0,50.0,,,2018-02-25 02:29:42,,01 - Fire,,,,2018-02-25 02:32:24,999 - Undetermined,F18020969,116,18.0,Fire - Grass/Rubbish,Sheppard Ave E / Clairtrell Rd,2018-02-25 02:35:58,,97 - Other,1 - Extinguished by fire department,1,4,03 - Suspected Vandalism,"896 - Sidewalk, street, roadway, highway, hwy (do not use for fire incidents)",,,,,,,,2 - Fire with no evidence from street,2018-02-25 02:24:43,2018-02-25 02:29:31,0,46,6,FIRE STATION 116,255 ESTHER SHINER BLVD,8731578,1464220,30005958,REGULAR,255,Esther Shiner Blvd,,Fire Station,"Structure, Structure Entrance",484876.0,Fire Station 116,,,,,Don Valley North (17),North York,2607109,"{u'type': u'Point', u'coordinates': (-79.3650636555, 43.7691459436)}",-79.365064,43.769146,116,0,13691.564674,2018-02-25 00:00:00,6,1,2018,2,4.8,25,25,2,2,6.45,1,Late Night,0,0,0,0,2018-02-25,0.96,0.77,5.1,-3.415,71.47,18.39,F18020969,2018-02-25 02:24:43,43.766135,-79.390039,M2N 3B1,M2N,0,7581.15,0.036021,0.682775,39.9,37.0,0.469236,57.051513
2,2,1946931,,,,0,0,,,,,,"03 - NO LOSS OUTDOOR fire (exc: Sus.arson,vandal,child playing,recycling or dump fires)",,,,,,F18021182,221,21.0,Fire - Highrise Residential,Danforth Rd / Savarin St,2018-02-25 19:14:03,,,,6,22,,891 - Outdoor general auto parking,,,,,,,,,2018-02-25 18:29:59,2018-02-25 18:36:49,0,25,26,FIRE STATION 221,2575 EGLINTON AVE E,330710,317332,111065,REGULAR,2575,Eglinton Ave E,,Restaurant,"Land, Structure, Structure Entrance",,,,,,,Scarborough Southwest (20),Scarborough,2048861,"{u'type': u'Point', u'coordinates': (-79.2550627035, 43.7347901433)}",-79.255063,43.73479,221,0,13676.786172,2018-02-25 00:00:00,6,1,2018,2,6.833,25,25,18,18,37.233,5,Evening,0,0,0,0,2018-02-25,0.96,0.77,5.1,-3.415,71.47,18.39,F18021182,2018-02-25 18:29:59,43.74323,-79.245061,M1J 2E1,M1J,0,5700.21,0.114064,0.191333,38.1,37.2,0.476158,17.296318
3,3,1946932,"75 - Trash, rubbish area (outside)",01 - Normal (no change),1 - No business interruption,0,0,0.0,0.0,,2018-02-25 19:19:25,1 - Confined to object of origin,01 - Fire,9 - Undetermined,8 - Not applicable (no system),9 - Undetermined,2018-02-25 19:20:00,999 - Undetermined,F18021192,133,5.0,Fire - Commercial/Industrial,Keele St / Lawrence Ave W,2018-02-25 20:07:42,999.0,99 - Undetermined (formerly 98),1 - Extinguished by fire department,6,22,99 - Undetermined,511 - Department Store,9 - Floor/suite of fire origin: Smoke alarm presence undetermined,98 - Not applicable: Alarm operated OR presence/operation undetermined,9 - Type undetermined,"8 - Not applicable: No alarm, no persons present",99 - Undetermined,8 - Not applicable - no sprinkler system present,9 - Undetermined,"3 - Fire with smoke showing only - including vehicle, outdoor fires",2018-02-25 19:13:39,2018-02-25 19:18:07,0,52,13,FIRE STATION 133,1505 LAWRENCE AVE W,20035267,1463915,9796670,REGULAR,1505,Lawrence Ave W,,Fire Station,"Structure, Structure Entrance",86391.0,Fire Station 133,,,,,York South-Weston (5),North York,2829802,"{u'type': u'Point', u'coordinates': (-79.4821848791, 43.707184857)}",-79.482185,43.707185,133,0,13698.196885,2018-02-25 00:00:00,6,1,2018,2,4.467,25,25,19,19,49.583,5,Evening,0,0,0,0,2018-02-25,0.96,0.77,5.1,-3.415,71.47,18.39,F18021192,2018-02-25 19:13:39,43.708659,-79.478062,M6M 4A2,M6M,0,4822.54,0.091022,0.125935,39.2,38.7,0.470658,30.40174
4,4,1946933,,,,0,0,,,,,,"03 - NO LOSS OUTDOOR fire (exc: Sus.arson,vandal,child playing,recycling or dump fires)",,,,,,F18021271,132,8.0,Fire - Residential,Replin Rd / Tapestry Lane,2018-02-25 23:34:24,,,,6,22,,860 - Lawn around structure,,,,,,,,,2018-02-25 23:20:43,2018-02-25 23:26:19,0,15,12,FIRE STATION 132,476 LAWRENCE AVE W,9847351,100477,9694792,REGULAR,476,Lawrence Ave W,,Fire Station,"Land, Structure, Structure Entrance",,Fire Station 132,,,,,Eglinton-Lawrence (8),North York,1852933,"{u'type': u'Point', u'coordinates': (-79.4285718669, 43.7197981433)}",-79.428572,43.719798,132,0,13693.287227,2018-02-25 00:00:00,6,1,2018,2,5.6,25,25,23,23,8.083,6,Night,0,0,0,0,2018-02-25,0.96,0.77,5.1,-3.415,71.47,18.39,F18021271,2018-02-25 23:20:43,43.718118,-79.443184,M6A 2N4,M6A,0,3610.31,0.08882,0.275155,40.4,39.0,0.435258,7.317947


In [21]:
# index=False: the positional index carries no information, and writing it adds
# one more 'Unnamed: 0'-style column each time the file is saved and re-read.
df_final.to_csv('fire_incident_station_weather_demo_hydrant.csv', index=False)

In [22]:
# The index holds the incident number; label it so the CSV's key column has a
# header instead of a blank name.
df_min_dist.to_csv('min_distance_to_hydrant_by_incident.csv', index_label='Incident_Numberdemo')