In [1]:
# Import dependencies
import json
import requests
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point, Polygon
from datetime import datetime
from shapely.geometry import shape

In [2]:
# Toronto Open Data is stored in a CKAN instance. Its APIs are documented here:
# https://docs.ckan.org/en/latest/api/

# All API requests go to this host:
base_url = "https://ckan0.cf.opendata.inter.prod-toronto.ca"

# Datasets are called "packages". Each package can contain many "resources".
# package_show returns the package metadata, including its list of resources.
url = base_url + "/api/3/action/package_show"
params = {"id": "fire-station-locations"}
package_response = requests.get(url, params=params)
package_response.raise_for_status()  # fail here instead of with a KeyError on "result"
package = package_response.json()

# Only datastore_active resources can be queried through datastore_search.
datastore_resources = [
    candidate for candidate in package["result"]["resources"]
    if candidate["datastore_active"]
]
if not datastore_resources:
    raise LookupError("Package 'fire-station-locations' has no datastore_active resource")

# When several resources qualify, the last one listed is used.
resource = datastore_resources[-1]

# datastore_search returns at most `limit` rows per call (CKAN defaults to 100),
# so keep requesting pages until every row reported in "total" has been read.
# The call has many more parameters, documented here:
# https://docs.ckan.org/en/latest/maintaining/datastore.html
url = base_url + "/api/3/action/datastore_search"
limit = 100  # Number of rows to retrieve per request
p = {"id": resource["id"], "limit": limit}
search_response = requests.get(url, params=p)
search_response.raise_for_status()

# First page of results; also carries the total row count and field metadata.
resource_search_data = search_response.json()["result"]
total_rows = resource_search_data["total"]

firestation_records = list(resource_search_data["records"])
while len(firestation_records) < total_rows:
    p = {"id": resource["id"], "limit": limit, "offset": len(firestation_records)}
    page_response = requests.get(url, params=p)
    page_response.raise_for_status()
    page_records = page_response.json()["result"]["records"]
    if not page_records:
        # The server returned fewer rows than it reported; stop instead of looping forever.
        break
    firestation_records.extend(page_records)

# One row per record returned by the datastore
df_firestation = pd.DataFrame(firestation_records)


In [3]:
# Toronto Open Data is stored in a CKAN instance. Its APIs are documented here:
# https://docs.ckan.org/en/latest/api/

# All API requests go to this host:
base_url = "https://ckan0.cf.opendata.inter.prod-toronto.ca"

# Datasets are called "packages". Each package can contain many "resources".
# package_show returns the package metadata, including its list of resources.
url = base_url + "/api/3/action/package_show"
params = {"id": "fire-incidents"}
package_response = requests.get(url, params=params)
package_response.raise_for_status()  # fail here instead of with a KeyError on "result"
package = package_response.json()

# Only datastore_active resources can be queried through datastore_search.
datastore_resources = [
    candidate for candidate in package["result"]["resources"]
    if candidate["datastore_active"]
]
if not datastore_resources:
    # Without this check a stale `all_records` left over from another cell
    # could silently be turned into the fire-incidents dataframe.
    raise LookupError("Package 'fire-incidents' has no datastore_active resource")

# When several resources qualify, the last one listed is used.
resource = datastore_resources[-1]

# datastore_search returns at most `limit` rows per call, so page through the
# resource until every row reported in "total" has been collected.
url = base_url + "/api/3/action/datastore_search"
limit = 100  # Number of rows to retrieve per request
p = {"id": resource["id"], "limit": limit}
search_response = requests.get(url, params=p)
search_response.raise_for_status()

# First page of results; also carries the total row count and field metadata.
resource_search_data = search_response.json()["result"]
total_rows = resource_search_data["total"]

# List to hold all the records, seeded with the first page so it is not fetched twice
all_records = list(resource_search_data["records"])

# Use the number of rows already collected as the offset of the next page.
# This stops exactly at `total_rows`, with no extra empty request when the
# total is a multiple of `limit`.
while len(all_records) < total_rows:
    p = {"id": resource["id"], "limit": limit, "offset": len(all_records)}
    page_response = requests.get(url, params=p)
    page_response.raise_for_status()
    page_records = page_response.json()["result"]["records"]
    if not page_records:
        # The server returned fewer rows than it reported; stop instead of looping forever.
        break
    all_records.extend(page_records)

# Convert the list of records to a DataFrame
df_fireincidents = pd.DataFrame(all_records)


In [4]:
# Toronto Open Data is stored in a CKAN instance. Its APIs are documented here:
# https://docs.ckan.org/en/latest/api/

# All API requests go to this host:
base_url = "https://ckan0.cf.opendata.inter.prod-toronto.ca"

# Datasets are called "packages". Each package can contain many "resources".
# package_show returns the package metadata, including its list of resources.
url = base_url + "/api/3/action/package_show"
params = {"id": "neighbourhoods"}
package_response = requests.get(url, params=params)
package_response.raise_for_status()  # fail here instead of with a KeyError on "result"
package = package_response.json()

# Only datastore_active resources can be queried through datastore_search.
datastore_resources = [
    candidate for candidate in package["result"]["resources"]
    if candidate["datastore_active"]
]
if not datastore_resources:
    # Without this check a stale `all_records` left over from another cell
    # could silently be turned into the neighbourhoods dataframe.
    raise LookupError("Package 'neighbourhoods' has no datastore_active resource")

# When several resources qualify, the last one listed is used.
resource = datastore_resources[-1]

# datastore_search returns at most `limit` rows per call, so page through the
# resource until every row reported in "total" has been collected.
url = base_url + "/api/3/action/datastore_search"
limit = 100  # Number of rows to retrieve per request
p = {"id": resource["id"], "limit": limit}
search_response = requests.get(url, params=p)
search_response.raise_for_status()

# First page of results; also carries the total row count and field metadata.
resource_search_data = search_response.json()["result"]
total_rows = resource_search_data["total"]

# List to hold all the records, seeded with the first page so it is not fetched twice
all_records = list(resource_search_data["records"])

# Use the number of rows already collected as the offset of the next page.
# This stops exactly at `total_rows`, with no extra empty request when the
# total is a multiple of `limit`.
while len(all_records) < total_rows:
    p = {"id": resource["id"], "limit": limit, "offset": len(all_records)}
    page_response = requests.get(url, params=p)
    page_response.raise_for_status()
    page_records = page_response.json()["result"]["records"]
    if not page_records:
        # The server returned fewer rows than it reported; stop instead of looping forever.
        break
    all_records.extend(page_records)

# Save the data to a DataFrame
df_neighborhoods = pd.DataFrame(all_records)

In [None]:
# Check the data type of each column in the neighbourhoods dataframe

df_neighborhoods.dtypes

In [None]:
# Check the data type of each column in the fire incidents dataframe

df_fireincidents.dtypes

In [None]:
# Parse each neighbourhood geometry from its GeoJSON text into a shapely geometry object.
# Values that are not text (for example, geometries already converted by a previous
# run of this cell) are left untouched, so the cell can be re-run without raising.

df_neighborhoods['geometry'] = df_neighborhoods['geometry'].apply(
    lambda geom: shape(json.loads(geom)) if isinstance(geom, (str, bytes, bytearray)) else geom
)
df_neighborhoods['geometry'].head()

In [None]:
# Count the null values in each column of the fire incidents dataframe

df_fireincidents.isna().sum()

In [None]:
# Select the columns worth keeping and fill null values in text columns

# A column is kept when fewer than 30% of its values are null.
# The counts are taken before any filling below, so filled columns are judged
# on their original number of nulls.
null_counts = df_fireincidents.isna().sum()
max_nulls = len(df_fireincidents) * .3
columnstokeep = [col for col in df_fireincidents.columns if null_counts[col] < max_nulls]

# Fill nulls in every object-typed column (kept or not) with the placeholder 'Unknown'.
# The filled column is assigned back explicitly: calling fillna(inplace=True) on
# df[col] acts on an intermediate object and is not guaranteed to update the frame.
for col in df_fireincidents.columns:
    if df_fireincidents[col].dtype == 'object':
        df_fireincidents[col] = df_fireincidents[col].fillna('Unknown')

columnstokeep

In [None]:
# Fill null values in this column with a default value (0).
# The result is assigned back explicitly; fillna(inplace=True) on a selected column
# is not guaranteed to update the parent frame.
# This fill runs after `columnstokeep` was computed, so it does not change which columns are kept.

df_fireincidents['Estimated_Dollar_Loss'] = df_fireincidents['Estimated_Dollar_Loss'].fillna(0)

# Trim the dataframe to the kept columns.
# .copy() makes df_new an independent frame, so later edits to it neither warn
# about nor leak into df_fireincidents.

df_new = df_fireincidents[columnstokeep].copy()

In [None]:
# Count the null values in each column of the trimmed dataframe

df_new.isna().sum()

In [None]:
# Treat empty strings as missing values, then drop every row that has any missing value.
# replace() returns a new frame, so its result must be kept for the conversion to take
# effect; chaining it with dropna() also avoids mutating df_new in place.

df_new = df_new.replace("", np.nan).dropna(how='any')



In [None]:
# Obtain the neighbourhood for each fire incident

# Pair every neighbourhood geometry with its name once, up front, instead of
# rebuilding a row Series for every polygon on every incident.
neighbourhood_shapes = list(zip(df_neighborhoods['geometry'], df_neighborhoods['AREA_NAME']))

# For each incident, take the name of the first neighbourhood whose geometry
# contains the incident point; None when no neighbourhood contains it.
neighbourhood_names = []
for longitude, latitude in zip(df_new['Longitude'], df_new['Latitude']):
    point = Point(longitude, latitude)
    neighbourhood_names.append(
        next((name for geom, name in neighbourhood_shapes if geom.contains(point)), None)
    )

# Assign the whole column at once (rows are in the same order as the loop above)
# rather than writing cell by cell through chained indexing.
df_new['Neighborhood'] = neighbourhood_names
df_new['Neighborhood'].head(10)

In [None]:
# Convert the timestamp strings to datetime format.
# Arrival times that cannot be parsed become NaT (errors='coerce'); alarm times
# that cannot be parsed raise an error.

df_new['TFS_Alarm_Time'] = pd.to_datetime(df_new['TFS_Alarm_Time'])
df_new['TFS_Arrival_Time'] = pd.to_datetime(df_new['TFS_Arrival_Time'], errors='coerce')

# Drop every row that has a missing value in any column, which includes rows
# whose arrival time could not be parsed.
df_new = df_new.dropna(how='any')

# Calculate the response time of the fire stations: time from alarm to arrival,
# in whole seconds (fractions of a second are truncated).
elapsed = df_new['TFS_Arrival_Time'] - df_new['TFS_Alarm_Time']
df_new['Response'] = elapsed.dt.total_seconds().astype(int)

In [None]:
# Parse each fire station geometry from its GeoJSON text into a shapely geometry object.
# Values that are not text (for example, geometries already converted by a previous
# run of this cell) are left untouched, so the cell can be re-run without raising.

df_firestation['geometry'] = df_firestation['geometry'].apply(
    lambda geom: shape(json.loads(geom)) if isinstance(geom, (str, bytes, bytearray)) else geom
)
df_firestation.head(10)

In [None]:
# Preview the first 10 rows of the cleaned fire incidents dataframe
df_new.head(10)

In [None]:
# Change the datatype of the station area column so it can be compared with the station numbers

df_new['Incident_Station_Area'] = df_new['Incident_Station_Area'].astype(float)

# Obtain the fire station name for each fire incident

# Build the STATION -> WARD_NAME lookup once. If a station number appears more
# than once, the last row wins.
# NOTE(review): the "name" is taken from the WARD_NAME column - confirm this is the intended field.
station_names = dict(zip(df_firestation['STATION'], df_firestation['WARD_NAME']))

# Dictionary lookup follows Python equality, so a float area such as 211.0 matches
# an integer station 211. Incidents with no matching station get None.
df_new['Fire_Station_Name'] = [
    station_names.get(area) for area in df_new['Incident_Station_Area']
]

df_new['Fire_Station_Name'].head(10)

In [None]:
# Clean the Neighborhood dataset

# Drop the columns that are not needed and order the rows by neighbourhood name.
# errors='ignore' lets the cell be re-run after the columns have already been removed.
df_neighborhoods = (
    df_neighborhoods
    .drop(columns=['PARENT_AREA_ID', 'CLASSIFICATION', 'CLASSIFICATION_CODE', '_id'], errors='ignore')
    .sort_values('AREA_NAME')
)

# Count the incidents per neighbourhood and attach the counts by neighbourhood name.
# Matching on the name (rather than on row position) keeps every count on the right
# row and gives 0 to neighbourhoods that have no incidents.
incident_counts = df_new.groupby('Neighborhood')['_id'].count()
df_neighborhoods['Total_Incidents'] = (
    df_neighborhoods['AREA_NAME'].map(incident_counts).fillna(0).astype(int)
)

gdf = gpd.GeoDataFrame(df_neighborhoods)
gdf.head()

In [None]:
# Save the three result files

# Fire stations need to be wrapped in a GeoDataFrame before they can be written as GeoJSON
gdf1 = gpd.GeoDataFrame(df_firestation)

# (writer, output path, writer options), listed in the order the files are written
exports = (
    (gdf1.to_file, 'Fire_Station.geojson', {'driver': 'GeoJSON'}),
    (df_new.to_csv, 'Fire_Incidents_Data.csv', {'index': False}),
    (gdf.to_file, 'Neighborhoods.geojson', {'driver': 'GeoJSON'}),
)
for write, path, options in exports:
    write(path, **options)