In [26]:
import os
from bokeh.models import ColumnDataSource,Legend
from bokeh.io import output_notebook, show
from bokeh.palettes import Category10
from bokeh.plotting import figure, show
from bokeh.io import output_file
import numpy as np
from ipyleaflet import Map, GeoJSON, Marker, AwesomeIcon, FullScreenControl
import requests
import json
import random
import plotly.graph_objs as go
import plotly.offline as pyo
from datetime import datetime
import seaborn as sns
from folium.plugins import HeatMap
import pandas as pd
import matplotlib.pyplot as plt
import mplcursors
import calplot
import mpld3
import folium


In [32]:
##
#
# Author: 
# Salim Omar
#
##

# cleaning and preprocessing

csv_path = "../Fire_Incidents.csv"

if not os.path.exists(csv_path):
    # Download CSV from API if it doesn't exist locally
    url = "https://data.sfgov.org/resource/wr8u-xric.json"
    response = requests.get(url)
    df = pd.read_json(response.text)
    df.to_csv(csv_path, index=False)
    print("The data has been readed from ", url)
else:
    # Load CSV from local file
    df = pd.read_csv(csv_path)
    print("The data is found locally")

#df.head()


# cleaning and preprocessing


df['Incident Date'] = pd.to_datetime(df['Incident Date'])
df['Alarm DtTm'] = pd.to_datetime(df['Alarm DtTm'])
df['Arrival DtTm'] = pd.to_datetime(df['Arrival DtTm'])
df['Close DtTm'] = pd.to_datetime(df['Close DtTm'])

df['Incident Time'] = df['Incident Date'].dt.time
df['Incident month'] = df['Incident Date'].dt.month
df['Incident year'] = df['Incident Date'].dt.year


# Get the data from date 2010-04-01 to 2023-04-01
df = df[(df['Incident year'] >= 2010) &
        (df['Incident year'] <= 2022)]


# print the number of rows
num_rows = df.shape[0]
print("The number of rows is:", num_rows)

# get just the data from Sf city
df = df[(df['City'] == 'SF') | (df['City'] == 'San Francisco')
        | (df['City'] == 'SAN FRANCISCO')]

# Deleting all unnaseccary columns
df.drop(columns=['Exposure Number',
                'Box',
                'Fire Fatalities',
                'Fire Injuries',
                'Civilian Fatalities',
                'Civilian Injuries',
                'Number of Alarms',
                'Mutual Aid',
                'Action Taken Secondary',
                'Action Taken Other',
                'Area of Fire Origin',
                'Ignition Cause',
                'Ignition Factor Primary',
                'Ignition Factor Secondary',
                'Item First Ignited',
                'Human Factors Associated with Ignition',
                'Structure Type',
                'Structure Status',
                'Floor of Fire Origin',
                'Fire Spread',
                'No Flame Spead',
                'Number of floors with minimum damage',
                'Number of floors with significant damage',
                'Number of floors with heavy damage',
                'Number of floors with extreme damage',
                'Detectors Present',
                'Detector Type',
                'Detector Operation',
                'Detector Effectiveness',
                'Detector Failure Reason',
                'Automatic Extinguishing System Present',
                'Automatic Extinguishing Sytem Type',
                'Automatic Extinguishing Sytem Perfomance',
                'Automatic Extinguishing Sytem Failure Reason',
                'Number of Sprinkler Heads Operating'
                ], inplace=True)
# the code for Primary Situation
df['focuse_Situation_by_number'] = df['Primary Situation'].str[:3]

# error rows
df = df[df['Battalion'] != 'B99']
df.drop(df[df['ID'] == 140383810 ].index, axis=0, inplace=True)
df.drop(df[df['ID'] == 140660390 ].index, axis=0, inplace=True)
df.head()




Columns (10,12,13,20,29,38,45,46,55,59) have mixed types. Specify dtype option on import or set low_memory=False.



The data is found locally
The number of rows is: 408335


Unnamed: 0,Incident Number,ID,Address,Incident Date,Call Number,Alarm DtTm,Arrival DtTm,Close DtTm,City,zipcode,...,Detector Alerted Occupants,Property Use,Heat Source,Supervisor District,neighborhood_district,point,Incident Time,Incident month,Incident year,focuse_Situation_by_number
205932,10001468,100014680,274 Brannan St.,2010-01-05,100050086,2010-01-05 08:26:41,2010-01-05 08:32:04,2010-01-05 08:41:49,SF,94107,...,-,429 - Multifamily dwellings,,,Financial District/South Beach,POINT (-122.39169312 37.78233494),00:00:00,1,2010,745
205933,10001472,100014720,Brotherhood Wy. / Thomas More Wy.,2010-01-05,100050091,2010-01-05 08:52:51,2010-01-05 08:57:14,2010-01-05 08:57:36,SF,94112,...,-,"960 - Street, other",,7.0,Lakeshore,POINT (-122.472734 37.712926),00:00:00,1,2010,711
205934,10001473,100014730,684 48th Av.,2010-01-05,100050092,2010-01-05 08:54:09,2010-01-05 09:23:33,2010-01-05 09:23:48,SF,94121,...,-,429 - Multifamily dwellings,,,Outer Richmond,POINT (-122.509025 37.775448),00:00:00,1,2010,353
205935,10001476,100014760,690 Market St.,2010-01-05,100050098,2010-01-05 09:07:38,2010-01-05 09:11:47,2010-01-05 09:20:48,SF,94105,...,-,599 - Business office,,,Financial District/South Beach,POINT (-122.403309 37.787838),00:00:00,1,2010,740
205936,10001477,100014770,1300 Junipero Serra Bl.,2010-01-05,100050099,2010-01-05 09:10:42,2010-01-05 09:15:48,2010-01-05 09:21:06,SF,94112,...,-,429 - Multifamily dwellings,,7.0,Lakeshore,POINT (-122.472036 37.711842),00:00:00,1,2010,700


# Part 1

## 3. Map to show the distribution for 4 Primary Situations in different Neighborhood

In [22]:
##
#
# Author: 
# Salim Omar
#
##

# dataset for map
df_2022 = df[(df['Incident Date'] >= '2022-12-01') &
            (df['Incident Date'] <= '2022-12-31')]
df_2022.head()
# len(df_2022)
focuse_Situation2 = df_2022['Primary Situation']
ListOfSituation2 = focuse_Situation2.unique()
#print(ListOfSituation2)
len(ListOfSituation2)

focuse_Situation = df_2022['focuse_Situation_by_number']
ListOfSituation = focuse_Situation.unique()
#print(ListOfSituation)
from shapely import wkt
# df_2022.loc[df_2022['focuse_Situation_by_number'].str.startswith('1'), 'focuse_Situation_by_number'] = 'Fire/explosion'
# df_2022.loc[df_2022['focuse_Situation_by_number'].str.startswith('5'), 'focuse_Situation_by_number'] = 'Public service'
# df_2022.loc[df_2022['focuse_Situation_by_number'].str.startswith('7'), 'focuse_Situation_by_number'] = 'Alarm'
# df_2022['focuse_Situation_by_number'].replace(['322','324'], 'Motor vehicle accident', inplace=True)
# df_2022['focuse_Situation_by_number'].replace(['311'], 'Medical assist', inplace=True)
# df_2022['focuse_Situation_by_number'].replace(['700'], 'False alarm/call', inplace=True)
# df_2022['focuse_Situation_by_number'].replace(['322'], 'Motor vehicle accident with injuries', inplace=True)
# df_2022['focuse_Situation_by_number'].replace(['311'], 'Medical assist', inplace=True)
df_2022 = df_2022[df_2022['focuse_Situation_by_number'].isin(['111', '700', '113','150'])]
df_2022['focuse_Situation_by_number'].replace(['150'], 'Outside rubbish fire', inplace=True)
df_2022['focuse_Situation_by_number'].replace(['111'], 'Building fire', inplace=True)
df_2022['focuse_Situation_by_number'].replace(['700'], 'False alarm/call', inplace=True)
df_2022['focuse_Situation_by_number'].replace(['113'], 'Cooking fire', inplace=True)

# keep the important coulmn and drop other 
df_2022 = df_2022.loc[:, ['ID', 'point', 'Incident year', 'focuse_Situation_by_number']]


df_2022['point'] = df_2022['point'].apply(wkt.loads)
df_2022['lon'] = df_2022['point'].apply(lambda p: p.x)
df_2022['lat'] = df_2022['point'].apply(lambda p: p.y)

focuse_Situation = df_2022['focuse_Situation_by_number']
ListOfSituation = focuse_Situation.unique()
print(ListOfSituation)

df_2022.head()
len(df_2022)


['Cooking fire' 'Outside rubbish fire' 'False alarm/call' 'Building fire']


560

In [24]:
##
#
# Author: 
# Salim Omar
#
##

with open('./geo_map_data/Planning Neighborhood Groups Map.geojson', 'r') as f:
    data = json.load(f)

data["features"][0]


# Create a dictionary to map focuse_Situation_by_number values to colors
situation_color_dict = {
    # 'Fire/explosion': 'green',
    'Outside rubbish fire': 'orange',
    'False alarm/call': 'purple',
    'Cooking fire': 'blue',
    'Building fire': 'red'
}

color_icon_dict = {
    'red': 'fa-fire',
    # 'green': 'fa-aulance',
    'blue': 'fa-building',
    'orange': 'fa-free-code-camp',
    'purple': 'fa-bell'
}


def random_color(fea):
    return {
        'color': 'black',
        'fillColor': random.choice(['red', 'yellow', 'green', 'orange']),
    }


my_map = Map(center=(37.7749, -122.4194), zoom=12,
    layout={'height': '600px', 'width': '100%'})


# Add GeoJSON layer to the map
geojson_layer = GeoJSON(
    data=data,
    style={
        'color': 'gray',
        'weight': 3,
        'fillOpacity': 0.2

    },
    hover_style={
        'color': 'white', 'dashArray': '0', 'fillOpacity': 0.4,
    },
    style_callback=random_color,
    name='Neighborhoods',
)


my_map.add_layer(geojson_layer)

# Add markers to the map for each incident in the data
for index, row in df_2022.iterrows():
    location = (row['lat'], row['lon'])
    marker_color = situation_color_dict[row['focuse_Situation_by_number']]
    marker = Marker(location=location, draggable=False,
                    title=row['focuse_Situation_by_number'])
    marker.icon = AwesomeIcon(
        name=color_icon_dict[marker_color], marker_color=marker_color, icon_color='black')
    my_map.add_layer(marker)


my_map.add_control(FullScreenControl())

# Display the map
my_map


Map(center=[37.7749, -122.4194], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', '…

_________________________________________________________________________________________________________________________________

# Part 2

## 1. Bar chart

This code is changing some columns to a alarm date and arrival time format and figuring out the time difference between two of those columns. 

This code calculates the average (mean) arrival time for a dataset. It uses the mean() method to calculate the average of the "Arrive time_minutes" column of the DataFrame df, which was created in the previous code block by subtracting the "Alarm DtTm" column from the "Arrival DtTm" column.

The code down below calculates the average arrival time of fire department units to emergency incidents, grouped by battalion.

This code generates a bar chart using the Plotly library to visualize the average arrival time of different fire departments or "battalions" to a fire scene.

## 3. Polar bar chart.

This code is creating a polar plot that shows the hourly counts of alarms in the year 2022. 

_____________________________________

## Part 3


1. Filter the data from a DataFrame "df" by Battalion and Incident Date, selecting only the rows with a Battalion value within a certain list and within a certain date range.
2. Create a new column "Year" in the filtered DataFrame with the year of the incident.
3. Group the filtered DataFrame by Battalion and Year and count the number of incidents per group, resetting the index to create a new DataFrame.
4. Pivot the grouped DataFrame to create a table with Battalion as rows and Year as columns.
5. Create a stacked bar chart with the pivoted DataFrame, with the year on the x-axis and the number of incidents on the y-axis, and with each Battalion's contribution to the total number of alarms stacked on top of each other.
6. Add a title and labels to the chart.
7. Move the legend outside the plot area.
8. Add hover effects to the plot using mplcursors. When hovering over a stacked bar in the plot, display a label with the Battalion name, Year, and Count of alarms for that Battalion in that year.
9.
10. Show the completed plot.

In summary, this code filters and pivots data from a DataFrame to create a stacked bar chart showing the total number of alarms for all battalions per year, and adds hover effects to show more information about each bar.

This code generates a calendar plot showing the number of fire incidents that occurred on each day within a given date range. It also adds a hover effect to display the exact count for each date.

Here's what the code does step by step:

1. Import necessary libraries: matplotlib.pyplot, calplot, and mplcursors.
2. Group the fire incidents by date and count the number of incidents on each date.
3. Filter out dates with zero incidents.
4. Create a Pandas Series using the date range from "2010-04-01" to "2023-04-01" with a frequency of one day.
5. Create a calendar plot using calplot, using the counts from step 2 as input data and setting the colormap to 'cool'.
6. Add a hover effect to the plot using mplcursors. The function on_add() is called when a cursor is added to the plot. It extracts the index and value of the selected date and sets the text of the annotation to display the date and its count.

This code reads in a dataframe df and calculates the average response time for the top 10 most frequent unique values in the 'Primary Situation' column, broken down by year. It then creates a line chart showing the trend of average response time for each situation from 2010 to 2023.

To achieve this, the code first gets the top 10 most frequent unique values in the 'Primary Situation' column and replaces the missing values in the 'Arrival DtTm' column with the string 'Missing'. It then creates a dictionary situation_data to store the average response time data for each situation.

The code then loops through each year from 2010 to 2023, selects rows from the dataframe that begin with the current year, and calculates the average response time for each situation in that year. It appends the average response time to the corresponding list in situation_data.

Finally, the code creates a line chart using matplotlib to show the trend of average response time for each situation from 2010 to 2023. It adds a legend, x-axis label and tick labels, y-axis label, and an information box. It also adds hover effects using mplcursors to display the details for each point on the chart. The resulting chart allows the viewer to compare the trends in average response time for different situations over time.

This code creates a new Pandas DataFrame called neighborhoods_df that contains the unique values from the "neighborhood_district" column of an existing DataFrame called df. The pd.DataFrame() function is used to create the new DataFrame, passing in the array of unique values from the "neighborhood_district" column as the first argument, and specifying the name of the new column as "neighborhood_district" using the columns parameter.

Finally, the print() function is used to display the new DataFrame to the console. This will output the unique values from the "neighborhood_district" column of the original DataFrame df, with each unique value appearing in its own row under the "neighborhood_district" column header.

End