In [None]:
import altair as alt
import numpy as np
import pandas as pd
import geopandas as gpd
import datetime

# show every column when a frame is displayed (no truncation of wide frames)
pd.options.display.max_columns = None

In [None]:
# dataset landing page: https://data.sfgov.org/Public-Safety/Fire-Incidents/wr8u-xric
FIRE_INCIDENTS_CSV_URL = 'https://data.sfgov.org/api/views/wr8u-xric/rows.csv?accessType=DOWNLOAD'

# download the full CSV export and preview a single row
df = pd.read_csv(FIRE_INCIDENTS_CSV_URL)
df.head(1)

## clean up data

### sort out dates and times

In [None]:
#parse the alarm/arrival timestamps and the incident date as datetimes
for dt_column in ('Alarm DtTm', 'Arrival DtTm', 'Incident Date'):
    df[dt_column] = pd.to_datetime(df[dt_column])

#response time = arrival minus alarm, expressed in seconds
response_delta = df['Arrival DtTm'] - df['Alarm DtTm']
df['time_to_respond'] = response_delta.dt.total_seconds()

#year and month are sliced out of the date's string form and kept as strings
incident_date_text = df['Incident Date'].astype(str)
df['incident_year'] = incident_date_text.str[:4]
df['incident_month'] = incident_date_text.str[5:7]

## mapping fire incidents

In [None]:
#pull the two numbers out of the "POINT (<long> <lat>)" text and store them as floats
point_text = df['point']
df['longitude'] = point_text.str.extract(r'POINT \((.*) ', expand=False).astype(float)
df['latitude'] = point_text.str.extract(r'(\S*)\)$', expand=False).astype(float)

### points map - 2021, fires, up-to-Oct, SF

In [None]:
#just fires: situation codes that begin with "1"
df1 = df[df['Primary Situation'].astype(str).str[0] == "1"]

#just 2021, January through October. incident_month holds two-character
#month strings, so comparing against "10" also excludes December whenever
#the download contains it (testing only for "11" would let it through).
is_2021 = df1['incident_year'] == "2021"
through_october = df1['incident_month'] <= "10"
#.copy() gives an independent frame, so adding a column below does not write
#onto a slice of df1 (avoids pandas' SettingWithCopyWarning)
df1_2021 = df1[is_2021 & through_october].copy()

#label each type of fire from the prefix of its situation code;
#anything that matches none of the prefixes is "other"
situation_code = df1_2021['Primary Situation'].astype(str)
df1_2021['type'] = np.select(
    [
        situation_code.str[:4] == "1600",
        situation_code.str[:2] == "15",
        situation_code.str[:2] == "13",
        situation_code.str[:2] == "11",
    ],
    ["encampment", "trash", "vehicle", "structure"],
    default="other",
)

In [None]:
#grab just the most pertinent columns and give them short names
#(source column -> exported name, in output order)
honed_columns = {
    'Incident Number': 'id',
    'Address': 'address',
    'Incident Date': 'date',
    'time_to_respond': 'time_to_respond',
    'zipcode': 'zipcode',
    'Suppression Units': 'suppression_units',
    'Suppression Personnel': 'suppression_personnel',
    'Estimated Property Loss': 'est_property_loss',
    'Estimated Contents Loss': 'est_contents_loss',
    'Primary Situation': 'situation',
    'Property Use': 'property_use',
    'Area of Fire Origin': 'fire_origin',
    'Ignition Cause': 'ignition_cause',
    'Ignition Factor Primary': 'ignition_factor',
    'neighborhood_district': 'neighborhood_district',
    'type': 'type',
    'latitude': 'latitude',
    'longitude': 'longitude',
}
#.copy() so the column edits below act on an independent frame, not on a
#slice of df1_2021 (avoids pandas' SettingWithCopyWarning)
df1_2021Honed = df1_2021[list(honed_columns)].rename(columns=honed_columns).copy()

#remove number codes from situation descriptions.
#regex=True is spelled out because pandas 2.x treats the pattern as a literal
#string by default, which would leave the codes in place; the raw string
#keeps \d from being read as a (invalid) Python escape sequence.
leading_code = r'^\d+ '
for text_column in ('situation', 'property_use'):
    df1_2021Honed[text_column] = df1_2021Honed[text_column].str.replace(leading_code, '', regex=True)

#short "day month" label (e.g. "05 Mar"), built while 'date' is still a datetime
df1_2021Honed['date_string'] = df1_2021Honed['date'].dt.strftime('%d %b')
#make date into string
df1_2021Honed['date'] = df1_2021Honed['date'].astype(str)

#make into geodataframe, one point per incident from its longitude/latitude
#NOTE(review): no CRS is set on the frame; the coordinates look like WGS84 lon/lat - confirm before reprojecting
df1_2021Honed = gpd.GeoDataFrame(df1_2021Honed, geometry=gpd.points_from_xy(df1_2021Honed.longitude, df1_2021Honed.latitude))

In [None]:
#sort out minutes and seconds in response time
def convert(seconds):
    """Format a duration in seconds as a compact "<minutes>m <seconds>s" string.

    Parameters
    ----------
    seconds : int or float
        Duration in seconds. Any fractional part is truncated.

    Returns
    -------
    str
        For example "4m 7s". Durations of an hour or more are reported as
        total minutes ("65m 0s") instead of silently dropping the hours.
        Missing (None / NaN) or negative durations return "n/a" rather than
        raising, so one bad row cannot abort a whole column conversion.
    """
    if pd.isna(seconds) or seconds < 0:
        return "n/a"

    # whole seconds only; divmod splits them into minutes and leftover seconds
    minutes, remainder = divmod(int(seconds), 60)

    # no width padding, so single-digit values carry no stray spaces
    return "%dm %ds" % (minutes, remainder)
      
#format every response time, collapsing any double space left by the formatter
stringTTR = [
    convert(response_seconds).replace("  ", " ")
    for response_seconds in df1_2021Honed['time_to_respond']
]

df1_2021Honed['time_to_respond_string'] = stringTTR

In [None]:
# split the incidents by fire type
fire_type = df1_2021Honed['type']
df1_2021HonedTrash = df1_2021Honed[fire_type.eq("trash")]
df1_2021HonedVehicle = df1_2021Honed[fire_type.eq("vehicle")]
df1_2021HonedEncampment = df1_2021Honed[fire_type.eq("encampment")]
df1_2021HonedStructure = df1_2021Honed[fire_type.eq("structure")]
df1_2021HonedOther = df1_2021Honed[fire_type.eq("other")]

# write each subset to its own GeoJSON file
geojson_exports = [
    (df1_2021HonedTrash, "fires2021_trash.geojson"),
    (df1_2021HonedVehicle, "fires2021_vehicle.geojson"),
    (df1_2021HonedEncampment, "fires2021_encampment.geojson"),
    (df1_2021HonedStructure, "fires2021_structure.geojson"),
    (df1_2021HonedOther, "fires2021_other.geojson"),
]
for subset, filename in geojson_exports:
    subset.to_file(filename, driver="GeoJSON")

### change since 2015 by type, SF - STACKED BAR

In [None]:
#fires from 1 Jan 2015 up to (but not including) 1 Nov 2021
incident_day = df1['Incident Date'].dt.date
on_or_after_start = incident_day >= datetime.date(2015,1,1)
before_end = incident_day < datetime.date(2021,11,1)
df2 = df1[before_end & on_or_after_start]

In [None]:
#label each type of fire from the prefix of its situation code;
#anything that matches none of the prefixes is "other"
situation_code = df2['Primary Situation'].astype(str)
fire_type = np.select(
    [
        situation_code.str[:4] == "1600",
        situation_code.str[:2] == "15",
        situation_code.str[:2] == "13",
        situation_code.str[:2] == "11",
    ],
    ["encampment", "trash", "vehicle", "structure"],
    default="other",
)
#assign() returns a new frame, so the label column is not written onto a
#slice of df1 (avoids pandas' SettingWithCopyWarning)
df2 = df2.assign(type=fire_type)

In [None]:
#count incidents per year and fire type.
#The count column is named directly via reset_index(name=...); assigning a
#nested list of names to .columns would build a MultiIndex, which is what
#previously forced a save-to-CSV-and-reload workaround.
df2graph = (
    df2.groupby(['incident_year','type'])['Incident Number']
    .count()
    .reset_index(name='value')
)

#stacking order for each type of fire; any type not listed sorts last
type_order = {"trash": 0, "encampment": 1, "structure": 2, "vehicle": 3}
df2graph['order'] = df2graph['type'].map(type_order).fillna(4).astype(int)

#keep incident year as a string
df2graph['incident_year'] = df2graph['incident_year'].astype(str)

In [None]:
#stacked bar chart: one bar per year, segments coloured by fire type
citywide_stack_order = alt.Order('order', sort='descending')
citywide_bars = alt.Chart(df2graph).mark_bar()
citywide_bars.encode(
    x=alt.X('incident_year'),
    y=alt.Y('value'),
    color=alt.Color('type'),
    order=citywide_stack_order,
).properties(width=400)

### just the mission - STACKED BAR

In [None]:
# keep only incidents whose neighborhood district is "Mission"
in_mission = df2['neighborhood_district'].eq("Mission")
df3 = df2[in_mission]

In [None]:
#count incidents per year and fire type.
#The count column is named directly via reset_index(name=...); assigning a
#nested list of names to .columns would build a MultiIndex, which is what
#previously forced a save-to-CSV-and-reload workaround.
df3graph = (
    df3.groupby(['incident_year','type'])['Incident Number']
    .count()
    .reset_index(name='value')
)

#stacking order for each type of fire; any type not listed sorts last
type_order = {"trash": 0, "encampment": 1, "structure": 2, "vehicle": 3}
df3graph['order'] = df3graph['type'].map(type_order).fillna(4).astype(int)

#keep incident year as a string
df3graph['incident_year'] = df3graph['incident_year'].astype(str)

In [None]:
#stacked bar chart: one bar per year, segments coloured by fire type
mission_stack_order = alt.Order('order', sort='descending')
mission_bars = alt.Chart(df3graph).mark_bar()
mission_bars.encode(
    x=alt.X('incident_year'),
    y=alt.Y('value'),
    color=alt.Color('type'),
    order=mission_stack_order,
).properties(width=400)