In [None]:
import os

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
import requests
%matplotlib inline

# Testing Response/Result

In [None]:
# JSON endpoint of the data.nashville.gov dataset (id fuaa-r5cm) queried in the cells below.
endpoint = "https://data.nashville.gov/resource/fuaa-r5cm.json"

In [None]:
# Plain GET with no query parameters; the bare `response` on the last line
# renders the Response object so the request outcome can be checked by eye.
response = requests.get(endpoint)
response

In [None]:
# Decode the JSON body of the test request into Python objects.
result = response.json()

In [None]:
# Show the decoded payload to inspect which fields each record carries.
# NOTE(review): this renders the whole payload, not a sample.
result

# Question 1
### Make an API request that returns the months where "fire" was searched in 2016. Which month had the most searches?

In [None]:
# Same search-terms endpoint; the filter fields are sent as query parameters
# so only the "fire" rows for 2016 are requested.
endpoint = "https://data.nashville.gov/resource/fuaa-r5cm.json"
fire_params = dict(query_text='fire', year='2016')

In [None]:
# Request the rows matching fire_params; requests encodes the dict into the query string.
fire_response = requests.get(endpoint,params=fire_params)

In [None]:
# Render the Response object to check the outcome of the request by eye.
fire_response

In [None]:
# Decode the JSON body; the next code cell iterates over it as a sequence of dict records.
fire_result = fire_response.json()

Which month had the most searches?

In [None]:
# Build a month / query-count table from the "fire" records.
# Each record is read with .get(), so both columns always have one entry per
# record and stay the same length even if a key is missing from a record.
fire_2016 = pd.DataFrame(
    {
        'month': [record.get('month_name') for record in fire_result],
        'query_count': [record.get('query_count') for record in fire_result],
    }
)

# query_count appears to arrive as text (a later cell has to cast it with
# astype('int')). Convert it to numbers here so that sorting ranks the months
# by magnitude instead of alphabetically (as strings, '9' sorts above '10').
fire_2016['query_count'] = pd.to_numeric(fire_2016['query_count'])


In [None]:
# Rank the months from most to fewest searches; the top row answers the question.
fire_2016.sort_values('query_count', ascending=False)

Answer: August had the highest number of searches.

# Question 2 
### Make an API request that returns all the times a query was run more than 100 times in a month. How many times did this occur?

In [None]:
# $where filters on the server to rows with query_count above 100; the explicit
# $limit asks for a page large enough that the matching rows are not cut short.
params = {'$where': 'query_count > 100', '$limit': '100000'}
response = requests.get(endpoint, params=params)

# Fail loudly on an HTTP error. Without this, a JSON error body would be parsed
# like any other payload and the len(result) in the next cell would report a
# meaningless number instead of a row count.
response.raise_for_status()
result = response.json()

# Preview a handful of rows only; the full list is counted in the next cell.
result[:5]

In [None]:
# Number of records returned by the query above (one record per query/month row).
len(result)

Answer: This occurred 1262 times

# Question 3
### Make another API request that returns all the times "codes" was searched more than 100 times in a month. How many times did this occur?

In [None]:
# Same threshold filter as the previous question, narrowed to the "codes" query.
# The explicit $limit mirrors the previous request so the count does not depend
# on whatever page size the API applies by default.
params = {'$where': 'query_count > 100', 'query_text': 'codes', '$limit': '100000'}

response = requests.get(endpoint, params=params)
# Stop here on an HTTP error instead of counting the keys of an error payload.
response.raise_for_status()
result = response.json()

# Preview a handful of rows only; the full list is counted in the next cell.
result[:5]

In [None]:
# Number of records where "codes" exceeded 100 searches in a month.
len(result)

Answer: This occurred 56 times

# Question 4
### Make an API request that returns the entire top 500 monthly searches dataset. Make a chart that shows the number of times "maps" was searched in a month across the entire time frame.

In [None]:
#step one: bring in the entire top 500 dataset
# No filter is sent; the large $limit asks for every row in a single response.
params = {'$limit': '100000'}
response = requests.get(endpoint, params=params)

# Stop here on an HTTP error rather than building a DataFrame from an error payload.
response.raise_for_status()
result = response.json()

# Preview a handful of rows only: rendering the whole dataset would flood the
# notebook output. The row count is shown in the next cell.
result[:5]

In [None]:
# Row count of the full download.
len(result)

In [None]:
# step two: turn the list of records into a DataFrame (one row per record, one column per key)
top500 = pd.DataFrame(result)

In [None]:
# Inspect the assembled frame.
top500

In [None]:
# step three: keep only the rows whose search text is exactly "maps"
is_maps_query = top500['query_text'] == 'maps'
maps = top500.loc[is_maps_query]

In [None]:
#step four: fix data types of maps dataframe
# `maps` is a filtered selection of top500, so writing a column onto it in place
# can raise pandas' SettingWithCopyWarning. assign() returns a new frame with the
# converted column instead, and re-running this cell gives the same result.
maps = maps.assign(query_count=maps['query_count'].astype('int'))

In [None]:
#step five: sum by month
new_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

# Sum only the count column. Summing the whole frame would also try to "add up"
# the remaining columns, which are not quantities to be totalled.
month_counts = (
    maps.groupby('month_name')['query_count']
    .sum()
    .reindex(new_order)          # calendar order instead of alphabetical
    .rename_axis('month_name')   # reindex drops the index name; restore it
    .reset_index()               # month_name becomes a regular column for plotting
)
#show
month_counts

In [None]:
#step 6: plot months dataframe
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(month_counts['month_name'], month_counts['query_count'])

# Label the figure so it can be read on its own; month_counts holds one total
# per calendar month, summed over every year present in the data.
ax.set_title('Searches for "maps" by calendar month (all years combined)')
ax.set_xlabel('Month')
ax.set_ylabel('Number of searches')

# Render the figure explicitly so the cell output is the chart, not a Line2D repr.
plt.show()

# Stretch Questions


# Question 5 
### Open Ended: Compare pull from 311 service requests dataset with pull from top 500 searches dataset

In [None]:
endpoint = "https://data.nashville.gov/resource/7qhx-rexh.json"

# No $limit is sent here, so the response holds only the rows the API returns
# without one. To see that the dataset has more than a million rows, pass
# params={'$limit': '1000000'} to requests.get; that variant is left out
# because the cell then takes a while to run.
response = requests.get(endpoint)
# Stop here on an HTTP error instead of measuring the length of an error payload.
response.raise_for_status()
result = response.json()
len(result)

In [None]:
# Show the decoded records to inspect the fields available on each entry.
# NOTE(review): this renders the whole payload, not a sample.
result

**Observations**
- the dataset is significantly larger, with more than 1,000,000 cases
- there is a lot of information with each entry, including geographic information

# **Question Six**
### Find 2 new datasets on data.nashville.gov, make API requests to pull the data, and do an analysis that combines the datasets

**Combining Fire Stations and Active Fire Service Dispatch Requests**
1. Fire stations
2. Active Requests

#### Fire Stations

In [None]:
## Bring in FireStations Data
endpoint = "https://data.nashville.gov/resource/frq9-a5iv.json" #fire stations endpoint

response = requests.get(endpoint)
# Stop here on an HTTP error rather than building a DataFrame from an error payload.
response.raise_for_status()
# `stations` (the raw records) is kept because a later cell rebuilds a frame from it.
stations = response.json()
stations_df = pd.DataFrame(stations)

In [None]:
# Count stations per ZIP code.
# Only station_number is counted, rather than counting every column and then
# dropping a hard-coded list of them: that list included system-generated
# ':@computed_region_*' columns, and drop() raises KeyError as soon as one of
# the listed columns is missing from the response.
station_count = (
    stations_df.groupby('zip_code')['station_number']
    .count()
    .rename('num_stations')
    .reset_index()   # zip_code becomes a regular column for the merge later on
)
station_count

#### Active Requests

In [None]:
endpoint = "https://data.nashville.gov/resource/jwgg-8gg4.json"

response = requests.get(endpoint)
# Stop here on an HTTP error rather than building a DataFrame from an error payload.
response.raise_for_status()
incidents = response.json()
incidents_df = pd.DataFrame(incidents)

if incidents_df.empty:
    # This feed lists *active* requests, so it may legitimately be empty. An empty
    # frame has no postal_code column to group on, so build the empty result
    # directly with the columns the merge below expects.
    incident_counts = pd.DataFrame(
        {
            'zip_code': pd.Series(dtype='object'),
            'num_incidents': pd.Series(dtype='int64'),
        }
    )
else:
    # Count incidents per postal code. Only incident_number is counted, instead of
    # counting every column and dropping a hard-coded list of the others.
    incident_counts = (
        incidents_df.groupby('postal_code')['incident_number']
        .count()
        .rename('num_incidents')
        .rename_axis('zip_code')   # match the key name used by station_count
        .reset_index()
    )

In [None]:
# Inspect the per-ZIP incident counts.
incident_counts

#### Combine the two dataframes

In [None]:
# Left join keeps every ZIP code that has a station; ZIP codes without a matching
# incident row get NaN for num_incidents and therefore NaN for the ratio.
station_workload = station_count.merge(incident_counts, how='left', on='zip_code')
station_workload = station_workload.assign(
    incidents_to_stations_ratio=(
        station_workload['num_incidents'] / station_workload['num_stations']
    )
)

In [None]:
# Display only: fillna returns a new frame that is not assigned back, so
# station_workload itself still contains its NaN values after this cell.
station_workload.fillna(0)

#### Make a map showing the incidents-to-stations ratio; this should show which areas are under the most stress
- This requires switching to a geospatial environment

In [None]:
#geospatial map imports
from shapely.geometry import Point
import geopandas as gpd
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster

In [None]:
#bring in zipcodes geometry
# The file location is specific to one machine, so it can be overridden with the
# ZIPCODES_GEOJSON environment variable. The original path is kept as the
# fallback, so nothing changes where that path already works.
zipcodes_path = os.environ.get(
    'ZIPCODES_GEOJSON',
    'C:/Users/theoc/Documents/NSS/Python/data/zipcodes.geojson',
)
zipcodes = gpd.read_file(zipcodes_path)

In [None]:
# Peek at one row to see which columns the ZIP-code layer provides.
zipcodes.head(1)

In [None]:
# Align the key name with the ZIP-code layer, whose key column is called 'zip',
# so the join and the map below can use a single column name.
station_workload = station_workload.rename({'zip_code': 'zip'}, axis='columns')

In [None]:
# Attach the workload figures to the ZIP-code geometries. The left join keeps
# every ZIP polygon, including those without any workload row.
zipmap = zipcodes.merge(station_workload, how='left', on='zip')

In [None]:
# Replace every missing value left by the join with 0.
# NOTE(review): after this fill, ZIP codes that had no workload row carry a ratio
# of 0 rather than NaN, so the maps below cannot tell "no data" apart from "no
# incidents", and the Choropleth's nan_fill_color never applies to this column.
# Confirm that this is intended.
zipmap = zipmap.fillna(0)

In [None]:
# Static choropleth with matplotlib: ZIP polygons shaded by incidents per station.
fig, ax = plt.subplots(figsize=(12, 6))
zipmap.plot(
    column='incidents_to_stations_ratio',
    cmap='Reds',
    edgecolor='black',
    legend=True,
    ax=ax,
)
# Hide the axes frame and ticks; this call returns None, so the cell prints nothing.
ax.set_axis_off()

## Folium Map

In [None]:
# Centre point for the folium map: the mean of all ZIP-code centroids.
zip_centroids = zipmap.geometry.centroid
cenx = zip_centroids.x.mean()
ceny = zip_centroids.y.mean()

In [None]:
# Centroid of ZIP code 37221, stored as one-element lists of x and y coordinates.
zip_37221 = zipcodes.loc[zipcodes['zip'] == '37221']
centroid_37221 = zip_37221.geometry.centroid
centx = centroid_37221.x.to_list()
centy = centroid_37221.y.to_list()
print(centx)

In [None]:
## Need to clear up the nan in mapped location
## for station 34 in zip 37221
geostations = pd.DataFrame(stations)

# Columns that are not needed for mapping. errors='ignore' keeps this cell working
# if one of the listed columns is missing from the response.
delcol = [':@computed_region_gisn_y5cm',
          ':@computed_region_sjpq_96s8', ':@computed_region_v3ji_vzam',
          ':@computed_region_c9xn_skx3', ':@computed_region_cfa7_hbpz',
          ':@computed_region_f73m_vb2k', 'street_address', 'city', 'state']
geostations = geostations.drop(columns=delcol, errors='ignore')
geostations = geostations.rename(columns={'zip_code': 'zip'})

# Centroid of ZIP 37221 from the previous cell; retained as a candidate stand-in
# location, but not used below because the station is dropped instead.
cent37 = {'latitude': centy[0], 'longitude': centx[0]}

# Drop stations without a mapped location by testing the value itself. This
# replaces a drop by hard-coded row position (37), which only removed the right
# row while the dataset kept the exact same size and ordering.
geostations = (
    geostations.dropna(subset=['mapped_location'])
    .sort_values(by='zip')
    .reset_index(drop=True)
)

In [None]:
# Inspect the cleaned station table.
geostations

In [None]:

# Flatten each station's nested location mapping into plain long / lat columns,
# alongside the station number and ZIP code.
station_locations = geostations['mapped_location']
geostations_clean = pd.DataFrame(
    {
        'station_number': list(geostations['station_number']),
        'zip': list(geostations['zip']),
        'long': [location.get('longitude') for location in station_locations],
        'lat': [location.get('latitude') for location in station_locations],
    }
)
geostations_clean.head()

In [None]:
# Base map centred on the mean ZIP-code centroid.
# Recent folium releases no longer ship the Stamen tile sets, so
# tiles='stamentoner' fails there; 'CartoDB positron' is a built-in light
# basemap that needs no extra attribution or API key.
nashmap = folium.Map(location=[ceny, cenx], zoom_start=10, tiles='CartoDB positron')

# Shade each ZIP polygon by its incidents-to-stations ratio. zipmap provides both
# the geometry and the values, matched on the 'zip' property.
folium.Choropleth(geo_data = zipmap,
                  data = zipmap,
                  columns=['zip','incidents_to_stations_ratio'],
                  key_on="feature.properties.zip",
                  fill_color='Reds',
                  nan_fill_color="White", #Use white color if there is no data available for the ZIP code
                  fill_opacity=0.8,
                  line_opacity=1,
                  legend_name='Station Strain', #title of the legend
                  highlight=True,
                  line_color='black'
                  ).add_to(nashmap)

# Add fire station markers: one circle per station, with the station number as tooltip.
# The unsupported `size` argument was dropped; a Circle's extent is set by `radius`.
station_columns = zip(
    geostations_clean['station_number'],
    geostations_clean['lat'],
    geostations_clean['long'],
)
for station_number, lat, long in station_columns:
    folium.Circle(
        location=[lat, long],
        tooltip=str(station_number),
        radius=350,
        color='black',
        fill=True,
        fill_opacity=.6,
        opacity=.1,
    ).add_to(nashmap)

#show map
nashmap