<a href="https://colab.research.google.com/github/ProfessorPatrickSlatraigh/CST3512/blob/main/Folium_map_NYPD_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Folium to Map NYPD Police Data by ZIP Code


**Author of the original article:** Louis Casanave, a data scientist from Brooklyn, NY.

[Using Folium on Police Data](https://towardsdatascience.com/using-folium-on-police-data-3207e505c649) — *Say YES to the MESS with Choropleths*

In [None]:
# The `curl` statement below is disabled because it does not work for the large Excel
# file on GitHub. Note that the URL is a GitHub `/blob/` page address, not a link to the
# file's raw bytes.
# NOTE(review): presumably curl saves GitHub's HTML page instead of the workbook; using
# the file's raw download URL together with `-L` would likely fix it — confirm before
# re-enabling. Until then the workbook has to be uploaded to the working directory by hand.
# !curl https://github.com/casanave/stop_and_frisk/blob/main/data/stop_and_frisk_2020.xlsx -o "stop_and_frisk_2020.xlsx"

In [None]:
# load geojson file of NYC map areas from Louis Casanove Github raw file
!curl https://raw.githubusercontent.com/casanave/stop_and_frisk/main/data/nyc-zip-code-tabulation-areas-polygons.geojson -o "nyc-zip-code-tabulation-areas-polygons.geojson"

In [None]:
import pandas as pd
import folium

# Load the 2020 stop-and-frisk workbook into a DataFrame.
# The .xlsx file must already be present in the working directory (the download cell
# above is disabled). The 2020 data was uploaded from
# https://www1.nyc.gov/site/nypd/stats/reports-analysis/stopfrisk.page
data = pd.read_excel('stop_and_frisk_2020.xlsx')


In [None]:
# Save the (rows, columns) shape of the raw data so it can be used later.
shape = data.shape

# Only the last expression of a cell is displayed, so a bare `shape` in the middle of the
# cell shows nothing; print it explicitly.
print(shape)

# Take a look at which columns the data contains (last expression -> cell output).
data.columns

In [None]:
# Build the working frame by discarding every column listed below; any column of `data`
# that is not named here is kept. (Later cells use STOP_ID and STOP_LOCATION_ZIP_CODE.)
# The names are spelled exactly as they appear in the dataset, including its own
# misspellings (e.g. BACKROUND, DECRIPTION, ..._FLG).
zipcode_frequency = data.drop(
    labels=[
        # date / time and record bookkeeping
        'STOP_FRISK_DATE',
        'STOP_FRISK_TIME',
        'YEAR2',
        'MONTH2',
        'DAY2',
        'STOP_WAS_INITIATED',
        'RECORD_STATUS_CODE',
        # officers, jurisdiction and stop details
        'ISSUING_OFFICER_RANK',
        'ISSUING_OFFICER_COMMAND_CODE',
        'SUPERVISING_OFFICER_RANK',
        'SUPERVISING_OFFICER_COMMAND_CODE',
        'LOCATION_IN_OUT_CODE',
        'JURISDICTION_CODE',
        'JURISDICTION_DESCRIPTION',
        'OBSERVED_DURATION_MINUTES',
        'SUSPECTED_CRIME_DESCRIPTION',
        'STOP_DURATION_MINUTES',
        'OFFICER_EXPLAINED_STOP_FLAG',
        'OFFICER_NOT_EXPLAINED_STOP_DESCRIPTION',
        'OTHER_PERSON_STOPPED_FLAG',
        # arrest / summons outcome
        'SUSPECT_ARRESTED_FLAG',
        'SUSPECT_ARREST_OFFENSE',
        'SUMMONS_ISSUED_FLAG',
        'SUMMONS_OFFENSE_DESCRIPTION',
        # officer identification
        'OFFICER_IN_UNIFORM_FLAG',
        'ID_CARD_IDENTIFIES_OFFICER_FLAG',
        'SHIELD_IDENTIFIES_OFFICER_FLAG',
        'VERBAL_IDENTIFIES_OFFICER_FLAG',
        # frisk / search / contraband / weapons
        'FRISKED_FLAG',
        'SEARCHED_FLAG',
        'ASK_FOR_CONSENT_FLG',
        'CONSENT_GIVEN_FLG',
        'OTHER_CONTRABAND_FLAG',
        'FIREARM_FLAG',
        'KNIFE_CUTTER_FLAG',
        'OTHER_WEAPON_FLAG',
        'WEAPON_FOUND_FLAG',
        # physical force
        'PHYSICAL_FORCE_CEW_FLAG',
        'PHYSICAL_FORCE_DRAW_POINT_FIREARM_FLAG',
        'PHYSICAL_FORCE_HANDCUFF_SUSPECT_FLAG',
        'PHYSICAL_FORCE_OC_SPRAY_USED_FLAG',
        'PHYSICAL_FORCE_OTHER_FLAG',
        'PHYSICAL_FORCE_RESTRAINT_USED_FLAG',
        'PHYSICAL_FORCE_VERBAL_INSTRUCTION_FLAG',
        'PHYSICAL_FORCE_WEAPON_IMPACT_FLAG',
        # background circumstances and suspect actions
        'BACKROUND_CIRCUMSTANCES_VIOLENT_CRIME_FLAG',
        'BACKROUND_CIRCUMSTANCES_SUSPECT_KNOWN_TO_CARRY_WEAPON_FLAG',
        'SUSPECTS_ACTIONS_CASING_FLAG',
        'SUSPECTS_ACTIONS_CONCEALED_POSSESSION_WEAPON_FLAG',
        'SUSPECTS_ACTIONS_DECRIPTION_FLAG',
        'SUSPECTS_ACTIONS_DRUG_TRANSACTIONS_FLAG',
        'SUSPECTS_ACTIONS_IDENTIFY_CRIME_PATTERN_FLAG',
        'SUSPECTS_ACTIONS_LOOKOUT_FLAG',
        'SUSPECTS_ACTIONS_OTHER_FLAG',
        'SUSPECTS_ACTIONS_PROXIMITY_TO_SCENE_FLAG',
        # search basis
        'SEARCH_BASIS_ADMISSION_FLAG',
        'SEARCH_BASIS_CONSENT_FLAG',
        'SEARCH_BASIS_HARD_OBJECT_FLAG',
        'SEARCH_BASIS_INCIDENTAL_TO_ARREST_FLAG',
        'SEARCH_BASIS_OTHER_FLAG',
        'SEARCH_BASIS_OUTLINE_FLAG',
        # demeanor and suspect description
        'DEMEANOR_CODE',
        'DEMEANOR_OF_PERSON_STOPPED',
        'SUSPECT_REPORTED_AGE',
        'SUSPECT_SEX',
        'SUSPECT_RACE_DESCRIPTION',
        'SUSPECT_HEIGHT',
        'SUSPECT_WEIGHT',
        'SUSPECT_BODY_BUILD_TYPE',
        'SUSPECT_EYE_COLOR',
        'SUSPECT_HAIR_COLOR',
        'SUSPECT_OTHER_DESCRIPTION',
        # location fields other than the ZIP code
        'STOP_LOCATION_PRECINCT',
        'STOP_LOCATION_SECTOR_CODE',
        'STOP_LOCATION_APARTMENT',
        'STOP_LOCATION_X',
        'STOP_LOCATION_Y',
        'STOP_LOCATION_FULL_ADDRESS',
        'STOP_LOCATION_STREET_NAME',
        'STOP_LOCATION_PATROL_BORO_NAME',
        'STOP_LOCATION_BORO_NAME',
    ],
    axis='columns',
)


In [None]:
# Look for null values: print the summary of the remaining columns
# (each column's dtype and non-null count).
zipcode_frequency.info()


In [None]:
# Summary statistics for the ZIP code column.
# Finding: the missing ZIP codes are not real nulls; they are stored in the dataframe
# as the literal string "(null)".
zipcode_frequency['STOP_LOCATION_ZIP_CODE'].describe()

In [None]:
# Count the rows whose ZIP code is the "(null)" placeholder directly.
# The previous `describe()[3]` read the frequency of the *most common* value by integer
# position: that is only correct if "(null)" happens to be the mode, and positional
# indexing on a label-indexed Series is deprecated in recent pandas.
null_zip_count = (zipcode_frequency['STOP_LOCATION_ZIP_CODE'] == '(null)').sum()

# `shape[0]` is the number of rows in the raw data. Multiply by 100 so the value is a
# true percentage, matching the wording of the message (it used to print a 0-1 fraction).
percent_missing = 100 * null_zip_count / shape[0]

# Display the percentage of missing ZIP codes, rounded to two decimal places.
print(f' This data has {percent_missing:.2f} percent of its zipcode data missing.')

In [None]:
# Aggregate to one row per ZIP code:
#   1. set every STOP_ID to the constant 1,
#   2. group the rows by STOP_LOCATION_ZIP_CODE (which becomes the index),
#   3. count the non-null entries of each remaining column within each group.
zipcode_frequency = (
    zipcode_frequency
    .assign(STOP_ID=1)
    .groupby("STOP_LOCATION_ZIP_CODE")
    .count()
)

# Show the first rows of the aggregated dataframe.
zipcode_frequency.head()

In [None]:
# Tidy the aggregated frame in one chain:
#   - rename the STOP_ID count column to FREQUENCY for clarity,
#   - cast the ZIP code index to strings so the map doesn't break,
#   - drop the "(null)" placeholder row for stops with no ZIP code.
zipcode_frequency = (
    zipcode_frequency
    .rename(columns={'STOP_ID': 'FREQUENCY'})
    .pipe(lambda frame: frame.set_axis(frame.index.astype(str), axis='index'))
    .loc[lambda frame: frame.index != '(null)']
)

In [None]:
# Plain-text preview of both ends of the per-ZIP table: head first, then tail.
print(zipcode_frequency.head(), zipcode_frequency.tail(), sep='\n')

In [None]:
# Base map of New York City.
# The article's original call used the keyword `default_zoom_start`, which blows up in
# folium.Map; `zoom_start` is used instead.
# The map is named `nyc_map` rather than `map` so that Python's built-in `map()` is not
# shadowed for the rest of the session.
nyc_map = folium.Map(location=[40.695000, -74.000000], zoom_start=10)

# folium.Choropleth binds `data` to the GeoJSON through two *columns* of the dataframe
# (join key, value). Move the ZIP code index into a named column so both entries of
# `columns` are plain column names instead of passing the Index object itself.
zip_counts = zipcode_frequency.rename_axis('ZIP_CODE').reset_index()

# Choropleth layer (the folium.Choropleth class replaces the deprecated
# `Map.choropleth()` method used in the original article):
#   - geo_data: the ZIP code polygons downloaded earlier,
#   - key_on:   the GeoJSON property matched against the ZIP_CODE column,
#   - legend:   the values are raw stop counts per ZIP code, so the legend says
#               "Number of Stops" rather than "Stop Rate" (nothing is normalised).
g = folium.Choropleth(
    geo_data="nyc-zip-code-tabulation-areas-polygons.geojson",
    name='choropleth',
    data=zip_counts,
    columns=['ZIP_CODE', 'FREQUENCY'],
    key_on='feature.properties.postalCode',
    fill_color='YlOrBr',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Number of Stops',
).add_to(nyc_map)

# Optional hover tooltips showing the ZIP code: attach the tooltip to the choropleth's
# underlying GeoJson layer rather than to the Choropleth object itself.
# folium.GeoJsonTooltip(fields=["postalCode"]).add_to(g.geojson)

# Last expression -> the interactive map is rendered as the cell output.
nyc_map



---



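*NOTE: The original article used the **folium** `Map.choropleth()` method, which has been deprecated. This notebook uses its replacement, the `folium.Choropleth` class. `folium.GeoJson` with a custom `style_function` is an alternative when finer control over the styling of each area is needed.*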