**Data Source:** https://data.lacity.org/A-Safe-City/Crime-Data-from-2010-to-2019/63jg-8b9z

**Crm Cd 1**

Indicates the crime committed. Crime Code 1 is the primary and most serious offense; Crime Codes 2, 3, and 4 are, in that order, progressively less serious offenses. Lower crime class numbers are more serious.

In [1]:
import os
from datetime import datetime

import googlemaps
import pandas as pd
import watermark
from tqdm.notebook import tqdm

pd.set_option('display.max_columns', 50)

In [2]:
# Version of packages used
# (Re)load the watermark IPython extension, then print the Python/IPython
# versions (-v) and the versions of the packages imported in this notebook (--iv),
# so the environment that produced the outputs is recorded alongside them.
%reload_ext watermark
%watermark -v --iv

watermark  2.0.2
pandas     1.0.5
googlemaps 4.4.1
CPython 3.7.7
IPython 7.16.1


In [3]:
# Google Maps API client
# The key is read from the GOOGLE_MAPS_API_KEY environment variable so the
# credential is never stored in (or committed with) the notebook. When the
# variable is unset the key falls back to the empty string, matching the
# previous hard-coded value.
gmaps = googlemaps.Client(key=os.environ.get('GOOGLE_MAPS_API_KEY', ''))

In [4]:
# Read the Los Angeles 2010-2019 crime export (see Data Source above) into a DataFrame
df = pd.read_csv(filepath_or_buffer='Crime_Data_from_2010_to_2019.csv')

In [5]:
# Parse the 'DATE OCC' (date of occurrence) column into pandas datetimes,
# letting pandas infer the string format from the data
df['DATE OCC'] = df['DATE OCC'].pipe(pd.to_datetime, infer_datetime_format=True)

In [6]:
# Keep only rows whose occurrence date is 2019-01-01 or later, then renumber
# the rows from 0 (the old index is discarded, not kept as a column)
df = df.loc[df['DATE OCC'] >= '2019'].reset_index(drop=True)

In [7]:
# Narrow the frame to the columns used below: the DR_NO identifier, the
# occurrence date, the four crime codes, and the coordinates
df = df.loc[:, [
    'DR_NO', 'DATE OCC',
    'Crm Cd 1', 'Crm Cd 2', 'Crm Cd 3', 'Crm Cd 4',
    'LAT', 'LON',
]]

In [8]:
# Sanity check: preview the first two rows
df.iloc[:2]

Unnamed: 0,DR_NO,DATE OCC,Crm Cd 1,Crm Cd 2,Crm Cd 3,Crm Cd 4,LAT,LON
0,191907191,2019-03-08,510.0,,,,34.2991,-118.4211
1,190125334,2019-10-17,330.0,,,,34.0363,-118.2314


In [9]:
def _find_neighborhood(geocode_results):
    """Return the neighborhood name found in a reverse-geocode response.

    Only the first result is inspected. Its 'address_components' list holds one
    entry per address element (e.g. county, city, neighborhood); the first entry
    whose 'types' list contains 'neighborhood' supplies the name through its
    'long_name' field.

    Parameters
    ----------
    geocode_results : list
        Value returned by ``gmaps.reverse_geocode``; may be empty.

    Returns
    -------
    str or None
        The neighborhood's long name, or None when the response is empty or
        the first result has no neighborhood component.
    """
    # An empty response means the coordinates could not be resolved
    if not geocode_results:
        return None

    for component in geocode_results[0].get('address_components', []):
        if 'neighborhood' in component.get('types', []):
            # Stop at the first match; there is no need to check the other positions
            return component['long_name']

    return None


# Loop through each row in the crime dataframe
for idx, row in tqdm(df.iterrows(), total=len(df)):

    # Empty data points are recorded with lat and long = 0 and yield no
    # geocoding result, so no API request is spent on them
    if row.LAT == 0 and row.LON == 0:
        neighborhood = None
    else:
        # Reverse-geocode the coordinates and pull the neighborhood out of the response
        neighborhood = _find_neighborhood(gmaps.reverse_geocode((row.LAT, row.LON)))

    # Store the name in the 'Neighborhood' column; rows without a match are left empty
    if neighborhood is not None:
        df.at[idx, 'Neighborhood'] = neighborhood

    # After each 20000 rows, save the dataframe as csv. This checkpoint is reached
    # for every row, including those that produced no neighborhood
    if idx % 20000 == 0:
        df.to_csv('Crime_Data_2019_Neighborhoods_v1.csv', index=False)

HBox(children=(FloatProgress(value=0.0, max=216102.0), HTML(value='')))




ApiError: REQUEST_DENIED (You must enable Billing on the Google Cloud Project at https://console.cloud.google.com/project/_/billing/enable Learn more at https://developers.google.com/maps/gmp-get-started)

In [10]:
# Write the final dataset to disk, omitting the row index
df.to_csv(path_or_buf='Crime_Data_2019_Neighborhoods_v2.csv', index=False)