# Lat Lon to Zip Code for 2020 Crime Data
2023-09-29<br>
Evangeline Chang

In [1]:
# %pip install geopy

In [1]:
import pandas as pd
import numpy as np
from typing import Optional
from geopy.geocoders import Nominatim
from datetime import datetime
import time
import os

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [3]:
crime = pd.read_csv('data/Crime_Data_head20.csv', index_col=0)
print(crime.shape)
crime

(20, 28)


Unnamed: 0,DR_NO,Date Rptd,DATE OCC,TIME OCC,AREA,AREA NAME,Rpt Dist No,Part 1-2,Crm Cd,Crm Cd Desc,...,Status,Status Desc,Crm Cd 1,Crm Cd 2,Crm Cd 3,Crm Cd 4,LOCATION,Cross Street,LAT,LON
0,10304468,01/08/2020 12:00:00 AM,01/08/2020 12:00:00 AM,2230,3,Southwest,377,2,624,BATTERY - SIMPLE ASSAULT,...,AO,Adult Other,624.0,,,,1100 W 39TH PL,,34.0141,-118.2978
1,190101086,01/02/2020 12:00:00 AM,01/01/2020 12:00:00 AM,330,1,Central,163,2,624,BATTERY - SIMPLE ASSAULT,...,IC,Invest Cont,624.0,,,,700 S HILL ST,,34.0459,-118.2545
2,200110444,04/14/2020 12:00:00 AM,02/13/2020 12:00:00 AM,1200,1,Central,155,2,845,SEX OFFENDER REGISTRANT OUT OF COMPLIANCE,...,AA,Adult Arrest,845.0,,,,200 E 6TH ST,,34.0448,-118.2474
3,191501505,01/01/2020 12:00:00 AM,01/01/2020 12:00:00 AM,1730,15,N Hollywood,1543,2,745,VANDALISM - MISDEAMEANOR ($399 OR UNDER),...,IC,Invest Cont,745.0,998.0,,,5400 CORTEEN PL,,34.1685,-118.4019
4,191921269,01/01/2020 12:00:00 AM,01/01/2020 12:00:00 AM,415,19,Mission,1998,2,740,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",...,IC,Invest Cont,740.0,,,,14400 TITUS ST,,34.2198,-118.4468
5,200100501,01/02/2020 12:00:00 AM,01/01/2020 12:00:00 AM,30,1,Central,163,1,121,"RAPE, FORCIBLE",...,IC,Invest Cont,121.0,998.0,,,700 S BROADWAY,,34.0452,-118.2534
6,200100502,01/02/2020 12:00:00 AM,01/02/2020 12:00:00 AM,1315,1,Central,161,1,442,SHOPLIFTING - PETTY THEFT ($950 & UNDER),...,IC,Invest Cont,442.0,998.0,,,700 S FIGUEROA ST,,34.0483,-118.2631
7,200100504,01/04/2020 12:00:00 AM,01/04/2020 12:00:00 AM,40,1,Central,155,2,946,OTHER MISCELLANEOUS CRIME,...,IC,Invest Cont,946.0,998.0,,,200 E 6TH ST,,34.0448,-118.2474
8,200100507,01/04/2020 12:00:00 AM,01/04/2020 12:00:00 AM,200,1,Central,101,1,341,"THEFT-GRAND ($950.01 & OVER)EXCPT,GUNS,FOWL,LI...",...,IC,Invest Cont,341.0,998.0,,,700 BERNARD ST,,34.0677,-118.2398
9,201710201,06/19/2020 12:00:00 AM,05/26/2020 12:00:00 AM,1925,17,Devonshire,1708,1,341,"THEFT-GRAND ($950.01 & OVER)EXCPT,GUNS,FOWL,LI...",...,AO,Adult Other,341.0,,,,11900 BALBOA BL,,34.2864,-118.5021


In [4]:
def get_year(in_file):
    for index, row in in_file.iterrows():
        date_str = row['DATE OCC']
        date_obj = datetime.strptime(date_str, "%m/%d/%Y %I:%M:%S %p")
        year = date_obj.year

        in_file.at[index, 'Year'] = int(year)

In [5]:
get_year(crime)

crime_2020 = crime.loc[crime['Year'] == 2020]

crime_2020.to_csv('data/crime_data_2020_head20.csv')

In [6]:
crime_2020_split = np.array_split(crime_2020, 10)

data_directory = 'data/Crime_split/'

if not os.path.exists(data_directory):
    os.makedirs(data_directory)

for i in range(0, 10):
    split_file_path = os.path.join(data_directory, f'crime_data_2020_split{i+1:02}_zip.csv')
    crime_2020_split[i].to_csv(split_file_path)

In [7]:
def latlong_zip(in_file, start_index=0, save_interval=1000, output_file=None):

    for index, row in in_file.iterrows():
        if index < start_index:
            continue
        
        try:
            lat = row['LAT']
            lon = row['LON']
            coordinates = f"{lat}, {lon}"
            geolocator = Nominatim(user_agent="geoapizipcodepractice")
            location = geolocator.reverse(coordinates)
            location_dict = location.raw
            zipcode = location_dict.get('address', {}).get('postcode', None)
            
            in_file.at[index, 'ZIP'] = zipcode

            if (index + 1) % 100 == 0:
                print(f"Processed {index + 1} rows. Lat is {lat}, long is {lon}, zipcode is {zipcode}. Sleeping for 1 second.")
                time.sleep(1)

            if (index + 1) % save_interval == 0:
                if output_file:
                    in_file.to_csv(output_file)
                    print(f'File saved till row {index + 1}')

        except Exception as e:
            print(str(e))

In [8]:
output_directory = 'data/Crime_zip/'

if not os.path.exists(output_directory):
    os.makedirs(output_directory)

for i in range(0, 10):
    print(f'crime_2020_split_{i+1:02}')
    output_file_path = os.path.join(output_directory, f'crime_data_2020_split{i+1:02}_zip.csv')
    latlong_zip(crime_2020_split[i], 0, output_file=output_file_path)
    crime_2020_split[i].to_csv(output_file_path)

# it should take less than 15 seconds to run 20 rows

crime_2020_split_01
crime_2020_split_02
crime_2020_split_03
crime_2020_split_04
crime_2020_split_05
crime_2020_split_06
crime_2020_split_07
crime_2020_split_08
crime_2020_split_09
crime_2020_split_10


In [9]:
combined_df = pd.concat(crime_2020_split)
combined_df.to_csv('data/crime_data_2020_zipcode_final.csv')
combined_df

Unnamed: 0,DR_NO,Date Rptd,DATE OCC,TIME OCC,AREA,AREA NAME,Rpt Dist No,Part 1-2,Crm Cd,Crm Cd Desc,...,Crm Cd 1,Crm Cd 2,Crm Cd 3,Crm Cd 4,LOCATION,Cross Street,LAT,LON,Year,ZIP
0,10304468,01/08/2020 12:00:00 AM,01/08/2020 12:00:00 AM,2230,3,Southwest,377,2,624,BATTERY - SIMPLE ASSAULT,...,624.0,,,,1100 W 39TH PL,,34.0141,-118.2978,2020.0,90037
1,190101086,01/02/2020 12:00:00 AM,01/01/2020 12:00:00 AM,330,1,Central,163,2,624,BATTERY - SIMPLE ASSAULT,...,624.0,,,,700 S HILL ST,,34.0459,-118.2545,2020.0,90013
2,200110444,04/14/2020 12:00:00 AM,02/13/2020 12:00:00 AM,1200,1,Central,155,2,845,SEX OFFENDER REGISTRANT OUT OF COMPLIANCE,...,845.0,,,,200 E 6TH ST,,34.0448,-118.2474,2020.0,90013
3,191501505,01/01/2020 12:00:00 AM,01/01/2020 12:00:00 AM,1730,15,N Hollywood,1543,2,745,VANDALISM - MISDEAMEANOR ($399 OR UNDER),...,745.0,998.0,,,5400 CORTEEN PL,,34.1685,-118.4019,2020.0,91607
4,191921269,01/01/2020 12:00:00 AM,01/01/2020 12:00:00 AM,415,19,Mission,1998,2,740,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",...,740.0,,,,14400 TITUS ST,,34.2198,-118.4468,2020.0,91402
5,200100501,01/02/2020 12:00:00 AM,01/01/2020 12:00:00 AM,30,1,Central,163,1,121,"RAPE, FORCIBLE",...,121.0,998.0,,,700 S BROADWAY,,34.0452,-118.2534,2020.0,90013
6,200100502,01/02/2020 12:00:00 AM,01/02/2020 12:00:00 AM,1315,1,Central,161,1,442,SHOPLIFTING - PETTY THEFT ($950 & UNDER),...,442.0,998.0,,,700 S FIGUEROA ST,,34.0483,-118.2631,2020.0,90017
7,200100504,01/04/2020 12:00:00 AM,01/04/2020 12:00:00 AM,40,1,Central,155,2,946,OTHER MISCELLANEOUS CRIME,...,946.0,998.0,,,200 E 6TH ST,,34.0448,-118.2474,2020.0,90013
8,200100507,01/04/2020 12:00:00 AM,01/04/2020 12:00:00 AM,200,1,Central,101,1,341,"THEFT-GRAND ($950.01 & OVER)EXCPT,GUNS,FOWL,LI...",...,341.0,998.0,,,700 BERNARD ST,,34.0677,-118.2398,2020.0,90012
9,201710201,06/19/2020 12:00:00 AM,05/26/2020 12:00:00 AM,1925,17,Devonshire,1708,1,341,"THEFT-GRAND ($950.01 & OVER)EXCPT,GUNS,FOWL,LI...",...,341.0,,,,11900 BALBOA BL,,34.2864,-118.5021,2020.0,91342
