In [1]:
#Import dependencies
import csv
import json
import time
from pathlib import Path
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from geopy.exc import GeopyError
from geopy.geocoders import Nominatim
from scipy.stats import linregress

#Destination path of the CSV written by the final export cell (passed to to_csv())
output_data_file = "output/newark_abandoned_data.csv"


In [2]:
#Get the JSON from Newark
url = 'http://data.ci.newark.nj.us/datastore/odata3.0/796e2a01-d459-4574-9a48-23805fe0c3e0?$format=json'
#Fail fast: the timeout keeps the cell from hanging on an unresponsive server,
#and raise_for_status() surfaces HTTP errors before we try to parse the body
http_response = requests.get(url, timeout=30)
http_response.raise_for_status()
#NOTE: the "repsonse" spelling is kept on purpose so any other cell that
#already refers to these names keeps working
repsonse = http_response.json()
#Drill down to the value results we need from the API
repsonse2 = repsonse['value']

#Build a DataFrame, clean up column headers (keys), and add the city and
#state keys that are missing from the source data - all in one chain so
#re-running the cell always rebuilds df_part1 from the raw payload
df_part1 = (
    pd.DataFrame.from_dict(repsonse2)
    .rename(columns={"Vital Address": "Address", "_id": "ID"})
    .assign(City="Newark", State="NJ")
)

#On screen
df_part1

Unnamed: 0,Address,Owner Name,Most Recent Inspection,Lot,ID,Open,Block,City,State
0,1098-1124 MCCARTER HWY,"1100 MCCARTER LLC,",2019-08-30T00:00:00,14.0,1,Vacant Only,5.0,Newark,NJ
1,589 BROAD ST,"HG JETSON URBAN RENE,",2020-01-23T00:00:00,19.0,2,Vacant Only,18.0,Newark,NJ
2,29 BURNET ST,"FOREST HILLS HOLDINGS, LLC",2020-01-24T00:00:00,24.0,3,Vacant Only,43.0,Newark,NJ
3,24 EAGLES ST,"CAMPBELL, FREDERICK",2020-02-10T00:00:00,55.0,4,Vacant Only,45.0,Newark,NJ
4,7 WARREN ST,"WEST, MICHAEL",2019-03-29T00:00:00,29.0,5,Vacant Only,64.0,Newark,NJ
...,...,...,...,...,...,...,...,...,...
495,885-887 S 17TH,"MERSABA PROPERTIES,LLC",2019-08-06T00:00:00,25.0,496,Vacant Only,3013.0,Newark,NJ
496,921 S 17TH ST,CITY OF NEWARK,2019-08-08T00:00:00,43.0,497,Vacant Only,3013.0,Newark,NJ
497,863 S 17TH ST,"MONTALVO, ALFREDO",2019-08-08T00:00:00,14.0,498,Vacant Only,3013.0,Newark,NJ
498,918 S 18TH ST,CITY OF NEWARK,2019-08-12T00:00:00,67.0,499,Vacant Only,3013.0,Newark,NJ


In [3]:
#Now we're going to get the zip codes, latitudes, and longitudes.
#One geocoding request is made per row of df_part1. Rows that cannot be
#geocoded get "NULL" placeholders and their index is recorded in drop_indices
#so they can be dropped later on.

#Nominatim's public server allows at most one request per second, so the loop
#pauses after every lookup (a full run therefore takes several minutes)
GEOCODE_PAUSE_SECONDS = 1.0

#define variables
drop_indices = []
lat_list = []
lon_list = []
zip_code = []

#Grab indices for loopin'
indices = list(df_part1.index.values)

# initialize Nominatim API (geopy.geocoders)
#timeout is raised from geopy's 1 second default to avoid spurious failures
#NOTE(review): "geoapiExercises" is a generic sample user agent; Nominatim's
#usage policy asks for one that identifies this application - consider changing it
geolocator = Nominatim(user_agent="geoapiExercises", timeout=10)

#Loop it
for index in indices:
    #place is the input for geolocator, namely address, city, state by index number
    place = f"{df_part1.loc[index, 'Address']} {df_part1.loc[index, 'City']} {df_part1.loc[index, 'State']}"

    #Grab location data results from geolocator; only geocoder/service errors
    #are treated as a failed lookup, anything else is a real bug and should surface
    try:
        location = geolocator.geocode(place, addressdetails=True)
    except GeopyError:
        location = None
    finally:
        time.sleep(GEOCODE_PAUSE_SECONDS)

    #Read the zip code from the structured address details instead of guessing
    #its position inside the free-text 'display_name'
    postcode = None
    if location is not None:
        postcode = location.raw.get("address", {}).get("postcode")

    if postcode is None:
        #populate "NULL" for lookups with no match or no zip code
        zip_code.append("NULL")
        lat_list.append("NULL")
        lon_list.append("NULL")
        #capture index of failures in order to drop rows later on
        drop_indices.append(index)
        continue

    #append zip code, latitude and longitude to the appropriate list
    zip_code.append(postcode)
    lat_list.append(location.latitude)
    lon_list.append(location.longitude)

#Build the dictionary and related DataFrame; it is indexed by the same labels
#as df_part1 so the later index-based join lines up row for row
new_dict = {"Zip Code": zip_code,"Latitude":lat_list,"Longitude":lon_list}
new_df = pd.DataFrame(new_dict, index=indices)


In [4]:
#Attach the geocoding columns to the property records (index-aligned left join)
df_part2 = df_part1.join(new_df, how='left')

#Column layout for the final table
col = [
    'ID', 'Lot', 'Block', 'Open', 'Owner Name',
    'Address', 'City', 'State', 'Zip Code',
    'Latitude', 'Longitude', 'Most Recent Inspection',
]

#Discard the rows recorded in drop_indices (the "NULL" results),
#then apply the column layout
newark_abandoned_df = df_part2.drop(index=drop_indices).loc[:, col]

#On screen
newark_abandoned_df

Unnamed: 0,ID,Lot,Block,Open,Owner Name,Address,City,State,Zip Code,Latitude,Longitude,Most Recent Inspection
1,2,19.0,18.0,Vacant Only,"HG JETSON URBAN RENE,",589 BROAD ST,Newark,NJ,07102,40.7416,-74.1689,2020-01-23T00:00:00
2,3,24.0,43.0,Vacant Only,"FOREST HILLS HOLDINGS, LLC",29 BURNET ST,Newark,NJ,07102,40.7463,-74.1737,2020-01-24T00:00:00
3,4,55.0,45.0,Vacant Only,"CAMPBELL, FREDERICK",24 EAGLES ST,Newark,NJ,07102,40.746,-74.1725,2020-02-10T00:00:00
4,5,29.0,64.0,Vacant Only,"WEST, MICHAEL",7 WARREN ST,Newark,NJ,07102,40.7391,-74.1724,2019-03-29T00:00:00
5,6,19.0,98.0,Vacant Only,MADISON DEVELOPMENT LLC,789 SOUTH ORANGE AVE,Newark,NJ,07106,40.745,-74.2199,2019-10-18T00:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...
494,495,15.0,3013.0,Vacant Only,GLOVER LAURENCE,865 S 17TH ST,Newark,NJ,07108,40.7258,-74.2132,2019-08-08T00:00:00
496,497,43.0,3013.0,Vacant Only,CITY OF NEWARK,921 S 17TH ST,Newark,NJ,07108,40.7248,-74.2136,2019-08-08T00:00:00
497,498,14.0,3013.0,Vacant Only,"MONTALVO, ALFREDO",863 S 17TH ST,Newark,NJ,07108,40.7259,-74.2132,2019-08-08T00:00:00
498,499,67.0,3013.0,Vacant Only,CITY OF NEWARK,918 S 18TH ST,Newark,NJ,07108,40.7244,-74.2147,2019-08-12T00:00:00


In [5]:
#Export to CSV
#Make sure the destination folder exists first: to_csv() does not create
#missing directories and would otherwise fail on a fresh checkout
Path(output_data_file).parent.mkdir(parents=True, exist_ok=True)
#index=False leaves the DataFrame index out of the file; the header row is kept
newark_abandoned_df.to_csv(output_data_file, index=False, header=True)