In [141]:
# Import dependencies
import json
import time
import urllib.parse
from pathlib import Path

import pandas as pd
import requests

In [175]:
# Query parameters: edit these three values to change what gets extracted.
# borough is compared against boro_nm; the two dates bound rpt_dt (both ends inclusive).
borough = "MANHATTAN"  # one of the five boroughs
begin_date = "2015-05-05"  # first report date, YYYY-MM-DD
end_date = "2015-05-06"  # last report date, YYYY-MM-DD

In [None]:
# Build the filter expression one condition at a time, then glue the pieces with AND.
filter_clauses = [
    f"boro_nm = '{borough}'",
    f"rpt_dt >= '{begin_date}'",
    f"rpt_dt <= '{end_date}'",
]
query_string = " AND ".join(filter_clauses)

# Percent-encode the expression (spaces become '+') so it can be placed in the request URL.
safe_string = urllib.parse.quote_plus(query_string)

In [173]:
# Starting state for the paging loop.

# Paging controls: rows requested per call and the row offset of the next call.
limit = 1000
offset = 0

# Progress counters.
# NOTE: `set` shadows the built-in type of the same name for the rest of the session;
# the name is kept because the extraction loop reads and increments it.
set = 1              # number of the page ("set") being fetched, used in the progress message
records = 0          # running total of rows received
records_count = 1    # size of the most recent page; non-zero so the loop body runs at least once

# One dictionary per extracted row is collected here.
data_list = []

In [None]:
# Page through the API, `limit` rows at a time, until an empty page comes back.
# Reads and updates the paging state defined earlier in the notebook
# (safe_string, limit, offset, set, records, records_count) and appends one
# dictionary per row to data_list.

# Output column name -> key of the same value in each JSON row.
field_map = {
    "Complaint_No": "cmplnt_num",
    "Reported_Dt": "rpt_dt",
    "Borough_Name": "boro_nm",
    "Ofns_Desc": "ofns_desc",
    "PD_Desc": "pd_desc",
    "Law_Cat": "law_cat_cd",
    "Latitude": "latitude",
    "Longitude": "longitude",
}

while records_count != 0:
    # Define the URL for the JSON. Ordering by cmplnt_num keeps the paging stable between calls.
    url = f"https://data.cityofnewyork.us/resource/qgea-i56i.json?$where={safe_string}&$limit={limit}&$offset={offset}&$order=cmplnt_num"

    # Get the JSON. The timeout stops a stalled connection from hanging the notebook, and
    # raise_for_status() surfaces HTTP errors instead of parsing an error payload as data.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    json_results = response.json()
    if not isinstance(json_results, list):
        raise ValueError(
            f"Expected a JSON list of rows, got {type(json_results).__name__}"
        )

    # Add to the number of results to track total number
    records += len(json_results)
    # Print message to console showing number of rows extracted, show set number.
    print(f"Extracting {len(json_results)} rows of data, Set # {set}")

    # Some records contain blank values (seen for the lat/lon fields); the JSON row then has
    # no key for that field at all. .get() stores None for any missing key rather than
    # raising KeyError, without hiding unrelated errors the way a bare `except:` would.
    # NOTE(review): applying this to every field assumes other columns can be blank too - confirm.
    for row in json_results:
        data_list.append({column: row.get(key) for column, key in field_map.items()})

    # Move the offset forward by one page
    offset = limit + offset
    # Update the set count
    set += 1
    # Update the records_count variable with count of json_results. Once it hits zero, loop will terminate
    records_count = len(json_results)
    # Delay of 1 second to avoid rate limiting; skipped after the final (empty) page
    if records_count:
        time.sleep(1)

# Once loop condition is no longer true, print total number of records
print(f"Total number of records extracted: {records}")

In [170]:
# Create DataFrame
# The columns are listed explicitly (same names and order as the dictionaries in data_list)
# so the frame keeps its schema even when data_list is empty; without this an empty
# extraction yields a frame with no columns at all.
crime_columns = [
    "Complaint_No",
    "Reported_Dt",
    "Borough_Name",
    "Ofns_Desc",
    "PD_Desc",
    "Law_Cat",
    "Latitude",
    "Longitude",
]
crime_df = pd.DataFrame(data_list, columns=crime_columns)

In [None]:
# DataFrame info: print pandas' summary of crime_df (DataFrame.info) as a quick
# sanity check of the extraction result before it is saved.
crime_df.info()

In [None]:
# Save crime_df to CSV file -- @Dalton, change the file name before running
# NOTE(review): the date range in this file name is typed by hand and is not derived from
# begin_date / end_date, so it can disagree with the data actually extracted; "NPYD" also
# looks like a typo for "NYPD". Both are left as-is in case other files rely on this path - confirm.
output_path = Path('Output/NPYD_Complaints_01-01-13_12-31-18.csv')
# to_csv does not create missing folders, so make sure the target folder exists first.
output_path.parent.mkdir(parents=True, exist_ok=True)
crime_df.to_csv(output_path, index=False)