In [1]:
# from https://github.com/socrata/dev.socrata.com/blob/39c6581986466edb5e7f72f5beea5ce69238f8de/snippets/pandas.py

import pandas as pd
from sodapy import Socrata

# An unauthenticated client can only read public data sets: the application
# token is None and no username or password is supplied.
client = Socrata("data.cityofchicago.org", None)

# Request the first 50000 records. The API answers with JSON, which sodapy
# converts to a Python list of dictionaries. Column names arrive in snake
# case with special characters removed; dates and locations are formatted.
results = client.get("4ijn-s7e5", limit=50000)

# Build a pandas DataFrame from the list of records
inspections = pd.DataFrame.from_records(data=results)



In [2]:
# Download remaining food inspections (limit 50000 / call).
# Each page is collected in a list and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0, and growing the frame inside
# the loop re-copies every previously downloaded row on each iteration.
start = 50000
pages = [inspections]
while results:
    print(start)
    results = client.get("4ijn-s7e5", limit=50000, offset=start)
    # An empty page marks the end of the data set; the loop condition stops
    # on it, so there is nothing worth adding to the list.
    if results:
        pages.append(pd.DataFrame.from_records(results))
    start += 50000

# Row labels are kept as-is (no ignore_index), matching what append produced.
inspections = pd.concat(pages)


50000
100000
150000
200000


In [3]:
# Number of rows per inspection type, most frequent first
inspections["inspection_type"].value_counts()

Canvass                                      90500
License                                      22175
Canvass Re-Inspection                        18024
Complaint                                    15692
License Re-Inspection                         7930
Complaint Re-Inspection                       6507
Short Form Complaint                          6167
Suspected Food Poisoning                       747
Consultation                                   669
License-Task Force                             605
Tag Removal                                    603
Out of Business                                284
Task Force Liquor 1475                         254
Recent Inspection                              244
Suspected Food Poisoning Re-inspection         171
Complaint-Fire                                 161
Short Form Fire-Complaint                      113
Special Events (Festivals)                      63
No Entry                                        60
Complaint-Fire Re-inspection   

In [3]:
# Remove the trailing underscore from "license_" (left over from sodapy's
# conversion of the "License #" column name)
inspections = inspections.rename(columns={"license_": "license"})

In [4]:
# Discard rows that lack an inspection date or a license
required_columns = ["inspection_date", "license"]
inspections = inspections.dropna(subset=required_columns)

In [5]:
# Keep only the first row for each inspection_id (currently there are no
# duplicates)
inspections = inspections.drop_duplicates(subset="inspection_id")

In [6]:
# Exclude rows whose license is the string "0"
has_real_license = inspections["license"] != "0"
inspections = inspections[has_real_license]

In [7]:
# Only consider "Canvass" inspections (not complaints or re-inspections)
is_canvass = inspections["inspection_type"] == "Canvass"
inspections = inspections[is_canvass]

In [8]:
# Only consider successful inspections: exclude rows whose result is one of
# the statuses listed below
excluded_results = ["Out of Business", "Business Not Located", "No Entry"]
was_excluded = inspections["results"].isin(excluded_results)
inspections = inspections[~was_excluded]

In [9]:
# Convert latitude & longitude to floats.
# `inspections` is the result of boolean filtering at this point, so
# assigning columns onto it in place raises SettingWithCopyWarning. astype
# with a column -> dtype mapping returns a new frame, which is rebound here.
inspections = inspections.astype({"latitude": float, "longitude": float})

In [10]:
import os.path
# The project root is taken to be the parent of the current working directory
working_dir = os.getcwd()
root_path = os.path.dirname(working_dir)

# Save result as CSV, leaving out the DataFrame index
output_csv = os.path.join(root_path, "DATA/food_inspections.csv")
inspections.to_csv(output_csv, index=False)