In [1]:
!pip install sodapy

[31mdistributed 1.21.8 requires msgpack, which is not installed.[0m


In [2]:
# from https://github.com/socrata/dev.socrata.com/blob/39c6581986466edb5e7f72f5beea5ce69238f8de/snippets/pandas.py

import pandas as pd
from sodapy import Socrata

# Anonymous (unauthenticated) client: only public data sets are reachable.
# The application token is None and no username or password is supplied.
client = Socrata("data.cityofchicago.org", app_token=None)

# Request the first 50000 rows of the data set. The API answers with JSON,
# which sodapy converts to a Python list of dictionaries; column names are
# converted to snake case with special characters removed, and dates and
# locations are formatted.
results = client.get("4ijn-s7e5", limit=50000)

# Build a pandas DataFrame from the list of record dictionaries
inspections = pd.DataFrame.from_records(data=results)



In [3]:
# Download the remaining food inspections (the API is queried 50000 rows per
# call). Relies on `client`, `results` and `inspections` from the previous
# cell: the loop keeps requesting the next page until a call returns no rows.
#
# Pages are collected in a list and concatenated once at the end:
# `DataFrame.append` was deprecated and then removed in pandas 2.0, and
# appending inside the loop re-copies the accumulated frame on every pass.
start = 50000
pages = [inspections]
while results:
    print(start)  # progress: offset of the page being requested
    results = client.get("4ijn-s7e5", limit=50000, offset=start)
    if results:  # the final, empty page only terminates the loop
        pages.append(pd.DataFrame.from_records(results))
    start += 50000

# ignore_index=True replaces the per-page 0..N-1 labels with one unique index
inspections = pd.concat(pages, ignore_index=True)


50000
100000
150000
200000


In [4]:
# Rename "license_" to "license": the trailing underscore is left over from
# sodapy's conversion of the "License #" column name
inspections = inspections.rename(columns={"license_": "license"})

In [5]:
# Discard rows that lack an inspection date or a license
required_columns = ["inspection_date", "license"]
inspections = inspections.dropna(subset=required_columns)

In [6]:
# Keep a single row per inspection_id (no duplicates at the time of writing)
inspections = inspections.drop_duplicates(subset="inspection_id")

In [7]:
# Exclude rows whose license is the string "0"
has_real_license = inspections["license"].ne("0")
inspections = inspections.loc[has_real_license]

In [8]:
# TODO: decide whether to also filter by inspection date (> 2011-09-01)

In [9]:
# Keep only canvass inspections (complaints and re-inspections are excluded)
is_canvass = inspections["inspection_type"].eq("Canvass")
inspections = inspections.loc[is_canvass]

In [10]:
# Keep only successful inspections, i.e. exclude rows whose result is one of
# the outcomes listed below
unsuccessful_results = ["Out of Business", "Business Not Located", "No Entry"]
inspections = inspections.loc[~inspections["results"].isin(unsuccessful_results)]

In [11]:
# Restrict to restaurants and grocery stores (this selection may change)
facility_types = ["Restaurant", "Grocery Store"]
inspections = inspections.loc[inspections["facility_type"].isin(facility_types)]

In [12]:
import os.path

# Output is written under the parent of the current working directory
root_path = os.path.dirname(os.getcwd())
output_csv = os.path.join(root_path, "DATA/food_inspections.csv")

# Save the filtered inspections, leaving the DataFrame index out of the file
inspections.to_csv(output_csv, index=False)