In [1]:
!pip install sodapy

[33mYou are using pip version 9.0.1, however version 10.0.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [11]:
# from https://github.com/socrata/dev.socrata.com/blob/39c6581986466edb5e7f72f5beea5ce69238f8de/snippets/pandas.py

import pandas as pd
from sodapy import Socrata

# Unauthenticated client only works with public data sets. Note 'None'
# in place of application token, and no username or password:
client = Socrata("data.cityofchicago.org", None)

# First 50000 results, returned as JSON from API 
# Connverted to Python list of dictionaries by sodapy.
# Column names converted to snake case, special chars removed
# Dates and location formatted
results = client.get("4ijn-s7e5", limit=50000)

# Convert to pandas DataFrame
inspections = pd.DataFrame.from_records(results)



In [12]:
# Download remaining food inspections (limit 50000 / call)
start = 50000
while results:
    print(start)
    results = client.get("4ijn-s7e5", limit=50000, offset=start)
    inspections = inspections.append(pd.DataFrame.from_records(results))
    start += 50000


50000
100000
150000
200000


In [14]:
# Remove trailing backslash (left over from sodapy conversion of "License #")
inspections.rename(columns={"license_": "license"}, inplace=True)

In [19]:
inspections.license.value_counts()

0          467
1354323    198
14616      172
1574001     80
1974745     59
1884255     46
1490035     46
20481       44
1596210     44
1142451     43
2083833     40
1476553     40
1302136     40
60184       39
1094        38
2013962     38
1000572     38
2108657     37
9154        36
55054       36
1909713     33
4190        33
1448266     33
25152       33
1042888     33
1879470     32
75883       32
80690       32
22811       32
1273274     32
          ... 
1817191      1
1302057      1
1515725      1
2060887      1
77267        1
2493097      1
1300852      1
1718137      1
2003079      1
9267         1
2124766      1
2137676      1
1578002      1
39958        1
2162364      1
13101        1
1620928      1
1045971      1
2350420      1
68134        1
2003057      1
84419        1
2252644      1
2590110      1
1021807      1
1045997      1
1474033      1
8026         1
55665        1
1696651      1
Name: license, Length: 34461, dtype: int64

In [16]:
# Drop rows with missing data
inspections.dropna(subset=["inspection_date", "license"], inplace=True)

In [25]:
# Drop duplicates (currently none)
inspections.drop_duplicates("inspection_id", inplace=True)

In [26]:
# Drop "0" licenses
inspections = inspections[inspections.license != "0"]

In [None]:
# Filter by date > 2011-09-01?

In [30]:
# Only consider canvas inspections (not complaints or re-inspections)
inspections = inspections[inspections.inspection_type == "Canvass"]

In [35]:
# Only consider successful inspections (added a f)
inspections = inspections[~inspections.results.isin(["Out of Business", "Business Not Located", "No Entry"])]

Unnamed: 0,address,aka_name,city,dba_name,facility_type,inspection_date,inspection_id,inspection_type,latitude,license,location,longitude,results,risk,state,violations,zip
11,10354 S HALSTED ST,SUBWAY,CHICAGO,SUBWAY,Restaurant,2018-06-01T00:00:00,2176589,Canvass,41.705296492547895,1719251,"{'latitude': '41.705296492547895', 'needs_reco...",-87.64293095094614,Fail,Risk 1 (High),IL,18. NO EVIDENCE OF RODENT OR INSECT OUTER OPEN...,60628
15,11601 W TOUHY AVE,CHILI'S TOO (T2 F4),CHICAGO,"HOST INTERNATIONAL INC, CHILIS T-2",Restaurant,2018-05-31T00:00:00,2176568,Canvass,42.008536400868735,34192,"{'latitude': '42.008536400868735', 'needs_reco...",-87.91442843927047,Fail,Risk 1 (High),IL,2. FACILITIES TO MAINTAIN PROPER TEMPERATURE -...,60666
17,4835 N WESTERN AVE,ELIZABETH RESTAURANT,CHICAGO,ELIZABETH RESTAURANT,Restaurant,2018-05-31T00:00:00,2176564,Canvass,41.969693264065235,2049249,"{'latitude': '41.969693264065235', 'needs_reco...",-87.68879318966334,Pass w/ Conditions,Risk 1 (High),IL,29. PREVIOUS MINOR VIOLATION(S) CORRECTED 7-42...,60625
18,1449 E 57TH ST,SUBWAY,CHICAGO,SUBWAY,Restaurant,2018-05-31T00:00:00,2176561,Canvass,41.791404735052566,1447435,"{'latitude': '41.791404735052566', 'needs_reco...",-87.58977783510586,Fail,Risk 1 (High),IL,33. FOOD AND NON-FOOD CONTACT EQUIPMENT UTENSI...,60637
29,3922 N ELSTON AVE,THAI THAI RESTAURANT,CHICAGO,"PERFECT TJA, INC.",Restaurant,2018-05-30T00:00:00,2176518,Canvass,41.95230870594275,2196203,"{'latitude': '41.95230870594275', 'needs_recod...",-87.71734716272289,Pass w/ Conditions,Risk 1 (High),IL,"30. FOOD IN ORIGINAL CONTAINER, PROPERLY LABEL...",60618
33,11601 W TOUHY AVE,ARGO TEA (T3-L Near Lotto and TSA),CHICAGO,ARGO TEA,Restaurant,2018-05-30T00:00:00,2176507,Canvass,42.008536400868735,2565062,"{'latitude': '42.008536400868735', 'needs_reco...",-87.91442843927047,Pass w/ Conditions,Risk 1 (High),IL,33. FOOD AND NON-FOOD CONTACT EQUIPMENT UTENSI...,60666
35,342 W 75TH ST,A & J FOOD MARKET INCORPORATED,CHICAGO,A & J FOOD MARKET INCORPORATED,Grocery Store,2018-05-30T00:00:00,2176510,Canvass,41.7581997027466,69143,"{'latitude': '41.7581997027466', 'needs_recodi...",-87.63375706271778,Fail,Risk 1 (High),IL,19. OUTSIDE GARBAGE WASTE GREASE AND STORAGE A...,60621
42,3877 N ELSTON AVE,LEONA'S RESTAURANT,CHICAGO,LEONA'S RESTAURANT,Restaurant,2018-05-30T00:00:00,2176498,Canvass,41.951897553868875,2313672,"{'latitude': '41.951897553868875', 'needs_reco...",-87.71631400456782,Pass,Risk 1 (High),IL,36. LIGHTING: REQUIRED MINIMUM FOOT-CANDLES OF...,60618
46,7352 S HALSTED ST,GEORGIA'S FOOD DEPOT,CHICAGO,GEORGIA'S FOOD DEPOT,Restaurant,2018-05-30T00:00:00,2176491,Canvass,41.76000277235905,2419039,"{'latitude': '41.76000277235905', 'needs_recod...",-87.64442750483087,Fail,Risk 1 (High),IL,40. REFRIGERATION AND METAL STEM THERMOMETERS ...,60621
48,11601 W TOUHY AVE,NUTS ON CLARK (T5 ARRIVAL),CHICAGO,"NUTS ON CLARK T5 O'HARE INTERNATIONAL AIRPORT,...",Restaurant,2018-05-30T00:00:00,2176484,Canvass,42.008536400868735,2517018,"{'latitude': '42.008536400868735', 'needs_reco...",-87.91442843927047,Pass w/ Conditions,Risk 2 (Medium),IL,28. * INSPECTION REPORT SUMMARY DISPLAYED AND ...,60666


In [37]:
# Only consider restaurants and grocery stores (subject to change)
inspections[inspections.inspection_type.isin(["Restaurant", "Grocery Store"])]

In [38]:
inspections

Unnamed: 0,address,aka_name,city,dba_name,facility_type,inspection_date,inspection_id,inspection_type,latitude,license,location,longitude,results,risk,state,violations,zip


In [9]:
import os.path
root_path = os.path.dirname(os.getcwd())

# Save result
inspections.to_csv(os.path.join(root_path, "DATA/food_inspections.csv"), index=False)