### Import NYC Restaurant Inspection Dataset
We retrieved the latest NYC Restaurant Inspection Data from NYC Open Data

In [1]:
import pandas as pd
import numpy as np
df_rest = pd.read_csv('DOHMH_New_York_City_Restaurant_Inspection_Results.csv')
df_rest.head()

Unnamed: 0,CAMIS,DBA,BORO,BUILDING,STREET,ZIPCODE,PHONE,CUISINE DESCRIPTION,INSPECTION DATE,ACTION,...,RECORD DATE,INSPECTION TYPE,Latitude,Longitude,Community Board,Council District,Census Tract,BIN,BBL,NTA
0,40395525,MCDONALD AVENUE DINER,Brooklyn,1111,MCDONALD AVENUE,11230.0,7189518475,American,05/29/2018,Violations were cited in the following area(s).,...,12/16/2019,Cycle Inspection / Initial Inspection,40.626251,-73.976281,312.0,44.0,46201.0,3170385.0,3065040000.0,BK42
1,40982677,STARBUCKS,Manhattan,2,BROADWAY,10004.0,2123444290,Café/Coffee/Tea,07/16/2018,Violations were cited in the following area(s).,...,12/16/2019,Cycle Inspection / Re-inspection,40.704111,-74.013186,101.0,1.0,900.0,1000029.0,1000110000.0,MN25
2,41236580,DUNKIN',Queens,10005,QUEENS BOULEVARD,11375.0,7187933690,Donuts,09/26/2019,Violations were cited in the following area(s).,...,12/16/2019,Cycle Inspection / Initial Inspection,40.726899,-73.853544,406.0,29.0,71303.0,4050969.0,4021190000.0,QN17
3,50069385,GOLDEN BIRD CHINESE RESTAURANT,Brooklyn,1669,NOSTRAND AVE,11226.0,7186938810,Chinese,10/25/2017,Violations were cited in the following area(s).,...,12/16/2019,Pre-permit (Operational) / Initial Inspection,40.6455,-73.948992,317.0,45.0,82600.0,3110400.0,3049150000.0,BK95
4,50066345,TINA'S PLACE,Brooklyn,1002,FLUSHING AVE,11206.0,7184976890,American,09/14/2018,Violations were cited in the following area(s).,...,12/16/2019,Cycle Inspection / Initial Inspection,40.703724,-73.931235,304.0,34.0,42500.0,3071854.0,3031480000.0,BK78


In [2]:
df_rest.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398348 entries, 0 to 398347
Data columns (total 26 columns):
CAMIS                    398348 non-null int64
DBA                      397706 non-null object
BORO                     398348 non-null object
BUILDING                 398096 non-null object
STREET                   398338 non-null object
ZIPCODE                  392816 non-null float64
PHONE                    398331 non-null object
CUISINE DESCRIPTION      398348 non-null object
INSPECTION DATE          398348 non-null object
ACTION                   396709 non-null object
VIOLATION CODE           392352 non-null object
VIOLATION DESCRIPTION    389066 non-null object
CRITICAL FLAG            389066 non-null object
SCORE                    381138 non-null float64
GRADE                    201420 non-null object
GRADE DATE               199687 non-null object
RECORD DATE              398348 non-null object
INSPECTION TYPE          396709 non-null object
Latitude                

### Let's clean the deduped dataset a bit more...

In [3]:
# format date fields as datetime
date_cols = ['RECORD DATE', 'INSPECTION DATE']

for col in date_cols:
    df_rest[col] = pd.to_datetime(df_rest[col])

In [4]:
# drop restaurants that don't have restaurant grade ABC 
df_rest = df_rest[df_rest['GRADE'].isin(['A','B','C'])]

In [5]:
# drop records where lat/lng are nulls
df_rest = df_rest[(df_rest['Latitude'].notnull() & df_rest['Longitude'].notnull())]

In [6]:
# drop restaurants where DBA (name of restaurant) is null
df_rest = df_rest[df_rest['DBA'].notnull()]

In [9]:
# check for number of unique restaurants
df_rest['CAMIS'].nunique()

24997

In [10]:
df_rest.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 192568 entries, 1 to 398345
Data columns (total 26 columns):
CAMIS                    192568 non-null int64
DBA                      192568 non-null object
BORO                     192568 non-null object
BUILDING                 192563 non-null object
STREET                   192568 non-null object
ZIPCODE                  189694 non-null float64
PHONE                    192558 non-null object
CUISINE DESCRIPTION      192568 non-null object
INSPECTION DATE          192568 non-null datetime64[ns]
ACTION                   192568 non-null object
VIOLATION CODE           191825 non-null object
VIOLATION DESCRIPTION    191033 non-null object
CRITICAL FLAG            191033 non-null object
SCORE                    192568 non-null float64
GRADE                    192568 non-null object
GRADE DATE               192568 non-null object
RECORD DATE              192568 non-null datetime64[ns]
INSPECTION TYPE          192568 non-null object
Latitude

### Let's visualize the number of restaurants by the Inspection Grade and Borough using Altair

In [11]:
import altair as alt
from vega_datasets import data

In [12]:
df_rest.groupby(['BORO', 'GRADE'])['CAMIS'].nunique().reset_index()

Unnamed: 0,BORO,GRADE,CAMIS
0,Bronx,A,2212
1,Bronx,B,564
2,Bronx,C,197
3,Brooklyn,A,6110
4,Brooklyn,B,1310
5,Brooklyn,C,519
6,Manhattan,A,9781
7,Manhattan,B,1889
8,Manhattan,C,736
9,Queens,A,5569


In [13]:
grade_count = df_rest.groupby(['BORO', 'GRADE'])['CAMIS'].nunique().reset_index()

alt.Chart(grade_count).mark_bar(cornerRadiusTopLeft=3,
    cornerRadiusTopRight=3).encode(
    x='GRADE',
    y='CAMIS',
    color='GRADE',
    column='BORO')

### Let's visualize the number of restaurants by grade over time by Inspection Date

In [17]:
df_dates = df_rest.copy()
df_dates.index = df_dates['INSPECTION DATE']
grades_ts = df_dates.groupby('GRADE').resample('MS')['CAMIS'].count().reset_index()

In [18]:
grades_ts

Unnamed: 0,GRADE,INSPECTION DATE,CAMIS
0,A,2013-06-01,1
1,A,2013-07-01,0
2,A,2013-08-01,0
3,A,2013-09-01,0
4,A,2013-10-01,0
...,...,...,...
188,C,2019-07-01,541
189,C,2019-08-01,490
190,C,2019-09-01,302
191,C,2019-10-01,293


In [19]:
highlight = alt.selection(type='single', on='mouseover',
                          fields=['GRADE'], nearest=True)

base = alt.Chart(grades_ts).encode(
    x='INSPECTION DATE:T',
    y='CAMIS:Q',
    color='GRADE:N'
)

points = base.mark_circle().encode(
    opacity=alt.value(0)
).add_selection(
    highlight
).properties(
    width=600
)

lines = base.mark_line().encode(
    size=alt.condition(~highlight, alt.value(1), alt.value(3))
)

points + lines

### Create a map to visualize the restaurants with NYC neighborhood boundaries (NTA)

In [None]:
import altair as alt
from vega_datasets import data

nyc = alt.topo_feature('https://raw.githubusercontent.com/grantpezeshki/NYC-topojson/master/NTA.topojson', 
                       feature='objects')


In [None]:
alt.topo_feature?

In [None]:
airports = data.airports.url