### Import NYC Restaurant Inspection Dataset
We retrieved the latest NYC Restaurant Inspection Results dataset from NYC Open Data.

In [38]:
import pandas as pd
import numpy as np
# Read the inspection-results CSV into a DataFrame and preview its first five rows.
inspection_csv = 'DOHMH_New_York_City_Restaurant_Inspection_Results.csv'
df_rest = pd.read_csv(inspection_csv)
df_rest.head(5)

Unnamed: 0,CAMIS,DBA,BORO,BUILDING,STREET,ZIPCODE,PHONE,CUISINE DESCRIPTION,INSPECTION DATE,ACTION,...,RECORD DATE,INSPECTION TYPE,Latitude,Longitude,Community Board,Council District,Census Tract,BIN,BBL,NTA
0,40395525,MCDONALD AVENUE DINER,Brooklyn,1111,MCDONALD AVENUE,11230.0,7189518475,American,05/29/2018,Violations were cited in the following area(s).,...,12/16/2019,Cycle Inspection / Initial Inspection,40.626251,-73.976281,312.0,44.0,46201.0,3170385.0,3065040000.0,BK42
1,40982677,STARBUCKS,Manhattan,2,BROADWAY,10004.0,2123444290,Café/Coffee/Tea,07/16/2018,Violations were cited in the following area(s).,...,12/16/2019,Cycle Inspection / Re-inspection,40.704111,-74.013186,101.0,1.0,900.0,1000029.0,1000110000.0,MN25
2,41236580,DUNKIN',Queens,10005,QUEENS BOULEVARD,11375.0,7187933690,Donuts,09/26/2019,Violations were cited in the following area(s).,...,12/16/2019,Cycle Inspection / Initial Inspection,40.726899,-73.853544,406.0,29.0,71303.0,4050969.0,4021190000.0,QN17
3,50069385,GOLDEN BIRD CHINESE RESTAURANT,Brooklyn,1669,NOSTRAND AVE,11226.0,7186938810,Chinese,10/25/2017,Violations were cited in the following area(s).,...,12/16/2019,Pre-permit (Operational) / Initial Inspection,40.6455,-73.948992,317.0,45.0,82600.0,3110400.0,3049150000.0,BK95
4,50066345,TINA'S PLACE,Brooklyn,1002,FLUSHING AVE,11206.0,7184976890,American,09/14/2018,Violations were cited in the following area(s).,...,12/16/2019,Cycle Inspection / Initial Inspection,40.703724,-73.931235,304.0,34.0,42500.0,3071854.0,3031480000.0,BK78


In [40]:
# Summarize the raw frame: row count, column names, non-null counts and dtypes.
df_rest.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398348 entries, 0 to 398347
Data columns (total 26 columns):
CAMIS                    398348 non-null int64
DBA                      397706 non-null object
BORO                     398348 non-null object
BUILDING                 398096 non-null object
STREET                   398338 non-null object
ZIPCODE                  392816 non-null float64
PHONE                    398331 non-null object
CUISINE DESCRIPTION      398348 non-null object
INSPECTION DATE          398348 non-null object
ACTION                   396709 non-null object
VIOLATION CODE           392352 non-null object
VIOLATION DESCRIPTION    389066 non-null object
CRITICAL FLAG            389066 non-null object
SCORE                    381138 non-null float64
GRADE                    201420 non-null object
GRADE DATE               199687 non-null object
RECORD DATE              398348 non-null object
INSPECTION TYPE          396709 non-null object
Latitude                

### Cleaning the dataset...

In [41]:
# Convert the date fields from strings to datetime64.
# The export writes these dates as MM/DD/YYYY (e.g. 05/29/2018, 12/16/2019), so the
# format is given explicitly: parsing 400k strings is faster than letting pandas
# infer the format, and the month-first reading is no longer implicit.
date_cols = ['RECORD DATE', 'INSPECTION DATE']
date_format = '%m/%d/%Y'

for col in date_cols:
    df_rest[col] = pd.to_datetime(df_rest[col], format=date_format)

In [4]:
# (disabled) keep only the records whose GRADE is A, B or C
# df_rest = df_rest[df_rest['GRADE'].isin(['A','B','C'])]

In [42]:
# Keep only records with usable coordinates: Latitude and Longitude must both be
# present and both be non-zero.
# The zero test uses `&` rather than `|`: a point with only one zero component is
# just as invalid as (0, 0), and `|` would have let it through.
has_coords = df_rest['Latitude'].notnull() & df_rest['Longitude'].notnull()
nonzero_coords = (df_rest['Latitude'] != 0) & (df_rest['Longitude'] != 0)

df_rest = df_rest[has_coords & nonzero_coords]

In [43]:
# Discard every record whose DBA (the restaurant's name) is missing.
df_rest = df_rest.dropna(subset=['DBA'])

In [44]:
# How many distinct restaurants (CAMIS ids) remain after cleaning?
len(df_rest['CAMIS'].unique())

26531

In [45]:
# Re-check the frame after cleaning: remaining row count, non-null counts and dtypes.
df_rest.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 397281 entries, 0 to 398347
Data columns (total 26 columns):
CAMIS                    397281 non-null int64
DBA                      397281 non-null object
BORO                     397281 non-null object
BUILDING                 397275 non-null object
STREET                   397281 non-null object
ZIPCODE                  391764 non-null float64
PHONE                    397264 non-null object
CUISINE DESCRIPTION      397281 non-null object
INSPECTION DATE          397281 non-null datetime64[ns]
ACTION                   396293 non-null object
VIOLATION CODE           391943 non-null object
VIOLATION DESCRIPTION    388659 non-null object
CRITICAL FLAG            388659 non-null object
SCORE                    380737 non-null float64
GRADE                    201174 non-null object
GRADE DATE               199442 non-null object
RECORD DATE              397281 non-null datetime64[ns]
INSPECTION TYPE          396293 non-null object
Latitude

### Let's visualize the number of restaurants by latest inspection grade and borough

In [46]:
import altair as alt
from vega_datasets import data

In [48]:
# Most recent inspection row per restaurant: order each CAMIS by inspection date,
# newest first, then keep only the first row of every CAMIS.
latest_inspection = (
    df_rest
    .sort_values(by=['CAMIS', 'INSPECTION DATE'], ascending=[True, False])
    .drop_duplicates(subset='CAMIS', keep='first')
)

# Distinct restaurants per (borough, grade) pair, flattened back into columns.
restaurants_per_grade = latest_inspection.groupby(['BORO', 'GRADE'])['CAMIS'].nunique()
grade_count = restaurants_per_grade.reset_index()

# One bar panel per borough; bars are coloured by grade and have rounded top corners.
grade_bars = alt.Chart(grade_count).mark_bar(
    cornerRadiusTopLeft=3,
    cornerRadiusTopRight=3,
)
grade_bars.encode(
    alt.X('GRADE'),
    alt.Y('CAMIS'),
    alt.Color('GRADE'),
    alt.Column('BORO'),
)

### Let's visualize the number of restaurants by grade over time by Inspection Date

In [49]:
# Index a copy of the data by inspection date so it can be resampled by month.
df_dates = df_rest.copy()
df_dates.index = df_dates['INSPECTION DATE']

# Monthly number of inspections per grade.
# The frame has several rows for the same CAMIS / INSPECTION DATE, so counting raw
# rows would count those repeats too. Collapse to one row per
# (restaurant, inspection date, grade) before counting so each inspection
# contributes exactly once to its month.
grades_ts = (
    df_dates
    .drop_duplicates(subset=['CAMIS', 'INSPECTION DATE', 'GRADE'])
    .groupby('GRADE')
    .resample('MS')['CAMIS']
    .count()
    .reset_index()
)

In [84]:
# Hover selection: a single selection that follows the mouse, snaps to the nearest
# point and is keyed on INSPECTION DATE (empty='none').
nearest = alt.selection(
    type='single',
    on='mouseover',
    nearest=True,
    fields=['INSPECTION DATE'],
    empty='none',
)

# Base layer: one smoothed line per grade, with both axis titles blanked.
line = alt.Chart().mark_line(interpolate='basis').encode(
    x=alt.X('INSPECTION DATE:T', axis=alt.Axis(title='')),
    y=alt.Y('CAMIS:Q', axis=alt.Axis(title='')),
    color=alt.Color('GRADE:N'),
)

# Invisible points spanning the chart; they carry the selection and thereby
# report the x-value under the cursor.
selectors = (
    alt.Chart()
    .mark_point()
    .encode(x=alt.X('INSPECTION DATE:T'), opacity=alt.value(0))
    .add_selection(nearest)
)

# Markers on the lines, visible only for the selected date.
marker_opacity = alt.condition(nearest, alt.value(1), alt.value(0))
points = line.mark_point().encode(opacity=marker_opacity)

# Value labels next to the markers, blank unless the date is selected.
label_text = alt.condition(nearest, 'CAMIS:Q', alt.value(' '))
text = line.mark_text(align='left', dx=5, dy=-5).encode(text=label_text)

# Vertical gray rule at the selected date.
rules = (
    alt.Chart()
    .mark_rule(color='gray')
    .encode(x=alt.X('INSPECTION DATE:T'))
    .transform_filter(nearest)
)

# Stack the five layers into one chart and bind them to the monthly series.
inspections = alt.layer(
    line, selectors, points, rules, text,
    data=grades_ts,
    width=600,
    height=400,
    title='Monthly Restaurant Inspections',
)
inspections
# inspections.save('inspections_trend.html')

#### Create a map to visualize the locations of the restaurants that received letter C inspection grade. We will use the NYC NTA boundary mapping. 

Because Altair by default refuses to render a chart with more than 5,000 rows of data, we plot only the locations (lat/lng) of restaurants whose latest inspection grade is the poorest one (C).

In [113]:
# NYC Neighborhood Tabulation Area (NTA) boundaries, loaded as a TopoJSON feature.
nta_topo = 'https://raw.githubusercontent.com/grantpezeshki/NYC-topojson/master/NTA.topojson'
nta = alt.topo_feature(nta_topo, 'collection')

# NYC NTA background: light-gray shapes with white borders.
background = alt.Chart(nta).mark_geoshape(
    stroke='white',
    strokeWidth=2
).encode(
    color=alt.value('#eee'),
).properties(
    width=700,
    height=500,
    title='Restaurants with Inspection Grade C'
)

# C-grade restaurant locations.
# `latest_inspection` (newest inspection row per CAMIS) is built in the
# grade-by-borough cell above and reused here instead of being recomputed.
c_grade = latest_inspection[latest_inspection['GRADE'] == 'C']

# Only the columns the chart actually reads are handed to Altair; the data is
# embedded in the chart spec, so passing all columns would bloat the notebook
# and any saved HTML for no benefit.
tooltip_cols = ['DBA', 'CUISINE DESCRIPTION', 'GRADE',
                'BUILDING', 'STREET', 'ZIPCODE', 'BORO']
map_cols = ['Longitude', 'Latitude'] + tooltip_cols

points = alt.Chart(c_grade[map_cols]).mark_circle(
    size=8,
    color='steelblue'
).encode(
    longitude='Longitude:Q',
    latitude='Latitude:Q',
    tooltip=tooltip_cols)

# Overlay the restaurant points on the neighborhood outlines.
interactive_map = background + points
interactive_map
#interactive_map.save('Map of C-grade Restaurants.html')