[Data Source](https://data.kingcounty.gov/Health-Wellness/Food-Establishment-Inspection-Data/f29f-zza5/about_data)

[Rating System](https://kingcounty.gov/en/dept/dph/health-safety/food-safety/inspection-rating-system/rating-system)

[Permit types and risk categories](https://kingcounty.gov/en/dept/dph/health-safety/food-safety/inspection-rating-system/rating-system#:~:text=recorded%20meeting.-,Permit%20types%20and%20risk%20categories,-Who%27s%20included%20in)

In [17]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

In [18]:
## Settings
%matplotlib inline
sns.set_context("paper", font_scale=1.25)

pd.set_option('display.max_columns', 100)
pd.set_option('display.float_format', lambda x: f'{x:,.2f}')
pd.set_option('display.max_rows', 50)

# Import Data

In [43]:
# Local CSV export of the inspection data, relative to the notebook's
# working directory (the file name carries a 20240220 stamp).
data_path = './data/Food_Establishment_Inspection_Data_20240220.csv'

# low_memory=False: parse the file in a single pass instead of in chunks,
# so each column's dtype is inferred from all of its values at once.
source = pd.read_csv(filepath_or_buffer=data_path, low_memory=False)

# Preview the first five rows.
source.head(n=5)

Unnamed: 0,Name,Program Identifier,Inspection Date,Description,Address,City,Zip Code,Phone,Longitude,Latitude,Inspection Business Name,Inspection Type,Inspection Score,Inspection Result,Inspection Closed Business,Violation Type,Violation Description,Violation Points,Business_ID,Inspection_Serial_Num,Violation_Record_ID,Grade
0,#807 TUTTA BELLA,#807 TUTTA BELLA,03/02/2023,Seating 0-12 - Risk Category III,2746 NE 45TH ST,SEATTLE,98105,(206) 722-6400,-122.3,47.66,#807 TUTTA BELLA,Routine Inspection/Field Review,20.0,Unsatisfactory,False,RED,1300 - Food contact surfaces cleaned and sanit...,15,PR0089260,DAJ5DTHLV,IVBTPZO0B,1.0
1,#807 TUTTA BELLA,#807 TUTTA BELLA,03/02/2023,Seating 0-12 - Risk Category III,2746 NE 45TH ST,SEATTLE,98105,(206) 722-6400,-122.3,47.66,#807 TUTTA BELLA,Routine Inspection/Field Review,20.0,Unsatisfactory,False,RED,2120 - Proper cold holding temperatures ( 42 d...,5,PR0089260,DAJ5DTHLV,IV5GOME67,1.0
2,#807 TUTTA BELLA,#807 TUTTA BELLA,08/31/2022,Seating 0-12 - Risk Category III,2746 NE 45TH ST,SEATTLE,98105,(206) 722-6400,-122.3,47.66,#807 TUTTA BELLA,Routine Inspection/Field Review,10.0,Unsatisfactory,False,BLUE,"3200 - Insects, rodents, animals not present; ...",5,PR0089260,DAEEWQC0L,IVQ7QYW2V,1.0
3,#807 TUTTA BELLA,#807 TUTTA BELLA,08/31/2022,Seating 0-12 - Risk Category III,2746 NE 45TH ST,SEATTLE,98105,(206) 722-6400,-122.3,47.66,#807 TUTTA BELLA,Routine Inspection/Field Review,10.0,Unsatisfactory,False,RED,0200 - Food Worker Cards current for all food ...,5,PR0089260,DAEEWQC0L,IV0J437H6,1.0
4,#807 TUTTA BELLA,#807 TUTTA BELLA,01/13/2022,Seating 0-12 - Risk Category III,2746 NE 45TH ST,SEATTLE,98105,(206) 722-6400,-122.3,47.66,#807 TUTTA BELLA,Routine Inspection/Field Review,0.0,Satisfactory,False,,,0,PR0089260,DAWWGK08K,,1.0


# Review Data Quality

In [44]:
# Summarize the frame: number of rows, plus each column's non-null count and dtype.
source.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 261681 entries, 0 to 261680
Data columns (total 22 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   Name                        261681 non-null  object 
 1   Program Identifier          261681 non-null  object 
 2   Inspection Date             261219 non-null  object 
 3   Description                 261681 non-null  object 
 4   Address                     261681 non-null  object 
 5   City                        261681 non-null  object 
 6   Zip Code                    261681 non-null  object 
 7   Phone                       190250 non-null  object 
 8   Longitude                   260689 non-null  float64
 9   Latitude                    260689 non-null  float64
 10  Inspection Business Name    261219 non-null  object 
 11  Inspection Type             261219 non-null  object 
 12  Inspection Score            261182 non-null  float64
 13  Inspection Res

## Inspect Missing Values

In [30]:
# Count missing values per column (sum the null mask down the rows).
source.isna().sum(axis='index')

Name                               0
Program Identifier                 0
Inspection Date                  462
Description                        0
Address                            0
City                               0
Zip Code                           0
Phone                          71431
Longitude                        992
Latitude                         992
Inspection Business Name         462
Inspection Type                  462
Inspection Score                 499
Inspection Result                462
Inspection Closed Business       462
Violation Type                115308
Violation Description         115308
Violation Points                   0
Business_ID                        0
Inspection_Serial_Num            462
Violation_Record_ID           115308
Grade                          63876
dtype: int64

In [28]:
# Show the rows where `Inspection Closed Business` is missing, to see what
# else is absent on those records.
source.loc[lambda frame: frame['Inspection Closed Business'].isna()]

Unnamed: 0,Name,Program Identifier,Inspection Date,Description,Address,City,Zip Code,Phone,Longitude,Latitude,Inspection Business Name,Inspection Type,Inspection Score,Inspection Result,Inspection Closed Business,Violation Type,Violation Description,Violation Points,Business_ID,Inspection_Serial_Num,Violation_Record_ID,Grade
20,1 OAK SANDWICHES,1 OAK SANDWICHES @ 1314 AUBURN WAY N AUBURN 9...,,Mobile Food Unit - Risk Category III,5805 LACEY BL SE,LACEY,98503,(253) 227-4542,,,,,,,,,,0,PR0091532,,,
175,12S TACOS MEXICAN FOOD KC1012,12S TACOS MEXICAN FOOD KC1012,,Mobile Food Unit - Risk Category III,625 S 4TH ST,RENTON,98057,(425) 545-5552,,,,,,,,,,0,PR0091263,,,
182,12TH AVE SLICE,12TH AVE SLICE,,Seating 13-50 - Risk Category III,1542 12TH AVE,SEATTLE,98122,(206) 861-9399,-122.32,47.62,,,,,,,,0,PR0081260,,,
603,3BROS VIETNAMESE CUISINE LLC,3BROS VIETNAMESE CUISINE LLC,,Seating 13-50 - Risk Category III,14611 1ST AVE S,BURIEN,98168,(206) 838-6020,-122.33,47.47,,,,,,,,0,PR0076274,,,
674,405 EXPRESS MARKET,405 EXPRESS MARKET,,Grocery Store-no seating - Risk Category I,1800 NE 44TH ST STE 100,Renton,98056,(425) 255-4259,-122.19,47.53,,,,,,,,0,PR0013001,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
258958,YAKIMONO,YAKIMONO @ ROUTE 425 728 0706,,Mobile Food Unit - Risk Category III,10836 E MARGINAL WAY S,TUKWILA,98168,(425) 728-0706,-122.29,47.51,,,,,,,,0,PR0090441,,,
258980,YAN TEA,YAN TEA,,Seating 0-12 - Risk Category III,4474 University WAY NE,Seattle,98105,(206) 257-0168,,,,,,,,,,0,PR0091289,,,
259929,YUMMY ASIAN CUISINE,YUMMY ASIAN CUISINE,,Seating 51-150 - Risk Category III,22228 PACIFIC HWY S,DES MOINES,98198,(425) 239-1571,-122.30,47.40,,,,,,,,0,PR0033573,,,
260668,ZAINA FOOD DRINKS & FRIENDS,ZAINA FOOD DRINKS & FRIENDS,,Seating 13-50 - Risk Category III,800 NE 65TH ST,Seattle,98115,(206) 293-9502,-122.32,47.68,,,,,,,,0,PR0045410,,,


In [45]:
# Keep only rows that have an `Inspection Score`, then renumber the index
# from 0 so it is contiguous again.
# Note: this rebinds `source`; cells that were run against the unfiltered
# frame will show different results if they are re-run after this one.
source = source[source['Inspection Score'].notna()].reset_index(drop=True)
source

Unnamed: 0,Name,Program Identifier,Inspection Date,Description,Address,City,Zip Code,Phone,Longitude,Latitude,Inspection Business Name,Inspection Type,Inspection Score,Inspection Result,Inspection Closed Business,Violation Type,Violation Description,Violation Points,Business_ID,Inspection_Serial_Num,Violation_Record_ID,Grade
0,#807 TUTTA BELLA,#807 TUTTA BELLA,03/02/2023,Seating 0-12 - Risk Category III,2746 NE 45TH ST,SEATTLE,98105,(206) 722-6400,-122.30,47.66,#807 TUTTA BELLA,Routine Inspection/Field Review,20.00,Unsatisfactory,False,RED,1300 - Food contact surfaces cleaned and sanit...,15,PR0089260,DAJ5DTHLV,IVBTPZO0B,1.00
1,#807 TUTTA BELLA,#807 TUTTA BELLA,03/02/2023,Seating 0-12 - Risk Category III,2746 NE 45TH ST,SEATTLE,98105,(206) 722-6400,-122.30,47.66,#807 TUTTA BELLA,Routine Inspection/Field Review,20.00,Unsatisfactory,False,RED,2120 - Proper cold holding temperatures ( 42 d...,5,PR0089260,DAJ5DTHLV,IV5GOME67,1.00
2,#807 TUTTA BELLA,#807 TUTTA BELLA,08/31/2022,Seating 0-12 - Risk Category III,2746 NE 45TH ST,SEATTLE,98105,(206) 722-6400,-122.30,47.66,#807 TUTTA BELLA,Routine Inspection/Field Review,10.00,Unsatisfactory,False,BLUE,"3200 - Insects, rodents, animals not present; ...",5,PR0089260,DAEEWQC0L,IVQ7QYW2V,1.00
3,#807 TUTTA BELLA,#807 TUTTA BELLA,08/31/2022,Seating 0-12 - Risk Category III,2746 NE 45TH ST,SEATTLE,98105,(206) 722-6400,-122.30,47.66,#807 TUTTA BELLA,Routine Inspection/Field Review,10.00,Unsatisfactory,False,RED,0200 - Food Worker Cards current for all food ...,5,PR0089260,DAEEWQC0L,IV0J437H6,1.00
4,#807 TUTTA BELLA,#807 TUTTA BELLA,01/13/2022,Seating 0-12 - Risk Category III,2746 NE 45TH ST,SEATTLE,98105,(206) 722-6400,-122.30,47.66,#807 TUTTA BELLA,Routine Inspection/Field Review,0.00,Satisfactory,False,,,0,PR0089260,DAWWGK08K,,1.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261177,ZYLBERSCHTEIN'S DELICATESSEN & BAKERY,ZYLBERSCHTEIN'S DELICATESSEN & BAKERY,09/06/2017,Seating 0-12 - Risk Category III,11752 15TH AVE NE,SEATTLE,98125,(415) 385-1379,-122.31,47.72,ZYLBERSCHTEIN'S DELICATESSEN & BAKERY,Consultation/Education - Field,0.00,Complete,False,,,0,PR0086991,DALIX7JOT,,2.00
261178,ZZ DESSERT,ZZ DESSERT,09/26/2023,Seating 0-12 - Risk Category III,18230 E VALLEY HWY STE 178,KENT,98032,(206) 779-0528,-122.22,47.44,ZZ DESSERT,Routine Inspection/Field Review,5.00,Satisfactory,False,BLUE,"3400 - Wiping cloths properly used, stored, sa...",5,PR0089412,DARMOYIGD,IVD3G9MHG,1.00
261179,ZZ DESSERT,ZZ DESSERT,01/23/2023,Seating 0-12 - Risk Category III,18230 E VALLEY HWY STE 178,KENT,98032,(206) 779-0528,-122.22,47.44,ZZ DESSERT,Routine Inspection/Field Review,10.00,Satisfactory,False,BLUE,"4400 - Plumbing properly sized, installed,...",5,PR0089412,DAQZ5Q076,IV3MMPWH7,1.00
261180,ZZ DESSERT,ZZ DESSERT,01/23/2023,Seating 0-12 - Risk Category III,18230 E VALLEY HWY STE 178,KENT,98032,(206) 779-0528,-122.22,47.44,ZZ DESSERT,Routine Inspection/Field Review,10.00,Satisfactory,False,BLUE,"3400 - Wiping cloths properly used, stored, sa...",5,PR0089412,DAQZ5Q076,IVGU0QCRM,1.00


In [46]:
# Re-count missing values per column on the current `source` frame.
source.isna().sum(axis='index')

Name                               0
Program Identifier                 0
Inspection Date                    0
Description                        0
Address                            0
City                               0
Zip Code                           0
Phone                          71406
Longitude                        811
Latitude                         811
Inspection Business Name           0
Inspection Type                    0
Inspection Score                   0
Inspection Result                  0
Inspection Closed Business         0
Violation Type                114816
Violation Description         114816
Violation Points                   0
Business_ID                        0
Inspection_Serial_Num              0
Violation_Record_ID           114816
Grade                          63413
dtype: int64

## Possible Targets: `Inspection Score` and `Inspection Result`

In [48]:
# Frequency of each distinct inspection score (NaN kept as its own bucket),
# listed from the highest score down to the lowest.
(
    source['Inspection Score']
    .value_counts(dropna=False)
    .sort_index(ascending=False)
)

Inspection Score
180.00        18
178.00        14
167.00        19
154.00        16
153.00        34
           ...  
2.00        1020
0.00      114418
-1.00          2
-2.00          2
-10.00         1
Name: count, Length: 143, dtype: int64

In [42]:
# Share of rows per inspection result (NaN kept as its own bucket), most
# frequent first — descending order is the value_counts default.
(
    source['Inspection Result']
    .value_counts(normalize=True, dropna=False)
)

Inspection Result
Unsatisfactory             0.45
Satisfactory               0.40
Complete                   0.14
Incomplete                 0.00
Not Accessible             0.00
Not Applicable             0.00
Not Ready For Inspection   0.00
Baseline Data              0.00
Not Confirmed              0.00
No Longer At Location      0.00
Confirmed                  0.00
In Compliance              0.00
Out of Business            0.00
Needs Assessment           0.00
Not In Compliance          0.00
Increased Knowledge        0.00
Not Tested                 0.00
Not Permitted              0.00
Name: proportion, dtype: float64

In [50]:
# Inspection results to treat as candidate target classes.
target_classes = [
    'Unsatisfactory',
    'Satisfactory',
    'Complete',
]

# Rows whose result is one of the target classes. The filtered frame is only
# displayed here; `source` itself is left unchanged.
source.loc[source['Inspection Result'].isin(target_classes)]

Unnamed: 0,Name,Program Identifier,Inspection Date,Description,Address,City,Zip Code,Phone,Longitude,Latitude,Inspection Business Name,Inspection Type,Inspection Score,Inspection Result,Inspection Closed Business,Violation Type,Violation Description,Violation Points,Business_ID,Inspection_Serial_Num,Violation_Record_ID,Grade
0,#807 TUTTA BELLA,#807 TUTTA BELLA,03/02/2023,Seating 0-12 - Risk Category III,2746 NE 45TH ST,SEATTLE,98105,(206) 722-6400,-122.30,47.66,#807 TUTTA BELLA,Routine Inspection/Field Review,20.00,Unsatisfactory,False,RED,1300 - Food contact surfaces cleaned and sanit...,15,PR0089260,DAJ5DTHLV,IVBTPZO0B,1.00
1,#807 TUTTA BELLA,#807 TUTTA BELLA,03/02/2023,Seating 0-12 - Risk Category III,2746 NE 45TH ST,SEATTLE,98105,(206) 722-6400,-122.30,47.66,#807 TUTTA BELLA,Routine Inspection/Field Review,20.00,Unsatisfactory,False,RED,2120 - Proper cold holding temperatures ( 42 d...,5,PR0089260,DAJ5DTHLV,IV5GOME67,1.00
2,#807 TUTTA BELLA,#807 TUTTA BELLA,08/31/2022,Seating 0-12 - Risk Category III,2746 NE 45TH ST,SEATTLE,98105,(206) 722-6400,-122.30,47.66,#807 TUTTA BELLA,Routine Inspection/Field Review,10.00,Unsatisfactory,False,BLUE,"3200 - Insects, rodents, animals not present; ...",5,PR0089260,DAEEWQC0L,IVQ7QYW2V,1.00
3,#807 TUTTA BELLA,#807 TUTTA BELLA,08/31/2022,Seating 0-12 - Risk Category III,2746 NE 45TH ST,SEATTLE,98105,(206) 722-6400,-122.30,47.66,#807 TUTTA BELLA,Routine Inspection/Field Review,10.00,Unsatisfactory,False,RED,0200 - Food Worker Cards current for all food ...,5,PR0089260,DAEEWQC0L,IV0J437H6,1.00
4,#807 TUTTA BELLA,#807 TUTTA BELLA,01/13/2022,Seating 0-12 - Risk Category III,2746 NE 45TH ST,SEATTLE,98105,(206) 722-6400,-122.30,47.66,#807 TUTTA BELLA,Routine Inspection/Field Review,0.00,Satisfactory,False,,,0,PR0089260,DAWWGK08K,,1.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261177,ZYLBERSCHTEIN'S DELICATESSEN & BAKERY,ZYLBERSCHTEIN'S DELICATESSEN & BAKERY,09/06/2017,Seating 0-12 - Risk Category III,11752 15TH AVE NE,SEATTLE,98125,(415) 385-1379,-122.31,47.72,ZYLBERSCHTEIN'S DELICATESSEN & BAKERY,Consultation/Education - Field,0.00,Complete,False,,,0,PR0086991,DALIX7JOT,,2.00
261178,ZZ DESSERT,ZZ DESSERT,09/26/2023,Seating 0-12 - Risk Category III,18230 E VALLEY HWY STE 178,KENT,98032,(206) 779-0528,-122.22,47.44,ZZ DESSERT,Routine Inspection/Field Review,5.00,Satisfactory,False,BLUE,"3400 - Wiping cloths properly used, stored, sa...",5,PR0089412,DARMOYIGD,IVD3G9MHG,1.00
261179,ZZ DESSERT,ZZ DESSERT,01/23/2023,Seating 0-12 - Risk Category III,18230 E VALLEY HWY STE 178,KENT,98032,(206) 779-0528,-122.22,47.44,ZZ DESSERT,Routine Inspection/Field Review,10.00,Satisfactory,False,BLUE,"4400 - Plumbing properly sized, installed,...",5,PR0089412,DAQZ5Q076,IV3MMPWH7,1.00
261180,ZZ DESSERT,ZZ DESSERT,01/23/2023,Seating 0-12 - Risk Category III,18230 E VALLEY HWY STE 178,KENT,98032,(206) 779-0528,-122.22,47.44,ZZ DESSERT,Routine Inspection/Field Review,10.00,Satisfactory,False,BLUE,"3400 - Wiping cloths properly used, stored, sa...",5,PR0089412,DAQZ5Q076,IVGU0QCRM,1.00
