In [3]:
"""
Some code to look at sanitation violations in restaurant inspection data in Florida.

These are the high-priority violations involving sanitation concerns:

09 Bare hand contact with RTE food; Alternative Operating Procedure (AOP)
11 Employee health knowledge; ill/symptomatic employee present
12a Hands clean and washed properly; use of hand antiseptic if use of AOP
16 Dishwashing facilities; chemical test kit(s); gauges
22 Food-contact surfaces clean and sanitized

These are the columns in the csv files from the state that correspond to those:

30	AE	Bare hand contact with RTE food; Alternative Operating Procedure (AOP)
32	AG	Employee health knowledge; ill/symptomatic employee present
33	AH	Hands washed and clean, good hygienic practices, eating / drinking /smoking
37	AL	Dishwashing facilities; chemical test kit(s); gauges
43	AR	Food-contact surfaces clean and sanitized

"""
# Last updated 3/11/2020 by doug.ray@starbanner.com

# built-in libraries
import csv
import datetime
import os.path
import re
import sqlite3
from sqlite3 import Error
import sys

# installed with pip
import pandas as pd
import numpy as np

# Build one dataframe (df_all) from the seven inspection CSV extracts.
#
# The files are read positionally: `colnums` lists the zero-based positions of
# the columns to keep, and `colnames` supplies the labels for those columns in
# the same left-to-right order. Positions 30, 32, 33, 37 and 43 are the five
# sanitation violation columns described in the docstring at the top of the
# notebook (spreadsheet columns AE, AG, AH, AL and AR).
colnames = [
    "county", "licnum", "sitename", "streetaddy", "cityaddy", "zip",
    "inspnum", "visitnum", "insptype", "inspdispos", "inspdate", "totalvio", "highvio",
    "intermedvio", "basicvio", "vio9", "vio11", "vio12", "vio16", "vio22", "licid", "visitid"
]

colnums = [
    2, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 17, 18, 19, 20, 30, 32, 33, 37, 43, 80, 81
]

filepath_1 = 'fy2019/1fdinspi031120.csv'
filepath_2 = 'fy2019/2fdinspi031120.csv'
filepath_3 = 'fy2019/3fdinspi031120.csv'
filepath_4 = 'fy2019/4fdinspi031120.csv'
filepath_5 = 'fy2019/5fdinspi031120.csv'
filepath_6 = 'fy2019/6fdinspi031120.csv'
filepath_7 = 'fy2019/7fdinspi031120.csv'

csv_paths = [
    filepath_1, filepath_2, filepath_3, filepath_4,
    filepath_5, filepath_6, filepath_7,
]

# Every file is parsed with exactly the same options, so read them in one pass
# instead of repeating the call seven times. Because `names` is passed and
# `header` is not, pandas treats the first row of each file as data.
source_frames = [
    pd.read_csv(
        path,
        names=colnames,
        usecols=colnums,
        encoding="ISO-8859-1",
    )
    for path in csv_paths
]

# Keep the per-file names bound in case another cell refers to them.
df_1, df_2, df_3, df_4, df_5, df_6, df_7 = source_frames

# ignore_index=True renumbers the rows 0..n-1 across all seven files.
df_all = pd.concat(source_frames, ignore_index=True)


In [23]:
"""
These are the high-priority violations involving sanitation concerns:

09 Bare hand contact with RTE food; Alternative Operating Procedure (AOP)
11 Employee health knowledge; ill/symptomatic employee present
12a Hands clean and washed properly; use of hand antiseptic if use of AOP
16 Dishwashing facilities; chemical test kit(s); gauges
22 Food-contact surfaces clean and sanitized

These are the columns in the csv files from the state that correspond to those:

30	AE	Bare hand contact with RTE food; Alternative Operating Procedure (AOP)
32	AG	Employee health knowledge; ill/symptomatic employee present
33	AH	Hands washed and clean, good hygienic practices, eating / drinking /smoking
37	AL	Dishwashing facilities; chemical test kit(s); gauges
43	AR	Food-contact surfaces clean and sanitized



# Create dataframe for just those with violation 9: bare hand contact with ready-to-eat food
#vio9 = df_all.vio9 > 0
df9 = df_all[vio9]

# Create dataframe for just those with violation 11: ill/symptomatic employee present
#vio11 = df_all.vio11 > 0
df11 = df_all[vio11]

# Create dataframe for just those with violation 12: Hands clean and washed properly
#vio12 = df_all.vio12 > 0
df12 = df_all[vio12]

# Create dataframe for just those with violation 16: Dishwashing facilities; chemical test kit(s); gauges
#vio16 = df_all.vio16 > 0
df16 = df_all[vio16]

# Create dataframe for just those with violation 22: Food-contact surfaces clean and sanitized
#vio22 = df_all.vio22 > 0
df22 = df_all[vio22]

"""


In [27]:
"""
Connect to database with details to select 09-01-4 violations:
High Priority - Employee touching ready-to-eat food with their bare hands - 
food was not being heated as a sole ingredient to 145 degrees F or immediately 
added to other ingredients to be cooked/heated to the minimum required temperature 
to allow bare hand contact.  Establishment has no approved Alternative Operating Procedure.
"""

# Pull the 09-01-4 violation rows. The connection is closed as soon as the
# query has been read, so re-running this cell does not pile up open handles
# on the SQLite file (each cell that needs the database opens its own).
cnx = sqlite3.connect('datafiles/rinspect031120.sqlite')
try:
    violations_914 = pd.read_sql_query(
        "SELECT * FROM violations WHERE violation LIKE '09-01-4'", cnx
    )
finally:
    cnx.close()

# Attach the inspection-level columns from df_all to each violation row
# (pd.merge defaults to an inner join on visitid), then drop the id and
# details_id columns from the result.
df_914 = (
    pd.merge(violations_914, df_all, on="visitid")
    .drop(columns=['id', 'details_id'])
)

In [28]:
"""
Connect to database with details to select 09-02-4 violations:
High Priority - Bare hand contact with ready-to-eat food while 
the establishment is under a foodborne illness investigation.
"""

# Pull the 09-02-4 violation rows. The connection is closed as soon as the
# query has been read, so re-running this cell does not pile up open handles
# on the SQLite file (each cell that needs the database opens its own).
cnx = sqlite3.connect('datafiles/rinspect031120.sqlite')
try:
    violations_924 = pd.read_sql_query(
        "SELECT * FROM violations WHERE violation LIKE '09-02-4'", cnx
    )
finally:
    cnx.close()

# Attach the inspection-level columns from df_all to each violation row
# (pd.merge defaults to an inner join on visitid), then drop the id and
# details_id columns from the result.
df_924 = (
    pd.merge(violations_924, df_all, on="visitid")
    .drop(columns=['id', 'details_id'])
)

In [29]:
"""
Connect to database with details to select 09-03-4 violations:
High Priority - Employee touching ready-to-eat food with their bare hands. 
Establishment serves a highly susceptible population. 
"""

# Pull the 09-03-4 violation rows. The connection is closed as soon as the
# query has been read, so re-running this cell does not pile up open handles
# on the SQLite file (each cell that needs the database opens its own).
cnx = sqlite3.connect('datafiles/rinspect031120.sqlite')
try:
    violations_934 = pd.read_sql_query(
        "SELECT * FROM violations WHERE violation LIKE '09-03-4'", cnx
    )
finally:
    cnx.close()

# Attach the inspection-level columns from df_all to each violation row
# (pd.merge defaults to an inner join on visitid), then drop the id and
# details_id columns from the result.
df_934 = (
    pd.merge(violations_934, df_all, on="visitid")
    .drop(columns=['id', 'details_id'])
)


In [None]:
"""
Connect to database with details to select 09-19-4 violations:
High Priority - Employee touching ready-to-eat food with their bare hands 
- ready-to-eat food used for multiple menu items, some of which will not 
be cooked/heated to the minimum required temperature to allow bare hand contact.  
Establishment has no approved Alternative Operating Procedure.
"""

# Pull the 09-19-4 violation rows. The connection is closed as soon as the
# query has been read, so re-running this cell does not pile up open handles
# on the SQLite file (each cell that needs the database opens its own).
cnx = sqlite3.connect('datafiles/rinspect031120.sqlite')
try:
    violations_9194 = pd.read_sql_query(
        "SELECT * FROM violations WHERE violation LIKE '09-19-4'", cnx
    )
finally:
    cnx.close()

# Attach the inspection-level columns from df_all to each violation row
# (pd.merge defaults to an inner join on visitid), then drop the id and
# details_id columns from the result.
df_9194 = (
    pd.merge(violations_9194, df_all, on="visitid")
    .drop(columns=['id', 'details_id'])
)

In [None]:
# write to csv
# NOTE(review): `df35A` is not assigned in any cell visible in this notebook,
# so on a fresh kernel this line raises NameError. Confirm which dataframe was
# meant to be exported (the output name suggests a violation-35 frame) and
# define it above before running this cell.
df35A.to_csv('analysis/vio35.csv')

In [None]:
# find repeat offenders
