# Import the Counter and defaultdict classes from the collections module.

In [None]:
from collections import Counter, defaultdict

# Import csv module.

In [None]:
import csv

# Open data file, convert data rows into dictionary type, and then put all of them into a list named food.

In [None]:
# Read every row while the file is open and let the `with` block close the
# handle afterwards; a bare open() passed straight to DictReader is never
# closed explicitly. newline='' is how the csv module expects files to be
# opened so that newlines embedded in quoted fields are parsed correctly.
with open('Food_Inspections.csv', newline='') as csv_file:
    food = list(csv.DictReader(csv_file))

# Check the length of food to see how many rows the list holds. We do this first to avoid rendering a huge amount of data in one go.

In [None]:
len(food)

# Take a quick look at the first item in the list to see the structure of data, such as keys (columns) and values (rows).

In [None]:
food[0]

# Take a look at one more item in the list.

In [None]:
food[1]

# Get a set of all possible unique outcomes of Results.

In [None]:
{row['Results'] for row in food}

# Get all rows in food if Results = 'Fail', and then put all of them into a list named fail.

In [None]:
# Keep only the inspections whose Results column is exactly 'Fail'.
fail = [
    inspection
    for inspection in food
    if inspection['Results'] == 'Fail'
]

# Check the length of fail.

In [None]:
len(fail)

# Take a quick look at the first item in the list.

In [None]:
fail[0]

# Now, we may come up with a question: what would be the worst place to eat in Chicago?

# Firstly, we count the rows in fail by DBA Name (business name).

In [None]:
# Tally the failed inspections per business name (DBA Name).
worst = Counter()
worst.update(entry['DBA Name'] for entry in fail)

# Get the top 5 of the worst places to eat.

In [None]:
worst.most_common(5)

# Get the top 15 of the worst places to eat. There are some typing mistakes in DBA Name that need cleaning.

In [None]:
worst.most_common(15)

# Create a new fail list in which each row's DBA Name has its single quotes removed and is converted to upper case.

In [None]:
# Rebuild every failed row as a fresh dict whose business name has its
# single quotes dropped and is upper-cased, so spelling variants of the same
# name collapse into one key. The source rows themselves are not mutated.
fail = [
    {
        **entry,
        'DBA Name': entry['DBA Name'].replace("'", '').upper(),
    }
    for entry in fail
]

# Count rows in fail by DBA Name again.

In [None]:
# Recount the failed inspections per business name from the current fail list.
worst = Counter()
worst.update(entry['DBA Name'] for entry in fail)

# Get the top 5 of the worst places to eat to see whether the DBA Names have been corrected.

In [None]:
worst.most_common(5)

# Get the top 20 of the worst places to eat to see whether the DBA Names have been corrected.

In [None]:
worst.most_common(20)

# We got the worst places to eat. However, there are many different Subway and McDonald's locations. So another question we might now ask is: which street addresses are the worst places to eat?

# We could count the rows in fail by Address.

In [None]:
# Tally the failed inspections per street address.
bad = Counter()
bad.update(entry['Address'] for entry in fail)

# Get the top 5 of the worst addresses to eat.

In [None]:
bad.most_common(5)

# We also want to see which addresses are the worst places to eat in each year.

# Create a default dictionary of counter by_year.

In [None]:
by_year = defaultdict(Counter)

# Count the rows in fail by Address in each year.

In [None]:
# For every failed inspection, increment the count of its Address inside the
# bucket keyed by the last four characters of its Inspection Date.
# NOTE(review): this presumes every date ends in a four-digit year
# (e.g. MM/DD/YYYY) — confirm against the CSV.
for row in fail:
    by_year[row['Inspection Date'][-4:]][row['Address']] +=1

# Get the top 5 of the worst street Addresses in some years.

In [None]:
by_year['2015'].most_common(5)

In [None]:
by_year['2014'].most_common(5)

In [None]:
by_year['2013'].most_common(5)

In [None]:
by_year['2016'].most_common(5)

In [None]:
by_year['2021'].most_common(5)

In [None]:
by_year['2020'].most_common(5)

In [None]:
by_year['2019'].most_common(5)

# We find that '11601 W TOUHY AVE' seems to be the worst street address.
# Now, we get the top 5 of worst addresses to eat again.

In [None]:
bad.most_common(5)

# Call the result of the last expression. Aim to get the worst address.

In [None]:
_[0][0]

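# Get the id() of the result of the last expression, i.e. the identity of the string object holding the worst address. This is a Python object identity, not an identifier taken from the data set.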

In [None]:
id(_)

# Create a list named ohare to store all rows in fail whose Address starts with '11601 W TOUHY'.

In [None]:
# Failed inspections whose Address begins with the '11601 W TOUHY' prefix.
ohare = [
    entry
    for entry in fail
    if entry['Address'].startswith('11601 W TOUHY')
]

# Check the length of ohare.

In [None]:
len(ohare)

# Get a set of all different unique addresses in ohare. 

In [None]:
{ row['Address'] for row in ohare }

# Get a set of all unique businesses in ohare.

In [None]:
{ row['DBA Name'] for row in ohare}

# Take a look at the first item in ohare list. Aim to check DBA Name (Business Name) and AKA Name (Public name)

In [None]:
ohare[0]

# Count the rows in ohare by AKA Name to identify the worst location in ohare to eat.

In [None]:
# Tally the ohare rows per public-facing name (AKA Name).
c = Counter()
c.update(entry['AKA Name'] for entry in ohare)

# Get the top 10 of the worst location in ohare.

In [None]:
c.most_common(10)

# Take a look at the first item of ohare.

In [None]:
ohare[0]

# Next, we may want to do some more exploration.

# Create inspections default dictionary.

In [None]:
inspections = defaultdict(list)

# Add rows in ohare into inspections dictionary as values, and keys are License #.

In [None]:
# Group the ohare rows by their 'License #' value: each row is appended to
# the list stored under its licence number in inspections.
for row in ohare:
    inspections[row['License #']].append(row)

# Get data from inspections dictionary by using License # = 2428080.

In [None]:
inspections['2428080']

# Get all keys in inspections dictionary.

In [None]:
inspections.keys()

# Try to get data from inspections dictionary by using another License # = 34192.

In [None]:
inspections['34192']

# Get all Inspection Dates from the data set of License # = 34192.

In [None]:
# Inspection dates of every row grouped under licence 34192.
[entry['Inspection Date'] for entry in inspections['34192']]

# The question now may be: what is the most common way that a place at ohare fails an inspection?
# First, we take a look at an example of the Violations data.

In [None]:
ohare[175]

# List out all of the different sections in Violations by decomposing the text.

In [None]:
ohare[175]['Violations'].split('|')

# Assign the result of the last expression to violations variable.

In [None]:
# Recompute the split explicitly instead of reading IPython's `_`: `_` only
# holds the previous cell's output when the cells are run strictly in order
# inside IPython, so relying on it makes this cell fragile. The value is the
# same list of '|'-separated sections of row 175's Violations text.
violations = ohare[175]['Violations'].split('|')

In [None]:
violations

# Strip out all the text after '- Comments' from Violation sections.

In [None]:
# Keep only the text before '- Comments:'. partition() returns the whole
# string when the marker is absent, whereas slicing with v.find(...) would
# use -1 in that case and silently drop the last character.
[v.partition('- Comments:')[0] for v in violations]

# Strip out the comment part and white space from Violation sections.

In [None]:
# Keep only the text before '- Comments:' and trim surrounding whitespace.
# partition() returns the whole string when the marker is absent, whereas
# slicing with v.find(...) would use -1 and drop the last character.
[v.partition('- Comments:')[0].strip() for v in violations]

# Next, we get the Violations text of every row in ohare, each split into its sections on '|'.

In [None]:
# One list of '|'-separated violation sections per ohare row.
all_violations = [
    entry['Violations'].split('|')
    for entry in ohare
]

# Then, we count all rows in ohare by Violations with comments stripped out.

In [None]:
c = Counter()

In [None]:
# Count each violation by its title, i.e. the text before '- Comments:'.
for violations in all_violations:
    for v in violations:
        # partition() returns the whole string when the marker is absent;
        # slicing with v.find(...) would use -1 there and cut off the last
        # character, splitting one violation across two different keys.
        title = v.partition('- Comments:')[0].strip()
        # ''.split('|') yields [''], so rows with no violation text would
        # otherwise be counted under an empty-string key.
        if title:
            c[title] += 1

# Finally, we find out what are the most common violations in ohare.

In [None]:
c.most_common(5)