In [None]:
import numpy as np
import pandas as pd
import os.path

# root_path is the parent directory of the current working directory;
# every data file below is resolved relative to it.
root_path = os.path.dirname(os.getcwd())


def _read_project_csv(relative_path):
    """Read the CSV stored at ``relative_path`` under ``root_path`` into a DataFrame."""
    return pd.read_csv(os.path.join(root_path, relative_path))


# Business license data
licenses = _read_project_csv("DATA/business_licenses.csv")

# Food inspection data
inspections = _read_project_csv("DATA/food_inspections.csv")

# Violation matrix
violation_matrix = _read_project_csv("DATA/violation_matrix.csv")

### CALCULATE FEATURES BASED ON FOOD INSPECTION DATA

In [36]:
# Columns that form the basis of model_data
# (risk is not included in the Chicago repository).
base_columns = [
    "inspection_date",
    "license",
    "inspection_id",
    "facility_type",
    "results",
    "risk",
]
data = inspections[base_columns]

In [38]:
# Attach the violation matrix to each inspection. This is an inner join on
# inspection_id, so only inspections present in both frames are kept.
data = data.merge(violation_matrix, how="inner", on="inspection_id")

In [39]:
# Create pass / fail flags: 1 when the inspection result is exactly "Pass" /
# "Fail", 0 for every other result. The comparison is vectorized instead of
# calling a Python lambda per row; int64 is requested explicitly so the dtype
# does not depend on the platform default integer.
pass_flags = data["results"].eq("Pass").astype("int64")
fail_flags = data["results"].eq("Fail").astype("int64")
data["pass_flags"] = pass_flags
data["fail_flags"] = fail_flags

In [46]:
# Sort inspections chronologically. The inspections CSV is read without date
# parsing, so inspection_date may hold plain strings whose lexical order is not
# chronological (e.g. MM/DD/YYYY). Sort on parsed timestamps instead, using a
# temporary column so the original inspection_date values are left untouched.
# pd.to_datetime raises on values it cannot parse, which surfaces bad dates
# instead of silently mis-ordering them.
# mergesort is stable, so inspections sharing a date keep their current
# relative order. A new frame is assigned rather than sorting in place.
data = (
    data.assign(_inspection_ts=pd.to_datetime(data["inspection_date"]))
    .sort_values(by="_inspection_ts", kind="mergesort")
    .drop(columns="_inspection_ts")
)

# Find the previous inspection by shifting inspection_id one row within each
# license group; the first inspection of a license has no predecessor and
# gets NaN.
data["past_inspection_id"] = data.groupby("license")["inspection_id"].shift(1)


In [49]:
# Look up the violation data of each row's previous inspection. This is an
# inner join, so rows whose past_inspection_id has no match in
# violation_matrix are dropped. Overlapping column names stay unchanged on the
# current-inspection side and receive a "_prev" suffix on the
# previous-inspection side.
thing = data.merge(
    violation_matrix,
    how="inner",
    left_on="past_inspection_id",
    right_on="inspection_id",
    suffixes=("", "_prev"),
)

In [52]:
# Column labels of violation_matrix, each prefixed with "past_", as a plain list.
past_columns = ("past_" + violation_matrix.columns).tolist()

In [54]:
# Preview the violation matrix with every column label prefixed by "past_".
# add_prefix returns a new frame (violation_matrix itself is unchanged) and the
# result is not assigned here; only the first 10 rows are displayed so the
# cell output stays small.
violation_matrix.add_prefix("past_").head(10)

Unnamed: 0,past_v_1,past_v_2,past_v_3,past_v_4,past_v_5,past_v_6,past_v_7,past_v_8,past_v_9,past_v_10,...,past_v_41,past_v_42,past_v_43,past_v_44,past_v_45,past_v_70,past_critical_count,past_serious_count,past_minor_count,past_inspection_id
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4.0,2176589
1,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,3.0,2176568
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,2176564
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4.0,2176561
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,4.0,2176518
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,3.0,2176507
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,3.0,2176510
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2176498
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,4.0,2176491
9,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,5.0,2176484
