## Initial setup and imports

In [1]:
import geopandas
import os
import numpy as np
import pandas as pd
from shapely.geometry import Point, Polygon
import xml.etree.ElementTree as ET

# Show every column when a DataFrame is rendered instead of eliding the middle ones.
pd.set_option('display.max_columns', None)

## Initial Data Exploration

In [2]:
# Flatten the DineSafe XML export into one record per (establishment, inspection) pair.
element_tree = ET.parse(os.path.join('data', 'ds.xml'))
xml_root = element_tree.getroot()
data_list = []
for establishment in xml_root:
    # Establishment-level fields: every direct child except the nested INSPECTION elements.
    establishment_dict = {
        tag.tag: tag.text for tag in establishment if tag.tag != 'INSPECTION'
    }
    inspections = establishment.findall('INSPECTION')
    if not inspections:
        # Establishments without any inspection still get a single row.
        data_list.append(establishment_dict)
        continue
    for inspection in inspections:
        # Build a fresh dict for every inspection: reusing one dict across the loop
        # would let a field that is missing from this inspection silently keep the
        # value of the previous inspection of the same establishment.
        # Nested INFRACTION elements are intentionally not flattened into the row.
        inspection_dict = {
            tag.tag: tag.text for tag in inspection if tag.tag != 'INFRACTION'
        }
        # Inspection fields win over establishment fields that share a tag name.
        data_list.append({**establishment_dict, **inspection_dict})

dinesafe_df = (
    pd.DataFrame(data_list)
    .drop(columns=['ID', 'TYPE'])
    .drop_duplicates()
    # XML text is always a string; coordinates must be numeric to build geometries.
    .astype({'LONGITUDE': 'float', 'LATITUDE': 'float'})
)
# add point data for mapping to ward (shapely expects x=longitude, y=latitude)
dinesafe_df["POINT"] = [
    Point(longitude, latitude)
    for longitude, latitude in zip(dinesafe_df['LONGITUDE'], dinesafe_df['LATITUDE'])
]
dinesafe_df

Unnamed: 0,NAME,ADDRESS,LATITUDE,LONGITUDE,STATUS,DATE,POINT
0,'K' STORE,99 CARLTON ST,43.66205,-79.37747,Pass,2019-03-29,POINT (-79.37747 43.66205)
1,0109 Dessert + Chocolate,"2190 MCNICOLL AVE, -109",43.81477,-79.29491,Pass,2019-10-25,POINT (-79.29491 43.81477)
2,1 PLUS 1 PIZZA,361 OAKWOOD AVE,43.68725,-79.43842,Conditional Pass,2019-03-04,POINT (-79.43841999999999 43.68725)
3,1 PLUS 1 PIZZA,361 OAKWOOD AVE,43.68725,-79.43842,Pass,2019-03-08,POINT (-79.43841999999999 43.68725)
4,1 PLUS 1 PIZZA,361 OAKWOOD AVE,43.68725,-79.43842,Pass,2019-10-24,POINT (-79.43841999999999 43.68725)
...,...,...,...,...,...,...,...
40923,fimi Kitchens,2958 ISLINGTON AVE,43.75729,-79.57019,Pass,2020-02-03,POINT (-79.57019 43.75729)
40924,iQ FOOD CO.,181 BAY ST,43.64748,-79.37849,Pass,2019-06-21,POINT (-79.37849 43.64748)
40925,iQ FOOD CO.,181 BAY ST,43.64748,-79.37849,Pass,2019-11-15,POINT (-79.37849 43.64748)
40926,iQx,55 AVENUE RD,43.67121,-79.39441,Pass,2020-01-02,POINT (-79.39440999999999 43.67121)


In [3]:
# Attach the containing city ward to every row of dinesafe_df.
ward_df = geopandas.read_file(os.path.join('data', 'City Wards Data.geojson'))
ward_geometries = ward_df['geometry']


def _find_ward_index(establishment):
    """Return the ``ward_df`` index label of the ward containing ``establishment``.

    ``establishment`` is a shapely point. When several wards contain it, the
    first one (in ``ward_df`` order) with a non-null ``AREA_NAME`` is used.
    Returns ``None`` when no ward contains the point.
    """
    # GeoSeries.contains tests every ward polygon in a single vectorized call,
    # replacing a Python-level lambda invocation per ward.
    containing_wards = ward_df.loc[ward_geometries.contains(establishment), 'AREA_NAME']
    return containing_wards.first_valid_index()


# TODO: this is still one lookup per row; a spatial join (geopandas.sjoin)
# would vectorize the whole point-to-ward mapping.
dinesafe_df['WARD_INDEX'] = dinesafe_df['POINT'].apply(_find_ward_index)
dinesafe_df = (
    dinesafe_df
    # Dropping a stale AREA_NAME keeps the cell re-runnable (otherwise a second
    # run would produce AREA_NAME_x / AREA_NAME_y); it is a no-op on the first run.
    .drop(columns=['AREA_NAME'], errors='ignore')
    .merge(
        ward_df['AREA_NAME'].to_frame(),
        left_on='WARD_INDEX',
        right_index=True,
        # Inner join (the pandas default): rows whose point falls in no ward are dropped.
        how='inner',
    )
)
dinesafe_df

Unnamed: 0,NAME,ADDRESS,LATITUDE,LONGITUDE,STATUS,DATE,POINT,WARD_INDEX,AREA_NAME
0,'K' STORE,99 CARLTON ST,43.66205,-79.37747,Pass,2019-03-29,POINT (-79.37747 43.66205),21,Toronto Centre
45,120 DINER,120 CHURCH ST,43.65217,-79.37553,Pass,2019-04-05,POINT (-79.37553 43.65217),21,Toronto Centre
46,120 DINER,120 CHURCH ST,43.65217,-79.37553,Pass,2019-10-04,POINT (-79.37553 43.65217),21,Toronto Centre
77,1858 CAFE,22 ADELAIDE ST W,43.65012,-79.37994,Pass,2019-03-29,POINT (-79.37994 43.65012),21,Toronto Centre
78,1858 CAFE,22 ADELAIDE ST W,43.65012,-79.37994,Pass,2019-08-09,POINT (-79.37994 43.65012),21,Toronto Centre
...,...,...,...,...,...,...,...,...,...
40205,Wimpy's Diner,"65 RYLANDER BLVD, Unit-1-4",43.79717,-79.14992,Conditional Pass,2019-02-19,POINT (-79.14991999999999 43.79717),6,Scarborough-Rouge Park
40206,Wimpy's Diner,"65 RYLANDER BLVD, Unit-1-4",43.79717,-79.14992,Pass,2019-02-20,POINT (-79.14991999999999 43.79717),6,Scarborough-Rouge Park
40207,Wimpy's Diner,"65 RYLANDER BLVD, Unit-1-4",43.79717,-79.14992,Pass,2019-10-09,POINT (-79.14991999999999 43.79717),6,Scarborough-Rouge Park
40208,Wimpy's Diner,"65 RYLANDER BLVD, Unit-1-4",43.79717,-79.14992,Pass,2020-07-21,POINT (-79.14991999999999 43.79717),6,Scarborough-Rouge Park
