## Initial setup and imports

In [27]:
import geopandas
import math
import os
import numpy as np
import pandas as pd
from shapely.geometry import Point, Polygon
import xml.etree.ElementTree as ET

# Show every column when a DataFrame is displayed (None removes the column cap).
pd.options.display.max_columns = None

## Initial Data Exploration

In [28]:
# Flatten the DineSafe XML export into one record per (establishment, inspection)
# pair; establishments without any INSPECTION child contribute a single record.
element_tree = ET.parse(os.path.join('data', 'ds.xml'))
xml_root = element_tree.getroot()
data_list = []
for establishment in xml_root:
    # Establishment-level fields: every direct child except the nested INSPECTION elements.
    establishment_fields = {
        child.tag: child.text
        for child in establishment
        if child.tag != 'INSPECTION'
    }
    inspections = establishment.findall('INSPECTION')
    if not inspections:
        data_list.append(establishment_fields)
        continue
    for inspection in inspections:
        # Build a fresh dict for every inspection so that a tag missing from this
        # inspection is left empty instead of inheriting the previous inspection's value.
        inspection_fields = {
            child.tag: child.text
            for child in inspection
            if child.tag != 'INFRACTION'
        }
        data_list.append({**establishment_fields, **inspection_fields})
dinesafe_df = pd.DataFrame(data_list)

# Drop the ID and TYPE columns, then collapse rows that are identical without them.
dinesafe_df = dinesafe_df.drop(columns=['ID', 'TYPE']).drop_duplicates()
# XML text is parsed as strings; coordinates must be numeric for the geometry below.
dinesafe_df['LONGITUDE'] = dinesafe_df['LONGITUDE'].astype('float')
dinesafe_df['LATITUDE'] = dinesafe_df['LATITUDE'].astype('float')
# add point data for mapping to ward (shapely Points are built as (x=longitude, y=latitude))
dinesafe_df["POINT"] = dinesafe_df.apply(lambda x: Point(x['LONGITUDE'], x['LATITUDE']), axis=1)
dinesafe_df

Unnamed: 0,NAME,ADDRESS,LATITUDE,LONGITUDE,STATUS,DATE,POINT
0,'K' STORE,99 CARLTON ST,43.66205,-79.37747,Pass,2019-03-29,POINT (-79.37747 43.66205)
1,0109 Dessert + Chocolate,"2190 MCNICOLL AVE, -109",43.81477,-79.29491,Pass,2019-10-25,POINT (-79.29491 43.81477)
2,1 PLUS 1 PIZZA,361 OAKWOOD AVE,43.68725,-79.43842,Conditional Pass,2019-03-04,POINT (-79.43841999999999 43.68725)
3,1 PLUS 1 PIZZA,361 OAKWOOD AVE,43.68725,-79.43842,Pass,2019-03-08,POINT (-79.43841999999999 43.68725)
4,1 PLUS 1 PIZZA,361 OAKWOOD AVE,43.68725,-79.43842,Pass,2019-10-24,POINT (-79.43841999999999 43.68725)
...,...,...,...,...,...,...,...
40923,fimi Kitchens,2958 ISLINGTON AVE,43.75729,-79.57019,Pass,2020-02-03,POINT (-79.57019 43.75729)
40924,iQ FOOD CO.,181 BAY ST,43.64748,-79.37849,Pass,2019-06-21,POINT (-79.37849 43.64748)
40925,iQ FOOD CO.,181 BAY ST,43.64748,-79.37849,Pass,2019-11-15,POINT (-79.37849 43.64748)
40926,iQx,55 AVENUE RD,43.67121,-79.39441,Pass,2020-01-02,POINT (-79.39440999999999 43.67121)


In [29]:
# Load the ward polygons and label each establishment with the ward that contains it.
ward_df = geopandas.read_file(os.path.join('data', 'City Wards Data.geojson'))


def _containing_ward_index(point):
    """Return the ``ward_df`` index label of the first ward containing ``point``.

    Returns ``None`` when no ward polygon contains the point.
    """
    # GeoSeries.contains tests all ward polygons against the point in a single
    # vectorised call instead of a Python-level apply over each polygon.
    contains_point = ward_df['geometry'].contains(point)
    return ward_df.loc[contains_point, 'AREA_NAME'].first_valid_index()


# TODO: this is still one lookup per establishment; a spatial join would vectorize it fully
dinesafe_df['WARD_INDEX'] = dinesafe_df['POINT'].apply(_containing_ward_index)
# NOTE(review): merge defaults to an inner join, so establishments whose point is
# not inside any ward (WARD_INDEX is None) appear to be dropped here - confirm intended.
dinesafe_df = dinesafe_df.merge(
    ward_df['AREA_NAME'].to_frame(),
    left_on='WARD_INDEX',
    right_index=True
)
dinesafe_df = dinesafe_df.drop(columns=['WARD_INDEX'])
dinesafe_df

Unnamed: 0,NAME,ADDRESS,LATITUDE,LONGITUDE,STATUS,DATE,POINT,AREA_NAME
0,'K' STORE,99 CARLTON ST,43.66205,-79.37747,Pass,2019-03-29,POINT (-79.37747 43.66205),Toronto Centre
45,120 DINER,120 CHURCH ST,43.65217,-79.37553,Pass,2019-04-05,POINT (-79.37553 43.65217),Toronto Centre
46,120 DINER,120 CHURCH ST,43.65217,-79.37553,Pass,2019-10-04,POINT (-79.37553 43.65217),Toronto Centre
77,1858 CAFE,22 ADELAIDE ST W,43.65012,-79.37994,Pass,2019-03-29,POINT (-79.37994 43.65012),Toronto Centre
78,1858 CAFE,22 ADELAIDE ST W,43.65012,-79.37994,Pass,2019-08-09,POINT (-79.37994 43.65012),Toronto Centre
...,...,...,...,...,...,...,...,...
40205,Wimpy's Diner,"65 RYLANDER BLVD, Unit-1-4",43.79717,-79.14992,Conditional Pass,2019-02-19,POINT (-79.14991999999999 43.79717),Scarborough-Rouge Park
40206,Wimpy's Diner,"65 RYLANDER BLVD, Unit-1-4",43.79717,-79.14992,Pass,2019-02-20,POINT (-79.14991999999999 43.79717),Scarborough-Rouge Park
40207,Wimpy's Diner,"65 RYLANDER BLVD, Unit-1-4",43.79717,-79.14992,Pass,2019-10-09,POINT (-79.14991999999999 43.79717),Scarborough-Rouge Park
40208,Wimpy's Diner,"65 RYLANDER BLVD, Unit-1-4",43.79717,-79.14992,Pass,2020-07-21,POINT (-79.14991999999999 43.79717),Scarborough-Rouge Park


In [30]:
# Load the cultural hotspot points of interest from the CSV in the data directory.
hotspots_csv_path = os.path.join('data', 'CULTURAL_HOTSPOT_WGS84.csv')
hotspots_df = pd.read_csv(hotspots_csv_path)
hotspots_df

Unnamed: 0.1,Unnamed: 0,PNT_OF_INT,DESCRPTION,SOCIAL_MED,WEBSITE,CATEGORY,LOCATION,X,Y,LONGITUDE,LATITUDE,OBJECTID,RID,geometry
0,0,21 Points in Equilibrium (Sculpture),This sculpture is by James Southerland. It is ...,,,Public Art,150 Borough Dr,324341.227,4847914.922,-79.257067,43.772936,75154.0,1,POINT (-79.25706655499999 43.772936248)
1,1,Crucified Again (Sculpture),Crucified Again shows the body of a tortured m...,,,Public Art,450 Scarborough Golf Club Road,327641.001,4845799.116,-79.216170,43.753806,75155.0,2,POINT (-79.2161703309953 43.7538063534814)
2,2,A Tall Couple (Sculpture),Louis Archambault (1915-2003) created A Tall C...,,,Public Art,"-79.187369, 43.783078",329947.860,4849060.368,-79.187369,43.783078,102997.0,3,POINT (-79.187369 43.783078)
3,3,Warden Underpass Mural (Mural),The Warden Underpass Mural provides a visual h...,,,Public Art,"-79.273286, 43.693461",323059.616,4839081.887,-79.273286,43.693461,102998.0,4,POINT (-79.273286 43.693461)
4,4,Sustenance (Mural),"Sustenance, also known as the ""Western Gateway...",,,Public Art,"-79.255266, 43.700826",324509.747,4839904.258,-79.255266,43.700826,102999.0,5,POINT (-79.25526600000001 43.700826)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
469,469,T-Bones Sizzling Steak&Burgers,T-Bones is a 24-hour open steakhouse that offe...,,,Business,2540 Eglinton Avenue East,324426.155,4843745.445,-79.256166,43.735412,37648.0,470,POINT (-79.2561663300115 43.7354123164262)
470,470,Vi Pei Bistro,"Vi Pei Bistro, is the ""go to"" place to enjoy f...",https://www.facebook.com/ViPeiBistro,http://www.vipeibistro.com,Business,3101 Kingston Road,326561.419,4842833.684,-79.229696,43.727146,37649.0,471,POINT (-79.22969558957909 43.7271458947681)
471,471,Vi Pei Bistro,"Vi Pei Bistro, is the ""go to"" place to enjoy f...",https://www.facebook.com/ViPeiBistro,http://www.vipeibistro.com,Business,2258 Kingston Road,324721.052,4840364.464,-79.252627,43.704963,37650.0,472,POINT (-79.252627109 43.704962754)
472,472,Wexford Restaurant,The Wexford Restaurant hasserving Scarborough ...,,,Business,2072 Lawrence Avenue East,321384.174,4844918.480,-79.293891,43.746036,38811.0,473,POINT (-79.293890981 43.746036417)


We map each DineSafe establishment to its closest point of interest in each category, using Euclidean distance on the longitude/latitude coordinates.

In [31]:
# Restrict the hotspots to the 'Public Art' category.
public_art_df = hotspots_df[hotspots_df['CATEGORY'] == 'Public Art']


def _nearest_public_art_index(establishment):
    """Return the ``public_art_df`` index label of the artwork closest to ``establishment``.

    Closeness is the straight-line distance computed directly on the
    LONGITUDE/LATITUDE values of the two rows.
    """
    def _distance_to(artwork):
        lon_gap = artwork['LONGITUDE'] - establishment['LONGITUDE']
        lat_gap = artwork['LATITUDE'] - establishment['LATITUDE']
        return math.sqrt(pow(lon_gap, 2) + pow(lat_gap, 2))

    return public_art_df.apply(_distance_to, axis=1).idxmin()


dinesafe_df['INDEX_CLOSEST_ART'] = dinesafe_df.apply(_nearest_public_art_index, axis=1)
# Look up the artwork name by index label, then swap the helper column for the final one.
dinesafe_df = (
    dinesafe_df
    .merge(
        public_art_df['PNT_OF_INT'].to_frame(),
        left_on=['INDEX_CLOSEST_ART'],
        right_index=True,
    )
    .drop(columns=['INDEX_CLOSEST_ART'])
    .rename(columns={'PNT_OF_INT': 'CLOSEST_ART'})
)
dinesafe_df

Unnamed: 0,NAME,ADDRESS,LATITUDE,LONGITUDE,STATUS,DATE,POINT,AREA_NAME,PNT_OF_INT
0,'K' STORE,99 CARLTON ST,43.66205,-79.37747,Pass,2019-03-29,POINT (-79.37747 43.66205),Toronto Centre,Dawes Crossing by Noel Harding (Sculpture)
45,120 DINER,120 CHURCH ST,43.65217,-79.37553,Pass,2019-04-05,POINT (-79.37553 43.65217),Toronto Centre,Dawes Crossing by Noel Harding (Sculpture)
46,120 DINER,120 CHURCH ST,43.65217,-79.37553,Pass,2019-10-04,POINT (-79.37553 43.65217),Toronto Centre,Dawes Crossing by Noel Harding (Sculpture)
85,1871 BERKELEY CHURCH,315 QUEEN ST E,43.65519,-79.36597,Pass,2019-02-20,POINT (-79.36597 43.65519),Toronto Centre,Dawes Crossing by Noel Harding (Sculpture)
86,1871 BERKELEY CHURCH,315 QUEEN ST E,43.65519,-79.36597,Pass,2019-07-10,POINT (-79.36597 43.65519),Toronto Centre,Dawes Crossing by Noel Harding (Sculpture)
...,...,...,...,...,...,...,...,...,...
35696,THE JOY OF HARVEST,"271 OLD KINGSTON RD, Unit-6",43.78205,-79.17229,Pass,2019-03-29,POINT (-79.17229 43.78205),Scarborough-Rouge Park,Creekside (Mural)
35697,THE JOY OF HARVEST,"271 OLD KINGSTON RD, Unit-6",43.78205,-79.17229,Pass,2019-10-29,POINT (-79.17229 43.78205),Scarborough-Rouge Park,Creekside (Mural)
38299,Tante Laure 2,255 CORONATION DR,43.76243,-79.17977,Pass,2019-05-29,POINT (-79.17977 43.76243),Scarborough-Rouge Park,Creekside (Mural)
38300,Tante Laure 2,255 CORONATION DR,43.76243,-79.17977,Pass,2019-10-21,POINT (-79.17977 43.76243),Scarborough-Rouge Park,Creekside (Mural)


In [38]:
# Restrict the hotspots to the 'Creative' category.
creative_df = hotspots_df[hotspots_df['CATEGORY'] == 'Creative']


def _nearest_creative_index(establishment):
    """Return the ``creative_df`` index label of the creative spot closest to ``establishment``.

    Closeness is the straight-line distance computed directly on the
    LONGITUDE/LATITUDE values of the two rows.
    """
    def _distance_to(creative_spot):
        lon_gap = creative_spot['LONGITUDE'] - establishment['LONGITUDE']
        lat_gap = creative_spot['LATITUDE'] - establishment['LATITUDE']
        return math.sqrt(pow(lon_gap, 2) + pow(lat_gap, 2))

    return creative_df.apply(_distance_to, axis=1).idxmin()


dinesafe_df['INDEX_CLOSEST_CREATIVE'] = dinesafe_df.apply(_nearest_creative_index, axis=1)
# Look up the spot name by index label, then swap the helper column for the final one.
dinesafe_df = (
    dinesafe_df
    .merge(
        creative_df['PNT_OF_INT'].to_frame(),
        left_on=['INDEX_CLOSEST_CREATIVE'],
        right_index=True,
    )
    .drop(columns=['INDEX_CLOSEST_CREATIVE'])
    .rename(columns={'PNT_OF_INT': 'CLOSEST_CREATIVE'})
)
dinesafe_df

Unnamed: 0,NAME,ADDRESS,LATITUDE,LONGITUDE,STATUS,DATE,POINT,AREA_NAME,CLOSEST_ART,CLOSEST_CREATIVE
0,'K' STORE,99 CARLTON ST,43.66205,-79.37747,Pass,2019-03-29,POINT (-79.37747 43.66205),Toronto Centre,Dawes Crossing by Noel Harding (Sculpture),Scarborough Film Festival
45,120 DINER,120 CHURCH ST,43.65217,-79.37553,Pass,2019-04-05,POINT (-79.37553 43.65217),Toronto Centre,Dawes Crossing by Noel Harding (Sculpture),Scarborough Film Festival
46,120 DINER,120 CHURCH ST,43.65217,-79.37553,Pass,2019-10-04,POINT (-79.37553 43.65217),Toronto Centre,Dawes Crossing by Noel Harding (Sculpture),Scarborough Film Festival
85,1871 BERKELEY CHURCH,315 QUEEN ST E,43.65519,-79.36597,Pass,2019-02-20,POINT (-79.36597 43.65519),Toronto Centre,Dawes Crossing by Noel Harding (Sculpture),Scarborough Film Festival
86,1871 BERKELEY CHURCH,315 QUEEN ST E,43.65519,-79.36597,Pass,2019-07-10,POINT (-79.36597 43.65519),Toronto Centre,Dawes Crossing by Noel Harding (Sculpture),Scarborough Film Festival
...,...,...,...,...,...,...,...,...,...,...
35504,THE GOLDEN CHOPSTICK,"271 OLD KINGSTON RD, Unit-103",43.78205,-79.17229,Pass,2019-10-29,POINT (-79.17229 43.78205),Scarborough-Rouge Park,Creekside (Mural),Framing Dames Art School
35505,THE GOLDEN CHOPSTICK,"271 OLD KINGSTON RD, Unit-103",43.78205,-79.17229,Pass,2020-03-04,POINT (-79.17229 43.78205),Scarborough-Rouge Park,Creekside (Mural),Framing Dames Art School
35696,THE JOY OF HARVEST,"271 OLD KINGSTON RD, Unit-6",43.78205,-79.17229,Pass,2019-03-29,POINT (-79.17229 43.78205),Scarborough-Rouge Park,Creekside (Mural),Framing Dames Art School
35697,THE JOY OF HARVEST,"271 OLD KINGSTON RD, Unit-6",43.78205,-79.17229,Pass,2019-10-29,POINT (-79.17229 43.78205),Scarborough-Rouge Park,Creekside (Mural),Framing Dames Art School


In [43]:
# Restrict the hotspots to the 'Park' category.
park_df = hotspots_df[hotspots_df['CATEGORY'] == 'Park']


def _nearest_park_index(establishment):
    """Return the ``park_df`` index label of the park closest to ``establishment``.

    Closeness is the straight-line distance computed directly on the
    LONGITUDE/LATITUDE values of the two rows.
    """
    def _distance_to(park):
        lon_gap = park['LONGITUDE'] - establishment['LONGITUDE']
        lat_gap = park['LATITUDE'] - establishment['LATITUDE']
        return math.sqrt(pow(lon_gap, 2) + pow(lat_gap, 2))

    return park_df.apply(_distance_to, axis=1).idxmin()


dinesafe_df['INDEX_CLOSEST_PARK'] = dinesafe_df.apply(_nearest_park_index, axis=1)
# The index labels above come from park_df, so the name lookup must also use park_df
# (looking them up in creative_df, a different slice of hotspots_df, matches nothing).
dinesafe_df = dinesafe_df.merge(
    park_df['PNT_OF_INT'].to_frame(),
    left_on=['INDEX_CLOSEST_PARK'],
    right_index=True
)
dinesafe_df = dinesafe_df.drop(columns=['INDEX_CLOSEST_PARK'])
dinesafe_df = dinesafe_df.rename(columns={'PNT_OF_INT': 'CLOSEST_PARK'})
dinesafe_df

Unnamed: 0,NAME,ADDRESS,LATITUDE,LONGITUDE,STATUS,DATE,POINT,AREA_NAME,CLOSEST_ART,CLOSEST_CREATIVE,CLOSEST_PARK
0,'K' STORE,99 CARLTON ST,43.66205,-79.37747,Pass,2019-03-29,POINT (-79.37747 43.66205),Toronto Centre,Dawes Crossing by Noel Harding (Sculpture),Scarborough Film Festival,Rosetta McClain Gardens
45,120 DINER,120 CHURCH ST,43.65217,-79.37553,Pass,2019-04-05,POINT (-79.37553 43.65217),Toronto Centre,Dawes Crossing by Noel Harding (Sculpture),Scarborough Film Festival,Rosetta McClain Gardens
46,120 DINER,120 CHURCH ST,43.65217,-79.37553,Pass,2019-10-04,POINT (-79.37553 43.65217),Toronto Centre,Dawes Crossing by Noel Harding (Sculpture),Scarborough Film Festival,Rosetta McClain Gardens
85,1871 BERKELEY CHURCH,315 QUEEN ST E,43.65519,-79.36597,Pass,2019-02-20,POINT (-79.36597 43.65519),Toronto Centre,Dawes Crossing by Noel Harding (Sculpture),Scarborough Film Festival,Rosetta McClain Gardens
86,1871 BERKELEY CHURCH,315 QUEEN ST E,43.65519,-79.36597,Pass,2019-07-10,POINT (-79.36597 43.65519),Toronto Centre,Dawes Crossing by Noel Harding (Sculpture),Scarborough Film Festival,Rosetta McClain Gardens
...,...,...,...,...,...,...,...,...,...,...,...
35504,THE GOLDEN CHOPSTICK,"271 OLD KINGSTON RD, Unit-103",43.78205,-79.17229,Pass,2019-10-29,POINT (-79.17229 43.78205),Scarborough-Rouge Park,Creekside (Mural),Framing Dames Art School,Chesterton Shores Park
35505,THE GOLDEN CHOPSTICK,"271 OLD KINGSTON RD, Unit-103",43.78205,-79.17229,Pass,2020-03-04,POINT (-79.17229 43.78205),Scarborough-Rouge Park,Creekside (Mural),Framing Dames Art School,Chesterton Shores Park
35696,THE JOY OF HARVEST,"271 OLD KINGSTON RD, Unit-6",43.78205,-79.17229,Pass,2019-03-29,POINT (-79.17229 43.78205),Scarborough-Rouge Park,Creekside (Mural),Framing Dames Art School,Chesterton Shores Park
35697,THE JOY OF HARVEST,"271 OLD KINGSTON RD, Unit-6",43.78205,-79.17229,Pass,2019-10-29,POINT (-79.17229 43.78205),Scarborough-Rouge Park,Creekside (Mural),Framing Dames Art School,Chesterton Shores Park
