In [1]:
import os
import pandas as pd
import numpy as np
import requests
import yaml
import re
from utils import get_SODA_data
import geopandas as gpd
from shapely.geometry import Point, LineString, Polygon

pd.set_option("mode.chained_assignment", None)

In [2]:
ROOT = "C:/Users/Beau/Documents/GitHub/RealEstate"
CLEANED = os.path.join(ROOT, "data", "processed")
SHAPE_FILES = os.path.join(ROOT, "data", "raw", "shape_files")

In [3]:
df_EV_chargers = get_SODA_data(
    api_endpoint="https://data.cityofchicago.org/resource/iq3c-68ew.json",
    custom_filter="&city=Chicago",
)

df_EV_chargers["label"] = df_EV_chargers["ev_network"]
df_EV_chargers.head()

Unnamed: 0,ev_network,station_name,fuel_type_code,location,station_phone,ev_network_web,city,intersection_directions,status_code,state,...,owner_type_code,ev_dc_fast_count,cards_accepted,expected_date,ev_level1_evse_num,lpg_primary,ng_psi,ng_fill_type_code,ng_vehicle_class,label
0,Blink Network,University of Illinois - 915 S Paulina St,ELEC,"{'latitude': '41.864575', 'needs_recoding': Fa...",888-998-2546,{'url': 'http://www.blinkcharging.com/'},Chicago,1st Floor||,E,IL,...,,,,,,,,,,Blink Network
1,,MCC Chicago,E85,"{'latitude': '41.876579', 'needs_recoding': Fa...",,,Chicago,Garage,E,IL,...,FG,,,,,,,,,
2,ChargePoint Network,900 SELF PARK 8TH FLOOR,ELEC,"{'latitude': '40.162314', 'needs_recoding': Fa...",888-758-4389,{'url': 'http://www.chargepoint.com/'},Chicago,,E,IL,...,,,,,,,,,,ChargePoint Network
3,Tesla,Target - Tesla Supercharger,ELEC,"{'latitude': '41.92780291', 'needs_recoding': ...",877-798-3752,{'url': 'https://www.tesla.com/supercharger'},Chicago,,E,IL,...,P,12.0,,,,,,,,Tesla
4,Non-Networked,St John's Episcopal Church,ELEC,"{'latitude': '41.9515574', 'needs_recoding': F...",650-200-3488,,Chicago,,E,IL,...,P,,A CREDIT D Debit M V,,,,,,,Non-Networked


## Farmer's Markets

In [4]:
df_farmers_market = get_SODA_data(
    api_endpoint="https://data.cityofchicago.org/resource/atzs-u7pv.json"
)

df_farmers_market["label"] = df_farmers_market["day"]
df_farmers_market.head()

Unnamed: 0,:@computed_region_43wa_7qmu,link_accepted,location,type,:@computed_region_rpca_8um6,latitude,:@computed_region_awaf_s7ux,start_time,end_time,longitude,:@computed_region_bdys_3d7i,:@computed_region_6mkv_f3dw,end_date,intersection,start_date,day,:@computed_region_vrxf_vc4k,map,website,label
0,16,NO,Edgewater,Independent,14,41.9918227,40,8:00 AM,1:00 PM,-87.66032349,536,4451,2012-10-13T00:00:00.000,Broadway & Norwood,2012-06-09T00:00:00.000,Saturday,76,"{'latitude': '41.9918227', 'needs_recoding': F...",,Saturday
1,9,YES,Homegrown Bronzeville,Independent,10,41.80197062,12,9:00 AM,1:00 PM,-87.61768784,218,21192,2012-10-28T00:00:00.000,343 E 51st St,2012-06-10T00:00:00.000,Sunday,7,"{'latitude': '41.80197062', 'needs_recoding': ...",{'url': 'http://www.greenyouthfarm.com'},Sunday
2,14,YES,Covenant Bank - North Lawndale,Independent,30,41.86780306,36,8:00 AM,1:00 PM,-87.71044099,98,21572,2012-10-10T00:00:00.000,1111 S Homan Ave,2012-06-13T00:00:00.000,Wednesday,30,"{'latitude': '41.86780306', 'needs_recoding': ...",,Wednesday
3,50,NO,Jefferson Park Sunday Market,Independent,48,41.96800571,20,10:00 AM,2:00 PM,-87.75834697,129,21869,2012-10-21T00:00:00.000,5216 W Lawrence Ave,2012-06-24T00:00:00.000,Sunday,11,"{'latitude': '41.96800571', 'needs_recoding': ...",{'url': 'http://www.jpna.net/FM'},Sunday
4,48,YES,Bridgeport,Weekly,40,41.83081542,26,7:00 AM,1:00 PM,-87.64123528,149,14924,2012-10-06T00:00:00.000,35th & Wallace,2012-06-16T00:00:00.000,Saturday,58,"{'latitude': '41.83081542', 'needs_recoding': ...",{'url': 'http://www.chicagofarmersmarkets.us'},Saturday


## Divvy Stations

In [5]:
df_divvy = get_SODA_data(
    api_endpoint="https://data.cityofchicago.org/resource/bbyy-e7gq.json"
)

df_divvy["label"] = df_divvy["station_name"]
df_divvy.head()

Unnamed: 0,id,station_name,total_docks,docks_in_service,status,latitude,longitude,location,:@computed_region_awaf_s7ux,:@computed_region_6mkv_f3dw,:@computed_region_vrxf_vc4k,:@computed_region_bdys_3d7i,:@computed_region_43wa_7qmu,label
0,2,Buckingham Fountain,39,39,Not In Service,41.876423,-87.620339,"{'latitude': '41.876423', 'longitude': '-87.62...",22,14913,38,367,36,Buckingham Fountain
1,3,Shedd Aquarium,55,55,In Service,41.867225957,-87.61535539,"{'latitude': '41.86722595682', 'longitude': '-...",48,14913,34,374,10,Shedd Aquarium
2,4,Burnham Harbor,23,23,In Service,41.857411787,-87.613791525,"{'latitude': '41.85741178707404', 'longitude':...",48,21194,34,374,10,Burnham Harbor
3,5,State St & Harrison St,23,23,In Service,41.874053,-87.627716,"{'latitude': '41.874053', 'longitude': '-87.62...",48,14913,38,12,10,State St & Harrison St
4,6,Dusable Harbor,39,39,In Service,41.886976,-87.612813,"{'latitude': '41.886976', 'longitude': '-87.61...",22,21182,38,159,36,Dusable Harbor


## L Stations

In [6]:
df_L = get_SODA_data(
    api_endpoint="https://data.cityofchicago.org/resource/8pix-ypme.json"
)

df_L["label"] = df_L["station_descriptive_name"]
df_L.head()

Unnamed: 0,stop_id,direction_id,stop_name,station_name,station_descriptive_name,map_id,ada,red,blue,g,...,y,pnk,o,location,:@computed_region_awaf_s7ux,:@computed_region_6mkv_f3dw,:@computed_region_vrxf_vc4k,:@computed_region_bdys_3d7i,:@computed_region_43wa_7qmu,label
0,30162,W,18th (54th/Cermak-bound),18th,18th (Pink Line),40830,True,False,False,False,...,False,True,False,"{'latitude': '41.857908', 'longitude': '-87.66...",8,14920,33,343,26,18th (Pink Line)
1,30161,E,18th (Loop-bound),18th,18th (Pink Line),40830,True,False,False,False,...,False,True,False,"{'latitude': '41.857908', 'longitude': '-87.66...",8,14920,33,343,26,18th (Pink Line)
2,30022,N,35th/Archer (Loop-bound),35th/Archer,35th/Archer (Orange Line),40120,True,False,False,False,...,False,False,True,"{'latitude': '41.829353', 'longitude': '-87.68...",26,14924,56,719,1,35th/Archer (Orange Line)
3,30023,S,35th/Archer (Midway-bound),35th/Archer,35th/Archer (Orange Line),40120,True,False,False,False,...,False,False,True,"{'latitude': '41.829353', 'longitude': '-87.68...",26,14924,56,719,1,35th/Archer (Orange Line)
4,30214,S,35-Bronzeville-IIT (63rd-bound),35th-Bronzeville-IIT,35th-Bronzeville-IIT (Green Line),41120,True,False,False,True,...,False,False,False,"{'latitude': '41.831677', 'longitude': '-87.62...",12,21194,1,25,9,35th-Bronzeville-IIT (Green Line)


In [7]:
# extract latitude and longitude
df_L["latitude"] = df_L.location.apply(lambda x: x["latitude"]).astype(float)
df_L["longitude"] = df_L.location.apply(lambda x: x["longitude"]).astype(float)

## Business Licenses

In [8]:
df_licenses = get_SODA_data(
    api_endpoint="https://data.cityofchicago.org/resource/r5kz-chrr.json"
)

# remove records with no coordinate data
df_licenses = df_licenses.loc[
    df_licenses.latitude.notnull() & df_licenses.longitude.notnull()
].reset_index(drop=True)

df_licenses["label"] = df_licenses["doing_business_as_name"]
df_licenses.head()

Unnamed: 0,id,license_id,account_number,site_number,legal_name,doing_business_as_name,address,city,state,zip_code,...,longitude,location,:@computed_region_vrxf_vc4k,:@computed_region_awaf_s7ux,:@computed_region_6mkv_f3dw,:@computed_region_bdys_3d7i,:@computed_region_43wa_7qmu,license_status_change_date,ssa,label
0,2617269-20220916,2857496,1611,5,TTX COMPANY,TTX COMPANY,4016 S ASHLAND AVE,CHICAGO,IL,60609,...,-87.665396822,"{'latitude': '41.82028493557808', 'longitude':...",59,29,14924,706,1,,,TTX COMPANY
1,2652692-20201217,2652692,456833,1,JYM INVESTMENTS LLC,WINGMAN,2321 W HOWARD ST 1ST,CHICAGO,IL,60645,...,-87.688628188,"{'latitude': '42.019413270594214', 'longitude'...",20,3,22528,341,5,2022-08-08T00:00:00.000,,WINGMAN
2,2647849-20200916,2739788,64603,7,LEOPARDO COMPANIES INC.,Leopardo Companies Inc,210 N CARPENTER ST 3RD FLOOR,CHICAGO,IL,60607,...,-87.653461783,"{'latitude': '41.88594495760403', 'longitude':...",29,41,14917,63,46,2022-08-08T00:00:00.000,,Leopardo Companies Inc
3,1579838-20220616,2842924,289144,1,"1021 MONTROSE, L.L.C.",Driftwood,1021 W MONTROSE AVE,CHICAGO,IL,60613,...,-87.655649945,"{'latitude': '41.961655308981946', 'longitude'...",31,37,21186,577,39,,34.0,Driftwood
4,2027540-20220616,2841687,10726,5,GRAHAM ENTERPRISE INC,Rock N Roll BP (GEI-157),631-647 N LA SALLE DR,CHICAGO,IL,60654,...,-87.632493012,"{'latitude': '41.89325119846543', 'longitude':...",37,22,4446,670,36,,,Rock N Roll BP (GEI-157)


In [9]:
df_licenses.latitude = df_licenses.latitude.astype(float)
df_licenses.longitude = df_licenses.longitude.astype(float)

## Current Business Licenses

In [10]:
df_current_licenses = get_SODA_data(
    api_endpoint="https://data.cityofchicago.org/resource/uupf-x98q.json"
)

# remove records with no coordinate data
df_current_licenses = df_current_licenses.loc[
    df_current_licenses.latitude.notnull() & df_current_licenses.longitude.notnull()
]

df_current_licenses["label"] = df_current_licenses["license_description"]
df_current_licenses.head()

Unnamed: 0,license_description,zip_code,license_id,location,date_issued,city,ward_precinct,address,license_status,conditional_approval,...,license_approved_for_issuance,expiration_date,account_number,site_number,license_code,legal_name,id,ssa,application_created_date,label
0,Regulated Business License,60620,2856938,"{'latitude': '41.748245609196715', 'human_addr...",2022-08-09T00:00:00.000,CHICAGO,18-18,8001 S WESTERN AVE,AAI,N,...,2022-08-08T00:00:00.000,2025-12-15T00:00:00.000,378455,1,4404,FOREST EDGE HEALTHCARE & REHABILITATION CENTER...,2283660-20231216,,,Regulated Business License
1,Mobile Food License,60608,2877055,"{'latitude': '41.85045102427', 'human_address'...",2022-10-27T00:00:00.000,CHICAGO,25-2,2300 S THROOP ST,AAI,N,...,2022-10-27T00:00:00.000,2023-09-15T00:00:00.000,378104,7,4405,PERK UP INC.,2689771-20230916,,,Mobile Food License
2,Regulated Business License,60657,2909383,"{'latitude': '41.944108848555665', 'human_addr...",2023-05-17T00:00:00.000,CHICAGO,44-38,3416 N SHEFFIELD AVE 1ST,AAI,N,...,2023-05-16T00:00:00.000,2025-07-15T00:00:00.000,377221,2,4404,REFLEXION SPA LLC,2698731-20230716,17.0,,Regulated Business License
3,Limited Business License,60609,2911368,"{'latitude': '41.81944142629576', 'human_addre...",2023-05-24T00:00:00.000,CHICAGO,12-19,4100 S ASHLAND AVE,AAI,N,...,2023-05-23T00:00:00.000,2025-07-15T00:00:00.000,476495,2,1010,AMERICAN EXPORT & GENERAL SERVICES INC.,2867883-20230716,10.0,,Limited Business License
5,Retail Food Establishment,60643,2908956,"{'latitude': '41.69224436617783', 'human_addre...",2023-05-24T00:00:00.000,CHICAGO,21-43,805 W 111TH ST 1ST,AAI,N,...,2023-05-23T00:00:00.000,2025-07-15T00:00:00.000,338898,1,1006,TONY'S STEAK GYROS & LEMONADE INC.,1942358-20230716,45.0,,Retail Food Establishment


## Liquor Business Licenses

In [11]:
df_liquor = get_SODA_data(
    api_endpoint="https://data.cityofchicago.org/resource/nrmj-3kcf.json"
)

# remove records with no coordinate data
df_liquor = df_liquor.loc[df_liquor.latitude.notnull() & df_liquor.longitude.notnull()]

df_liquor["label"] = df_liquor["legal_name"]
df_liquor.head()

Unnamed: 0,zip_code,:@computed_region_43wa_7qmu,license_description,license_id,location,date_issued,city,ward_precinct,address,license_status,...,expiration_date,account_number,site_number,license_code,legal_name,:@computed_region_vrxf_vc4k,id,ssa,application_created_date,label
0,60647,41,Consumption on Premises - Incidental Activity,2806251,"{'latitude': '41.91736812603766', 'human_addre...",2021-08-17T00:00:00.000,CHICAGO,26-20,2833 W ARMITAGE AVE,AAI,...,2023-10-15T00:00:00.000,397430,1,1475,WANDERING CHEF LLC,23,2391388-20211016,,,WANDERING CHEF LLC
1,60613,39,Tavern,2842924,"{'latitude': '41.961655308981946', 'human_addr...",2022-05-11T00:00:00.000,CHICAGO,46-9,1021 W MONTROSE AVE,AAI,...,2024-06-15T00:00:00.000,289144,1,1470,"1021 MONTROSE, L.L.C.",31,1579838-20220616,34.0,,"1021 MONTROSE, L.L.C."
2,60634,17,Consumption on Premises - Incidental Activity,2804007,"{'latitude': '41.93758152034964', 'human_addre...",2021-10-05T00:00:00.000,CHICAGO,31-15,3127 N CENTRAL AVE 1ST,AAI,...,2023-10-15T00:00:00.000,3542,1,1475,"CENTRAL GYROS, CORP.",19,28852-20211016,2.0,,"CENTRAL GYROS, CORP."
3,60639,45,Package Goods,2878957,"{'latitude': '41.90933896530053', 'human_addre...",2022-12-27T00:00:00.000,CHICAGO,37-6,5345 W NORTH AVE 1,AAI,...,2025-01-15T00:00:00.000,11998,1,1474,ABH FOOD & LIQUOR INC,26,30044-20230116,,,ABH FOOD & LIQUOR INC
4,60607,46,Consumption on Premises - Incidental Activity,2880684,"{'latitude': '41.88424444452276', 'human_addre...",2022-12-27T00:00:00.000,CHICAGO,27-1,917-923 W RANDOLPH ST,AAI,...,2025-01-15T00:00:00.000,393943,1,1475,B RESTAURANT LLC,29,2359724-20230116,,,B RESTAURANT LLC


## Landmarks

In [12]:
df_landmarks = get_SODA_data(
    api_endpoint="https://data.cityofchicago.org/resource/tdab-kixi.json"
)

df_landmarks["label"] = df_landmarks["landmark_name"]
df_landmarks.head()

Unnamed: 0,landmark_name,id,address,landmark_designation_date,latitude,longitude,location,:@computed_region_rpca_8um6,:@computed_region_vrxf_vc4k,:@computed_region_6mkv_f3dw,:@computed_region_bdys_3d7i,:@computed_region_43wa_7qmu,:@computed_region_awaf_s7ux,date_built,architect,label
0,Vassar Swiss Underwear Company Building,L-265,2543 - 2545 W Diversey Av,2008-07-30T07:00:00.000Z,41.9316266084,-87.6921000957,"{'latitude': '41.9316266084', 'longitude': '-8...",1,23,22535,240,40,24,,,Vassar Swiss Underwear Company Building
1,Mathilde Eliel House,L- 89,4122 S Ellis Av,1991-10-02T07:00:00.000Z,41.819255751,-87.6027879992,"{'latitude': '41.819255751', 'longitude': '-87...",36,2,4301,220,10,1,1886.0,Adler & Sullivan,Mathilde Eliel House
2,Manhattan Building,L-139,431 S Dearborn St,1978-07-07T07:00:00.000Z,41.8760657234,-87.6289644505,"{'latitude': '41.8760657234', 'longitude': '-8...",35,38,14913,92,10,48,1891.0,William LeBaron Jenney,Manhattan Building
3,Machinery Hall at Illinois Institute of Techno...,L- 12,100 W 33rd St,2004-05-26T07:00:00.000Z,41.8351614122,-87.6292212235,"{'latitude': '41.8351614122', 'longitude': '-8...",40,1,21194,25,9,12,1901.0,"Patton, Fisher & Miller",Machinery Hall at Illinois Institute of Techno...
4,Melissa Ann Elam House,L- 88,4726 S Dr Martin Luther King Jr Dr,1979-03-21T08:00:00.000Z,41.808529769,-87.6172043949,"{'latitude': '41.808529769', 'longitude': '-87...",10,4,21192,162,9,12,1903.0,Henry L. Newhouse,Melissa Ann Elam House


## Public Park Art

In [13]:
df_park_art = get_SODA_data(
    api_endpoint="https://data.cityofchicago.org/resource/sj6t-9cju.json"
)

df_park_art["label"] = df_park_art["art"]
df_park_art.head()

Unnamed: 0,park_name,park_number,art,artist,owner,x_coordinate,y_coordinate,latitude,longitude,location,:@computed_region_rpca_8um6,:@computed_region_vrxf_vc4k,:@computed_region_6mkv_f3dw,:@computed_region_bdys_3d7i,:@computed_region_43wa_7qmu,:@computed_region_awaf_s7ux,label
0,HUMBOLDT (BARON VON),219,Interpreting Nature,Roman Villareal,CPD,1156808.64946,1909066.86792,41.906255,-87.69942,"{'latitude': '41.906255', 'longitude': '-87.69...",4,25,22535,301,49,10,Interpreting Nature
1,SENN (NICHOLAS),227,The Young Lincoln,Charles Keck,CPD,1164734.00565,1939563.37418,41.989775,-87.66943999999998,"{'latitude': '41.989775', 'longitude': '-87.66...",14,76,4451,539,16,40,The Young Lincoln
2,LINCOLN (ABRAHAM),100,Kwa-Ma-Rolas (Totem Pole),Haida Indians,CPD,1172198.75844,1924752.85787,41.948972,-87.64242299999998,"{'latitude': '41.948972', 'longitude': '-87.64...",53,57,4449,726,39,37,Kwa-Ma-Rolas (Totem Pole)
3,ARRIGO (VICTOR),215,Christopher Columbus and Fountain,Moses Ezekiel,CPD,1167268.04553,1896559.5281,41.871716,-87.661359,"{'latitude': '41.871716', 'longitude': '-87.66...",44,29,14917,759,23,8,Christopher Columbus and Fountain
4,LINCOLN (ABRAHAM),100,Curve XXII (I Will),Ellsworth Kelly,CPD,1174726.40627,1916451.13989,41.926136,-87.633381,"{'latitude': '41.926136', 'longitude': '-87.63...",16,68,21190,101,34,51,Curve XXII (I Will)


## Murals

In [14]:
df_murals = get_SODA_data(
    api_endpoint="https://data.cityofchicago.org/resource/we8h-apcf.json"
)

df_murals["label"] = df_murals["artwork_title"]
df_murals.head()

Unnamed: 0,mural_registration_id,artist_credit,artwork_title,media,year_installed,location_description,street_address,zip,ward,affiliated_or_commissioning,...,latitude,longitude,location,:@computed_region_rpca_8um6,:@computed_region_vrxf_vc4k,:@computed_region_6mkv_f3dw,:@computed_region_bdys_3d7i,:@computed_region_43wa_7qmu,year_restored,label
0,19001,Kerry James Marshall,Rush More,Painting,2017,"Chicago Cultural Center, West FaÃ§ade",78 E Washington St,60602,42,DCASE,...,41.88333428,-87.62505014,"{'type': 'Point', 'coordinates': [-87.62505014...",41,38,14310,580,36,,Rush More
1,19043,"Rahmaan Statik, Max Sansing",Sepia,Paint,2016,,2230 S Central Park Ave,60623,22,Chicago Public Art Group,...,41.850922,-87.715009,"{'type': 'Point', 'coordinates': [-87.715009, ...",57,32,21569,754,28,,Sepia
2,19139,Hector Duarte,Honor Boricua,,1993,,2601 W Evergreen Ave,60622,26,Chicago Public Art Group,...,41.90560086,-87.69214278,"{'type': 'Point', 'coordinates': [-87.69214278...",4,25,21560,543,49,,Honor Boricua
3,19135,Mirtes Zwierzynski,Garfield Park Conservatory Alliance Mosaic,,2007,N Hamlin Ave and Lake St,N Hamlin Ave and Lake St,60624,28,Chicago Public Art Group,...,41.885114,-87.721068,"{'type': 'Point', 'coordinates': [-87.721068, ...",30,27,21572,658,23,,Garfield Park Conservatory Alliance Mosaic
4,19148,Ed Paschke,"Luna Negra, 1998",Paint,2014,Outside left wall of building,5415 W Higgins Ave,60630,45,Ed Paschke Art Center,...,41.96872306,-87.76207536,"{'type': 'Point', 'coordinates': [-87.76207536...",48,11,21869,129,50,,"Luna Negra, 1998"


## Grocery Stores

In [15]:
df_grocery = get_SODA_data(
    api_endpoint="https://data.cityofchicago.org/resource/3e26-zek2.json"
)

# remove records with no coordinate data
df_grocery = df_grocery.loc[df_grocery.location.notnull()]

df_grocery["label"] = df_grocery["store_name"]
df_grocery.head()

Unnamed: 0,store_name,address,zip,new_status,last_updated,location,:@computed_region_rpca_8um6,:@computed_region_vrxf_vc4k,:@computed_region_6mkv_f3dw,:@computed_region_bdys_3d7i,:@computed_region_43wa_7qmu,label
0,Jewel - Osco,87 W 87th St,60620,OPEN,2020-06-03T17:00:00.000,"{'type': 'Point', 'coordinates': [-87.626243, ...",59,40,21554,1,13,Jewel - Osco
1,Farm on Ogden,3555 W OGDEN AVE,60623,OPEN,2020-06-10T00:00:00.000,"{'type': 'Point', 'coordinates': [-87.71437, 4...",57,30,21569,157,14,Farm on Ogden
2,Jewel - Osco,5343 N Broadway St,60640-2311,OPEN,2020-06-03T17:00:00.000,"{'type': 'Point', 'coordinates': [-87.659887, ...",15,76,22616,160,16,Jewel - Osco
3,International Foods NW,4404 W FULLERTON AVE,60639,OPEN,2020-06-10T00:00:00.000,"{'type': 'Point', 'coordinates': [-87.737127, ...",2,21,22615,439,17,International Foods NW
4,Jewel - Osco,2520 N Narragansett Ave,60639-1041,OPEN,2020-06-03T17:00:00.000,"{'type': 'Point', 'coordinates': [-87.785559, ...",3,19,22615,137,44,Jewel - Osco


In [16]:
# extract latitude and longitude
df_grocery["latitude"] = df_grocery.location.apply(
    lambda x: x["coordinates"][1]
).astype(float)
df_grocery["longitude"] = df_grocery.location.apply(
    lambda x: x["coordinates"][0]
).astype(float)

## Hospitals

In [17]:
df_hospitals = gpd.read_file(os.path.join(SHAPE_FILES, "Hospitals.zip"))
df_hospitals["label"] = df_hospitals["LABEL"]
df_hospitals.head()

Unnamed: 0,BLDGID,X,Y,BLDGNOTES,LABEL,FACILITY,CITY,ADDRESS,COMMONNAME,TYPE1,...,CA2,USE,EDITDATE,ZIP,WARD,TRACT,AREA_NUMBE,COMMUNITY,geometry,label
0,34142.0,1168427.011,1940926.452,,Kindred- Lakeshore,Kindred Hospital - Chicago Lakeshore,Chicago,6130 N. Sheridan,Kindred Hospital - Chicago Lakeshore,HOSPITAL,...,77.0,Hospitals,2007-08-10,60660,48,30100,77,EDGEWATER,POINT (1168427.011 1940926.452),Kindred- Lakeshore
1,262155.0,1171002.449,1916046.01,,Children's,Children's Memorial Hospital,Chicago,2300 Children's Plaza,Children's Memorial Hospital,HOSPITAL,...,7.0,Hospitals,2007-08-10,60614,43,71200,7,LINCOLN PARK,POINT (1171002.449 1916046.010),Children's
2,533051.0,1175254.006,1862670.662,,St Bernard,St Bernard Hospital,Chicago,326 W. 64th Street,St. Bernard Hospital,HOSPITAL,...,68.0,Hospitals,2007-08-10,60621,20,680900,68,ENGLEWOOD,POINT (1175254.006 1862670.662),St Bernard
3,565454.0,1159091.55,1859131.693,,Holy Cross,Holy Cross Hospital,Chicago,2701 W. 68th Street,Holy Cross Hospital,HOSPITAL,...,66.0,Hospitals,2007-08-10,60629,15,660900,66,CHICAGO LAWN,POINT (1159091.550 1859131.693),Holy Cross
4,220136.0,1169801.369,1920306.523,,IL Masonic,Advocate Illinois Masonic Medical Center,Chicago,836 W. Wellington,Advocate Illinois Masonic Med Ctr,HOSPITAL,...,6.0,Hospitals,2007-08-10,60657,44,63000,6,LAKE VIEW,POINT (1169801.369 1920306.523),IL Masonic


In [29]:
df_hospitals["longitude"] = df_hospitals.geometry.to_crs(4326).geometry.x
df_hospitals["latitude"] = df_hospitals.geometry.to_crs(4326).geometry.y

## Metra Stations

In [31]:
df_metra = pd.read_csv(os.path.join(ROOT, "data", "raw", "Metra_Stations.csv"))
df_metra["label"] = df_metra["station_name"]
df_metra.head()

Unnamed: 0,source,station_name,latitude,longitude,label
0,metra,Union Station,41.878683,-87.638892,Union Station
1,metra,Western Avenue,41.88918,-87.688161,Western Avenue
2,metra,Healy,41.92475,-87.727983,Healy
3,metra,Grayland,41.948852,-87.740302,Grayland
4,metra,Mayfair,41.960163,-87.746298,Mayfair


## Building Permits

In [32]:
df_permits = get_SODA_data(
    api_endpoint="https://data.cityofchicago.org/resource/building-permits.json"
)

# remove records with no coordinate data
df_permits = df_permits.loc[
    df_permits.latitude.notnull() & df_permits.longitude.notnull()
]

df_permits["label"] = df_permits["permit_type"]
df_permits.head()

Unnamed: 0,id,permit_,permit_type,review_type,application_start_date,issue_date,processing_time,street_number,street_direction,street_name,...,contact_14_name,contact_14_city,contact_14_state,contact_14_zipcode,contact_15_type,contact_15_name,contact_15_city,contact_15_state,contact_15_zipcode,label
0,1614287,100072880,PERMIT - RENOVATION/ALTERATION,STANDARD PLAN REVIEW,2005-10-14T00:00:00.000,2006-01-03T00:00:00.000,81,2728,N,MONTICELLO,...,,,,,,,,,,PERMIT - RENOVATION/ALTERATION
1,1614371,100072936,PERMIT - SIGNS,SIGN PERMIT,2005-10-17T00:00:00.000,2006-01-12T00:00:00.000,87,120,S,LA SALLE,...,,,,,,,,,,PERMIT - SIGNS
2,1641218,100089035,PERMIT - SIGNS,SIGN PERMIT,2006-01-10T00:00:00.000,2006-01-10T00:00:00.000,0,4001,W,IRVING PARK,...,,,,,,,,,,PERMIT - SIGNS
3,1641219,100089036,PERMIT - SIGNS,SIGN PERMIT,2006-01-10T00:00:00.000,2006-01-10T00:00:00.000,0,4001,W,IRVING PARK,...,,,,,,,,,,PERMIT - SIGNS
4,1641220,100089037,PERMIT - SIGNS,SIGN PERMIT,2006-01-11T00:00:00.000,2006-01-11T00:00:00.000,0,4001,W,IRVING PARK,...,,,,,,,,,,PERMIT - SIGNS


## Mobility Areas

In [33]:
df_community_areas = get_SODA_data(
    api_endpoint="https://data.cityofchicago.org/resource/igwz-8jzy.json"
)

df_community_areas["label"] = df_community_areas["community"]
df_community_areas.head()

Unnamed: 0,the_geom,perimeter,area,comarea,comarea_id,area_numbe,community,area_num_1,shape_area,shape_len,label
0,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",0,0,0,0,35,DOUGLAS,35,46004621.1581,31027.0545098,DOUGLAS
1,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",0,0,0,0,36,OAKLAND,36,16913961.0408,19565.5061533,OAKLAND
2,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",0,0,0,0,37,FULLER PARK,37,19916704.8692,25339.0897503,FULLER PARK
3,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",0,0,0,0,38,GRAND BOULEVARD,38,48492503.1554,28196.8371573,GRAND BOULEVARD
4,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",0,0,0,0,39,KENWOOD,39,29071741.9283,23325.1679062,KENWOOD


In [34]:
# mark which community areas are mobility areas
mobility_areas = [
    "KENWOOD",
    "LINCOLN SQUARE",
    "HYDE PARK",
    "ROGERS PARK",
    "JEFFERSON PARK",
    "FOREST GLEN",
    "NORTH PARK",
    "ALBANY PARK",
    "PORTAGE PARK",
    "IRVING PARK",
    "DUNNING",
    "MONTCLARE",
    "BELMONT CRAGIN",
    "WEST RIDGE",
    "HERMOSA",
    "AVONDALE",
    "LOGAN SQUARE",
    "WEST TOWN",
    "NEAR WEST SIDE",
    "UPTOWN",
    "NEAR SOUTH SIDE",
    "ARMOUR SQUARE",
    "NORWOOD PARK",
    "NEAR NORTH SIDE",
    "LOOP",
    "LAKE VIEW",
    "NORTH CENTER",
    "EAST SIDE",
    "HEGEWISCH",
    "GARFIELD RIDGE",
    "ARCHER HEIGHTS",
    "BRIDGEPORT",
    "WEST ELSDON",
    "CLEARING",
    "WEST LAWN",
    "LINCOLN PARK",
    "ASHBURN",
    "BEVERLY",
    "MOUNT GREENWOOD",
    "MORGAN PARK",
    "OHARE",
    "EDGEWATER",
    "EDISON PARK",
]

# subset to mobility areas
df_mobility_areas = df_community_areas.loc[
    df_community_areas.community.isin(mobility_areas)
]

In [35]:
# create geometry variable for the shapes
df_mobility_areas["geometry"] = df_mobility_areas["the_geom"].apply(
    lambda x: Polygon(x["coordinates"][0][0])
)
gdf_mobility_areas = gpd.GeoDataFrame(
    data=df_mobility_areas, crs="EPSG:4326", geometry="geometry"
)

## ADU Areas

In [36]:
df_ADU = get_SODA_data(
    api_endpoint="https://data.cityofchicago.org/resource/ttjb-ayff.json"
)

df_ADU["label"] = df_ADU["area"]
df_ADU.head()

Unnamed: 0,the_geom,objectid,area,shape_leng,shape_area,label
0,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",1,West,51421.0696119,139856090.07,West
1,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",3,North,78723.3692912,291136399.974,North
2,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",5,Southeast,91576.4719351,321419159.095,Southeast
3,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",8,Northwest,122124.562254,180457542.639,Northwest
4,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",11,South,146010.910711,476944013.591,South


In [37]:
# create geometry variable for the shapes
df_ADU["geometry"] = df_ADU["the_geom"].apply(lambda x: Polygon(x["coordinates"][0][0]))
gdf_ADU = gpd.GeoDataFrame(data=df_ADU, crs="EPSG:4326", geometry="geometry")

In [38]:
# without a .zip shapefile, we have to convert into a geoseries and project the points
# df_ADU = gpd.read_file(os.path.join(SHAPE_FILES, "Additional_Dwelling_Unit_Areas.tsv"))
# df_ADU['geometry'] = gpd.GeoSeries.from_wkt(df_ADU['the_geom'])
# df_ADU['geometry'].crs = "EPSG:4326"

## Zoning

In [39]:
# get zoning spatial data
df_zoning = get_SODA_data(
    api_endpoint="https://data.cityofchicago.org/resource/dj47-wfun.json"
)

df_zoning["label"] = df_zoning["zone_class"]
df_zoning.head()

Unnamed: 0,the_geom,case_numbe,zoning_id,zone_type,zone_class,edit_statu,create_dat,edit_date,pd_num,ordinance_1,shape_area,shape_len,pd_prefix,edit_uid,comments,ordinance,case_type,create_uid,label
0,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",0,2488,2,C2-3,ACTIVE,2002-08-14T00:00:00.000Z,1899-11-30T00:00:00.000Z,0,1899-11-30T00:00:00.000Z,41101.3463588,817.327179297,,,,,,,C2-3
1,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",0,4088,1,B1-1,ACTIVE,2002-08-29T00:00:00.000Z,1899-11-30T00:00:00.000Z,0,1899-11-30T00:00:00.000Z,3981.02012211,380.062184507,,,,,,,B1-1
2,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",0,4745,2,C2-2,ACTIVE,2002-09-05T00:00:00.000Z,1899-11-30T00:00:00.000Z,0,1899-11-30T00:00:00.000Z,21286.1923599,590.945627594,,,,,,,C2-2
3,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",0,4086,3,M2-3,ACTIVE,2002-08-29T00:00:00.000Z,1899-11-30T00:00:00.000Z,0,1899-11-30T00:00:00.000Z,489097.720164,2889.9502873,,,,,,,M2-3
4,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",0,5525,5,PD 416,ACTIVE,2002-09-12T00:00:00.000Z,1899-11-30T00:00:00.000Z,416,1899-11-30T00:00:00.000Z,350813.081204,3526.80670883,I,,,,,,PD 416


In [40]:
df_zoning["geometry"] = df_zoning["the_geom"].apply(
    lambda x: Polygon(x["coordinates"][0][0])
)
gdf_zoning = gpd.GeoDataFrame(data=df_zoning, crs="EPSG:4326", geometry="geometry")

## Bike Routes

In [41]:
df_bike_routes = get_SODA_data(
    api_endpoint="https://data.cityofchicago.org/resource/hvv9-38ut.json"
)

df_bike_routes["label"] = df_bike_routes["st_name"]
df_bike_routes.head()

Unnamed: 0,the_geom,street,st_name,oneway_dir,f_street,t_street,displayrou,mi_ctrline,br_oneway,br_ow_dir,contraflow,label
0,"{'type': 'MultiLineString', 'coordinates': [[[...",MARQUETTE RD,MARQUETTE,-,S CENTRAL PARK AVE,S KEDZIE AVE,BUFFERED BIKE LANE,0.495169733402,N,-,-,MARQUETTE
1,"{'type': 'MultiLineString', 'coordinates': [[[...",MARQUETTE RD,MARQUETTE,-,S KEDZIE AVE,S SACRAMENTO AVE,SHARED-LANE,0.252403055895,N,-,-,MARQUETTE
2,"{'type': 'MultiLineString', 'coordinates': [[[...",18TH ST,18TH,,S DAMEN AVE,S LOOMIS ST,BIKE LANE,0.757320648958,N,,,18TH
3,"{'type': 'MultiLineString', 'coordinates': [[[...",LAKE SHORE DR,LAKE SHORE,,E 81ST ST,E 87TH ST,BUFFERED BIKE LANE,0.754309006935,N,,,LAKE SHORE
4,"{'type': 'MultiLineString', 'coordinates': [[[...",WELLS ST,WELLS,S,W VAN BUREN ST,W ARCADE PL,SHARED-LANE,0.306834750149,Y,S,N,WELLS


In [42]:
# create geometry variable for the lines
df_bike_routes["geometry"] = df_bike_routes["the_geom"].apply(
    lambda x: LineString(x["coordinates"][0])
)
gdf_bike_routes = gpd.GeoDataFrame(
    data=df_bike_routes, crs="EPSG:4326", geometry="geometry"
)

## Wards

In [43]:
df_wards = get_SODA_data(
    api_endpoint="https://data.cityofchicago.org/resource/k9yb-bpqx.json"
)

df_wards["label"] = df_wards["ward"]
df_wards.head()

Unnamed: 0,the_geom,ward,shape_leng,shape_area,label
0,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",12,93073.3408379,116096507.849,12
1,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",16,97901.3238332,103143638.546,16
2,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",15,82183.6948197,65588297.917,15
3,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",20,90105.1956185,137290356.97,20
4,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",49,38122.6928259,49733459.1385,49


In [44]:
df_wards["geometry"] = df_wards["the_geom"].apply(
    lambda x: Polygon(x["coordinates"][0][0])
)
gdf_wards = gpd.GeoDataFrame(data=df_wards, crs="EPSG:4326", geometry="geometry")

## Neighborhoods

In [45]:
df_neighborhoods = get_SODA_data(
    api_endpoint="https://data.cityofchicago.org/resource/y6yq-dbs2.json"
)

df_neighborhoods["label"] = df_neighborhoods["pri_neigh"]
df_neighborhoods.head()

Unnamed: 0,the_geom,pri_neigh,sec_neigh,shape_area,shape_len,label
0,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",Grand Boulevard,BRONZEVILLE,48492503.1554,28196.837157,Grand Boulevard
1,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",Printers Row,PRINTERS ROW,2162137.97139,6864.247156,Printers Row
2,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",United Center,UNITED CENTER,32520512.7053,23101.363745,United Center
3,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",Sheffield & DePaul,SHEFFIELD & DEPAUL,10482592.2987,13227.049745,Sheffield & DePaul
4,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",Humboldt Park,HUMBOLDT PARK,125010425.593,46126.751351,Humboldt Park


In [46]:
df_neighborhoods["geometry"] = df_neighborhoods["the_geom"].apply(
    lambda x: Polygon(x["coordinates"][0][0])
)
gdf_neighborhoods = gpd.GeoDataFrame(
    data=df_neighborhoods, crs="EPSG:4326", geometry="geometry"
)

## Enterprise Zones

In [47]:
df_enterprise_zones = get_SODA_data(
    api_endpoint="https://data.cityofchicago.org/resource/bwpt-y235.json"
)

df_enterprise_zones["label"] = df_enterprise_zones["name"]
df_enterprise_zones.head()

Unnamed: 0,display_co,the_geom,objectid,enterprise,name,shape_leng,shape_area,label
0,1,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",1,5,5,166021.685716,235940637.947,5
1,0,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",3,2,2,232181.868554,337162988.056,2
2,0,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",4,4,4,265182.045484,250430390.712,4
3,0,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",5,3,3,342171.797656,446806134.969,3
4,6,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",6,6,6,334404.043467,327062437.241,6


In [48]:
df_enterprise_zones["geometry"] = df_enterprise_zones["the_geom"].apply(
    lambda x: Polygon(x["coordinates"][0][0])
)
gdf_enterprise_zones = gpd.GeoDataFrame(
    data=df_enterprise_zones, crs="EPSG:4326", geometry="geometry"
)

## Public High Schools

In [49]:
df_hs = get_SODA_data(
    api_endpoint="https://data.cityofchicago.org/resource/juf9-y87b.json"
)

df_hs["label"] = df_hs["school_nm"]
df_hs.head()

Unnamed: 0,school_id,school_nm,school_add,the_geom,grade_cat,boundarygr,sch_type,shape_leng,shape_area,label
0,609691,NORTH-GRAND HS,"4338 W WABANSIA AVE CHICAGO, IL 60639","{'type': 'MultiPolygon', 'coordinates': [[[[-8...",HS,"9, 10, 11, 12","District, non-alternative",21536.4574225,18818852.8467,NORTH-GRAND HS
1,609695,AMUNDSEN HS,"5110 N DAMEN AVE CHICAGO, IL 60625","{'type': 'MultiPolygon', 'coordinates': [[[[-8...",HS,"9, 10, 11, 12","District, non-alternative",42505.3602555,73276218.3437,AMUNDSEN HS
2,609698,BOGAN HS,"3939 W 79TH ST CHICAGO, IL 60652","{'type': 'MultiPolygon', 'coordinates': [[[[-8...",HS,"9, 10, 11, 12","District, non-alternative",63913.7580959,204960361.339,BOGAN HS
3,610323,BOWEN HS,"2710 E 89TH ST CHICAGO, IL 60617","{'type': 'MultiPolygon', 'coordinates': [[[[-8...",HS,"9, 10, 11, 12","District, non-alternative",13732.613646,11384287.2148,BOWEN HS
4,610518,AUSTIN CCA HS,"231 N PINE AVE CHICAGO, IL 60644","{'type': 'MultiPolygon', 'coordinates': [[[[-8...",HS,"9, 10, 11, 12","District, non-alternative",44154.4120323,67205615.8692,AUSTIN CCA HS


In [50]:
df_hs["geometry"] = df_hs["the_geom"].apply(lambda x: Polygon(x["coordinates"][0][0]))
gdf_hs = gpd.GeoDataFrame(data=df_hs, crs="EPSG:4326", geometry="geometry")

# Combine all data sources

In [51]:
sources_point_data = {
    "EV_chargers": df_EV_chargers,
    "farmers_market": df_farmers_market,
    "divvy": df_divvy,
    "L": df_L,
    "licenses": df_licenses,
    "current_licenses": df_current_licenses,
    "liquor": df_liquor,
    "landmarks": df_landmarks,
    "park_art": df_park_art,
    "murals": df_murals,
    "grocery": df_grocery,
    "permits": df_permits,
    "hospitals": df_hospitals,
    "metra": df_metra,
}

In [40]:
df_location_combined = pd.DataFrame(
    columns=["source", "label", "latitude", "longitude"]
)

for name, df in sources_point_data.items():
    df_location = df[["label", "latitude", "longitude"]]
    df_location["source"] = name
    df_location_combined = pd.concat([df_location_combined, df_location])

df_location_combined.reset_index(drop=True, inplace=True)

# fix data types
df_location_combined.latitude = df_location_combined.latitude.astype("float")
df_location_combined.longitude = df_location_combined.longitude.astype("float")

In [41]:
# df_location_combined.latitude.plot(kind='kde')

In [42]:
# df_location_combined.longitude.plot(kind='kde')

In [44]:
sources_shape_data = {
    "mobility_areas": gdf_mobility_areas,
    "ADU": gdf_ADU,
    "zoning": gdf_zoning,
    "bike_routes": df_bike_routes,
    "ward": gdf_wards,
    "neighborhood": gdf_neighborhoods,
    "enterprise_zone": df_enterprise_zones,
    "high_schools": gdf_hs,
}

In [45]:
df_shape_data_combined = pd.DataFrame(columns=["source", "label", "geometry"])

for name, df in sources_shape_data.items():
    df_shape_data = df[["label", "geometry"]]
    df_shape_data["source"] = name
    df_shape_data_combined = pd.concat([df_shape_data_combined, df_shape_data])


df_shape_data_combined.reset_index(drop=True, inplace=True)



In [46]:
# make column names upper case
df_location_combined.columns = [col.upper() for col in df_location_combined.columns]
df_shape_data_combined.columns = [col.upper() for col in df_shape_data_combined.columns]

In [47]:
# write data
df_location_combined.to_pickle(os.path.join(CLEANED, "whats_nearby_location_data.pkl"))
df_shape_data_combined.to_pickle(os.path.join(CLEANED, "whats_nearby_shape_data.pkl"))