# Find the neighborhood a coordinate falls within
#### Author: Jenny Zhen
#### Nov 6th, 2019

#### Note:
1. The coordinate points need to be formatted as (longitude, latitude).

## Function

In [37]:
# Import following packages.
import fiona
from shapely.geometry import shape,mapping, Point, Polygon, MultiPolygon
import pandas as pd
import numpy as np 

In [2]:
def find_neighborhood(neighborhood, lat, long):
    """
    Find the neighborhood that each (lat, long) coordinate pair falls within.
    Remember to import the packages above first.

    Note: Point in this function is built as (long, lat).

    Parameters:
    --------------------
    neighborhood:    string
                     Path to a shape file whose features carry a boundary geometry and a
                     'pri_neigh' property holding the neighborhood name.

    lat:             pd.Series
                     The latitude column of the dataframe to look up.

    long:            pd.Series
                     The longitude column of the dataframe to look up. Paired with `lat`
                     by position, so the index labels do not matter.

    Return:
    --------------------
    neigh_names:     list
                     Exactly one entry per coordinate pair, in input order: the name of the
                     first feature containing the point, or np.nan if no feature contains it.

    Raises:
    --------------------
    ValueError       If `lat` and `long` do not have the same length.
    """
    if len(lat) != len(long):
        raise ValueError("lat and long must have the same length")

    # Read the shape file once and close it; build each polygon a single time
    # instead of once per point.
    with fiona.open(neighborhood) as features:
        areas = [(shape(feat['geometry']), feat['properties']['pri_neigh'])
                 for feat in features]

    neigh_names = []
    # zip pairs the values positionally, so a filtered or re-indexed Series works too.
    for point_lat, point_long in zip(lat, long):
        # Point in this case is formatted as (long, lat).
        point = Point(point_long, point_lat)

        # Keep only the first match so the result always lines up row-for-row
        # with the input; fall back to nan when nothing contains the point.
        match = next((name for polygon, name in areas if point.within(polygon)), np.nan)
        neigh_names.append(match)

    return neigh_names
        
    

## Find neighborhoods for datasets

### Station

In [91]:
station = pd.read_csv('stations.csv')

In [92]:
# Look up the neighborhood name of every station. find_neighborhood takes the
# latitude column first and the longitude column second.
station['neighborhood'] = find_neighborhood("geo_export_cdc4661f-83bd-40ef-9034-c4bdb778cf15.shp", station['lat'], station['long'])

In [131]:
# Count the bike stations that fall outside every neighborhood boundary and
# therefore have no neighborhood id.
station['neighborhood_id'].isna().sum()

14

In [95]:
# Substitute the neighborhood name with its id from the lookup table.
neighborhood = pd.read_csv('neighborhood.csv')

# A left join keeps stations without a matching neighborhood; the name columns
# are no longer needed once the id is attached.
station = (
    station
    .merge(neighborhood, how='left', left_on='neighborhood', right_on='name')
    .drop(columns=['name', 'neighborhood'])
)

In [128]:
# Stations without a neighborhood have a missing id after the left merge.
# The nullable 'Int64' dtype keeps the ids as integers while preserving those gaps.
station['neighborhood_id'] = station['neighborhood_id'].astype('Int64')

In [132]:
# NOTE: this writes to the same 'stations.csv' that was read above, replacing the input file.
station.to_csv('stations.csv', index=False)

### Segment

1. The data has a start neighborhood and an end neighborhood. 1046 rows
2. If the start and/or end neighborhood is out of scope, delete the entire row. 963 rows
3. Keep only segments where start_neigh equals end_neigh. 609 rows

In [174]:
seg = pd.read_csv('segments.csv')

In [175]:
# Look up the neighborhood of each segment's start point and end point separately.
# find_neighborhood takes the latitude column first and the longitude column second.
seg['start_neigh'] = find_neighborhood("geo_export_cdc4661f-83bd-40ef-9034-c4bdb778cf15.shp", seg['START_LATITUDE'], seg['START_LONGITUDE'])
seg['end_neigh'] = find_neighborhood("geo_export_cdc4661f-83bd-40ef-9034-c4bdb778cf15.shp", seg['END_LATITUDE'], seg['END_LONGITUDE'])

In [177]:
# Swap the start and end neighborhood names for their ids. Each pass joins the
# lookup table on the name, renames the joined id column, and drops the name columns.
seg = (
    seg
    .merge(neighborhood, how='left', left_on='start_neigh', right_on='name')
    .rename(columns={'neighborhood_id': 'start_neigh_id'})
    .drop(columns=['start_neigh', 'name'])
    .merge(neighborhood, how='left', left_on='end_neigh', right_on='name')
    .rename(columns={'neighborhood_id': 'end_neigh_id'})
    .drop(columns=['end_neigh', 'name'])
)

In [191]:
# NOTE: this writes to the same 'segments.csv' that was read above, replacing the input file.
seg.to_csv('segments.csv', index=False)

### Crash

In [3]:
# NOTE(review): absolute path on the author's machine; the other datasets are
# read from the working directory. Adjust before running elsewhere.
crash = pd.read_csv('/Users/Jenny/Downloads/Clean_Crash.csv')

In [5]:
# Remove the 'Unnamed: 0' column (presumably a row index saved by an earlier
# export of this file; verify against the CSV).
crash.drop(['Unnamed: 0'], axis=1, inplace=True)

In [7]:
# Look up the neighborhood name of every crash location. find_neighborhood takes
# the latitude column first and the longitude column second.
crash['neighborhood'] = find_neighborhood("geo_export_cdc4661f-83bd-40ef-9034-c4bdb778cf15.shp", crash['LATITUDE'], crash['LONGITUDE'])

In [11]:
crash.to_csv('crash_backup.csv', index=False)

In [12]:
# Substitute the neighborhood name with its id from the lookup table.
neighborhood = pd.read_csv('neighborhood.csv')

# A left join keeps crashes without a matching neighborhood; the name columns
# are no longer needed once the id is attached.
crash = (
    crash
    .merge(neighborhood, how='left', left_on='neighborhood', right_on='name')
    .drop(columns=['name', 'neighborhood'])
)

In [14]:
# Crashes without a neighborhood have a missing id after the left merge.
# The nullable 'Int64' dtype keeps the ids as integers while preserving those gaps.
crash['neighborhood_id'] = crash['neighborhood_id'].astype('Int64')

In [15]:
crash.to_csv('crash.csv', index=False)

## Export all neighborhoods, with their ids, to CSV

In [43]:
# Open the neighborhood boundary shape file to list its features.
# NOTE(review): absolute path on the author's machine, and the file name differs
# from the shape file passed to find_neighborhood above; confirm both hold the same boundaries.
df = fiona.open("/Users/Jenny/Desktop/MSCA/Data Engineering/Project/Boundaries - Neighborhoods/Boundaries - Neighborhoods/geo_export_c0182ca0-1f35-4cb2-b1bd-75b05d26853c.shp")

In [44]:
# Print the primary neighborhood name of every feature, one per line.
for neigh_name in (record['properties']['pri_neigh'] for record in df):
    print(neigh_name)

Grand Boulevard
Printers Row
United Center
Sheffield & DePaul
Humboldt Park
Garfield Park
North Lawndale
Little Village
Armour Square
Avalon Park
Burnside
Hermosa
Avondale
Logan Square
Calumet Heights
East Side
West Pullman
Garfield Ridge
New City
Englewood
Grand Crossing
Ashburn
Mount Greenwood
Morgan Park
O'Hare
Jackson Park
Loop
Pullman
Riverdale
Hegewisch
Greektown
Douglas
Museum Campus
Edgewater
Lake View
Lincoln Park
Magnificent Mile
Lincoln Square
Oakland
Grant Park
West Loop
Fuller Park
Andersonville
Woodlawn
Portage Park
Rush & Division
Little Italy, UIC
Kenwood
Rogers Park
Jefferson Park
Sauganash,Forest Glen
North Park
Albany Park
Irving Park
Dunning
West Ridge
Uptown
Norwood Park
Streeterville
South Shore
Chatham
South Chicago
Roseland
North Center
South Deering
Washington Park
Millenium Park
Near South Side
Chinatown
Chicago Lawn
Auburn Gresham
Beverly
Washington Heights
Edison Park
Hyde Park
Bucktown
Lower West Side
Wrigleyville
Archer Heights
Brighton Park
Mckinley Park


In [26]:
# Peek at one feature to see which property keys and geometry layout the file uses.
# Collection.next() is deprecated in Fiona (it triggered the warning shown below);
# the builtin iterator protocol is the supported way to fetch the first feature.
next(iter(df))

  """Entry point for launching an IPython kernel.


{'type': 'Feature',
 'id': '0',
 'properties': OrderedDict([('pri_neigh', 'Grand Boulevard'),
              ('sec_neigh', 'BRONZEVILLE'),
              ('shape_area', 48492503.1554),
              ('shape_len', 28196.837157)]),
 'geometry': {'type': 'Polygon',
  'coordinates': [[(-87.60670812560372, 41.816813771373916),
    (-87.60670480953513, 41.81657908583583),
    (-87.60670022648416, 41.8163387135525),
    (-87.60669581538596, 41.81609935772734),
    (-87.60668982110386, 41.815811802465646),
    (-87.60668357216166, 41.8155663152661),
    (-87.60667660553902, 41.81529991216345),
    (-87.6066796364494, 41.81499416811356),
    (-87.6066823589318, 41.81471953500857),
    (-87.60667153481018, 41.814281645324144),
    (-87.60666414094077, 41.813994602529604),
    (-87.60665643548607, 41.81366052091473),
    (-87.60665089439038, 41.81342058153232),
    (-87.60664694986741, 41.81317320016693),
    (-87.60664346744456, 41.812954776539605),
    (-87.60663536449945, 41.81265587393209),
   

In [67]:
# Build the id/name lookup table in one pass over the shape file features.
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on every
# call, so the records are collected first and the frame is built once.
# `columns=` keeps both columns (in this order) even if the file has no features.
neighborhood = pd.DataFrame(
    [{'neighborhood_id': feat['id'], 'name': feat['properties']['pri_neigh']} for feat in df],
    columns=['neighborhood_id', 'name'],
)

In [68]:
neighborhood.to_csv('neighborhood.csv', index=False)

In [62]:
# NOTE(review): absolute path on the author's machine; adjust before running elsewhere.
neighborhood=pd.read_csv('/Users/Jenny/Desktop/MSCA/Data Engineering/Project/Boundaries - Neighborhoods/neighborhood.csv')

In [65]:
# Emit one SQL UPDATE statement per neighborhood so the names can be loaded into the database.
for name, neighborhood_id in zip(neighborhood['name'], neighborhood['neighborhood_id']):
        # Double any embedded single quote so names such as "O'Hare" stay valid SQL string literals.
        print("UPDATE neighborhood SET NAME = '{}' WHERE neighborhood_id = {};".format(str(name).replace("'", "''"), neighborhood_id))

UPDATE neighborhood SET NAME = 'Grand Boulevard' WHERE neighborhood_id = 1;
UPDATE neighborhood SET NAME = 'Printers Row' WHERE neighborhood_id = 2;
UPDATE neighborhood SET NAME = 'United Center' WHERE neighborhood_id = 3;
UPDATE neighborhood SET NAME = 'Sheffield & DePaul' WHERE neighborhood_id = 4;
UPDATE neighborhood SET NAME = 'Humboldt Park' WHERE neighborhood_id = 5;
UPDATE neighborhood SET NAME = 'Garfield Park' WHERE neighborhood_id = 6;
UPDATE neighborhood SET NAME = 'North Lawndale' WHERE neighborhood_id = 7;
UPDATE neighborhood SET NAME = 'Little Village' WHERE neighborhood_id = 8;
UPDATE neighborhood SET NAME = 'Armour Square' WHERE neighborhood_id = 9;
UPDATE neighborhood SET NAME = 'Avalon Park' WHERE neighborhood_id = 10;
UPDATE neighborhood SET NAME = 'Burnside' WHERE neighborhood_id = 11;
UPDATE neighborhood SET NAME = 'Hermosa' WHERE neighborhood_id = 12;
UPDATE neighborhood SET NAME = 'Avondale' WHERE neighborhood_id = 13;
UPDATE neighborhood SET NAME = 'Logan Square

In [52]:
neighborhood

Unnamed: 0,neighborhood_id,name
0,0,Grand Boulevard
1,1,Printers Row
2,2,United Center
3,3,Sheffield & DePaul
4,4,Humboldt Park
5,5,Garfield Park
6,6,North Lawndale
7,7,Little Village
8,8,Armour Square
9,9,Avalon Park
