In [1]:
import pandas as pd

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
import requests
import time
import shapely
import json
import math
import re
from shapely.geometry import asShape
from collections import defaultdict

In [4]:
# Directory prefix (note the trailing slash) that is string-formatted in front of
# the JSON/CSV input file names read later in this notebook.
data_path = 'nyc-accessibility/elevator-pipeline/data/'

## Read in Data

In [5]:
# Per-station elevator counts; the CSV's `id` column becomes the frame index.
elevator_counts_csv = 'num_elevator.csv'
df = pd.read_csv(elevator_counts_csv, index_col='id')
df.head()

Unnamed: 0_level_0,num_elevator
id,Unnamed: 1_level_1
1,3
10,3
105,2
110,2
114,3


In [6]:
# Cast the elevator counts to float via a frame-level astype (re-running is a no-op).
df = df.astype({'num_elevator': float})

In [7]:
# Map each index value (station id) to its `num_elevator` value.
num_elev_dict = df['num_elevator'].to_dict()
num_elev_dict

{1: 3.0,
 3: 2.0,
 5: 3.0,
 8: 3.0,
 9: 2.0,
 10: 3.0,
 14: 3.0,
 25: 5.0,
 34: 3.0,
 35: 3.0,
 41: 3.0,
 46: 2.0,
 49: 10.0,
 57: 2.0,
 59: 3.0,
 67: 2.0,
 69: 1.0,
 72: 2.0,
 77: 2.0,
 82: 1.0,
 84: 5.0,
 105: 2.0,
 110: 2.0,
 114: 3.0,
 120: 1.0,
 133: 7.0,
 135: 2.0,
 140: 1.0,
 157: 5.0,
 159: 1.0,
 163: 2.0,
 176: 7.0,
 178: 3.0,
 189: 0.0,
 194: 2.0,
 205: 4.0,
 219: 3.0,
 223: 3.0,
 225: 2.0,
 226: 2.0,
 227: 2.0,
 241: 3.0,
 250: 3.0,
 253: 2.0,
 256: 2.0,
 262: 2.0,
 269: 3.0,
 276: 2.0,
 277: 1.0,
 279: 0.0,
 280: 2.0,
 283: 2.0,
 291: 5.0,
 294: 3.0,
 305: 3.0,
 315: 3.0,
 331: 2.0,
 345: 0.0,
 347: 3.0,
 348: 0.0,
 350: 3.0,
 352: 2.0,
 353: 2.0,
 354: 5.0,
 355: 0.0,
 359: 3.0,
 360: 4.0,
 363: 3.0,
 369: 4.0,
 373: 3.0,
 400: 3.0,
 405: 3.0,
 409: 3.0,
 414: 19.0,
 420: 2.0,
 425: 0.0,
 430: 19.0,
 434: 5.0,
 436: 2.0,
 438: 4.0,
 439: 4.0,
 445: 5.0,
 460: 3.0,
 469: 2.0,
 470: 3.0,
 471: 4.0}

## Get neighborhood

In [8]:
# Reload the core datasets; every file is a JSON document stored under `data_path`.
def _load_json(path):
    """Open `path`, parse it as JSON and return the resulting object."""
    with open(path) as infile:
        return json.load(infile)

neighborhood_to_stations = _load_json('{}neighborhood_to_stations.json'.format(data_path))
stations_to_neighborhoods = _load_json('{}stations_to_neighborhoods.json'.format(data_path))
mta_stations_sorted_by_name = _load_json('{}mta_stations_sorted_by_name.json'.format(data_path))
nyca_stations = _load_json('{}nyca_stations.json'.format(data_path))
all_nyca_station_details = _load_json('{}all_nyca_station_details.json'.format(data_path))
neighborhood_data = _load_json('{}neighborhood_data.json'.format(data_path))

In [9]:
# (name, line, objectid) for every MTA station, in ascending tuple order.
mta_sorted = [
    (props['name'], props['line'], props['objectid'])
    for props in (station['properties'] for station in mta_stations_sorted_by_name)
]
mta_sorted.sort()

# (name, lines, id) for every NYCA station; `lines` is frozen into a tuple
# before the list is sorted the same way.
nyca_sorted = [
    (station['name'], tuple(station['lines']), station['id'])
    for station in nyca_stations
]
nyca_sorted.sort()

In [10]:
# Dump both sorted station lists to CSV (presumably the raw material for the
# aligned file read below -- TODO confirm how aligned_mta_nyca.csv is produced).
pd.DataFrame(mta_sorted).to_csv('mta_sorted.csv')
pd.DataFrame(nyca_sorted).to_csv('nyca_sorted.csv')

# `pd.DataFrame.from_csv` was deprecated in pandas 0.21 and removed in 1.0, so it
# fails on a fresh kernel with current pandas. `read_csv(index_col=0,
# parse_dates=True)` is the documented equivalent; keeping the first column as
# the index preserves the positional layout that `dr1.to_records()` relies on.
dr1 = pd.read_csv(
    '{}aligned_mta_nyca.csv'.format(data_path),
    index_col=0,
    parse_dates=True,
)

In [11]:
# tuple of (mta,nyca_id)
def format_mta_id(flt):
    """Turn a raw MTA object id into its string form.

    Falsy values (``None``, ``0``) and NaN yield ``None``; anything else is
    truncated with ``int()`` and returned as a ``str``.
    """
    is_missing = (not flt) or math.isnan(flt)
    return None if is_missing else str(int(flt))

def format_nyca_id(flt):
    """Turn a raw NYCA station id into a plain ``int``.

    Returns ``None`` when ``flt`` is falsy (``None``, ``0``) or NaN, otherwise
    ``int(flt)``.
    """
    if flt and not math.isnan(flt):
        return int(flt)
    return None

# Pair up the normalised ids of every aligned record. Fields are read by
# position: 3 is passed to the MTA formatter and 7 to the NYCA formatter
# (position 0 of a record is the frame index).
aligned_objectids = [
    (format_mta_id(mta_raw), format_nyca_id(nyca_raw))
    for mta_raw, nyca_raw in ((rec[3], rec[7]) for rec in dr1.to_records())
]

# id -> display-name lookups for both station sources.
mta_stations_to_name = dict(
    (station['properties']['objectid'], station['properties']['name'])
    for station in mta_stations_sorted_by_name
)
nyca_stations_to_name = dict(
    (station['id'], station['name'])
    for station in nyca_stations
)

# Sanity check: the first aligned pair should name the same station on both sides.
first_rec = aligned_objectids[0]
(mta_stations_to_name[first_rec[0]], nyca_stations_to_name[first_rec[1]])

('103rd St', '103rd St')

In [12]:
# NYCA station id -> set of NTA codes, collected through the aligned MTA ids.
nyca_station_to_ntacode = defaultdict(set)
for mta_id, nyca_id in aligned_objectids:
    # Records missing either id cannot be linked; report them and move on.
    if mta_id is None:
        print('Missing mta_id for nyca_id {}'.format(nyca_id))
        continue
    if nyca_id is None:
        print('Missing nyca_id for mta_id {}'.format(mta_id))
        continue
    if mta_id not in stations_to_neighborhoods:
        print('Missing neighborhood data for {}'.format(mta_id))
        continue
    neighborhoods = stations_to_neighborhoods[mta_id]
    # Only MTA stations that resolve to exactly one neighborhood are recorded.
    if len(neighborhoods) != 1:
        print('Multi-neighborhood data for {}/{}'.format(mta_id, nyca_id))
        continue
    nyca_station_to_ntacode[nyca_id].add(neighborhoods[0])

# NTA code -> neighborhood name, used for the report below.
ntacode_to_name = dict(
    (feature['properties']['NTACode'], feature['properties']['NTAName'])
    for feature in neighborhood_data['features']
)

# Report every NYCA station that did not end up with exactly one neighborhood.
for nyca_id, ntacodes in nyca_station_to_ntacode.items():
    if len(ntacodes) == 1:
        continue
    print('NYCA id {} ({}) maps to neighborhoods {}'.format(
        nyca_id,
        nyca_stations_to_name[nyca_id],
        [ntacode_to_name[nb] for nb in ntacodes]))

Missing mta_id for nyca_id 372
Missing nyca_id for mta_id 470
Missing nyca_id for mta_id 442
Missing nyca_id for mta_id 641
Missing nyca_id for mta_id 642
Missing nyca_id for mta_id 443
Missing nyca_id for mta_id 643
Missing nyca_id for mta_id 153
Missing nyca_id for mta_id 371
NYCA id 34 (Union Sq - 14th St) maps to neighborhoods ['West Village', 'Hudson Yards-Chelsea-Flatiron-Union Square']
NYCA id 49 (Atlantic Ave) maps to neighborhoods ['Fort Greene', 'Park Slope-Gowanus']
NYCA id 57 (Borough Hall) maps to neighborhoods ['Brooklyn Heights-Cobble Hill', 'DUMBO-Vinegar Hill-Downtown Brooklyn-Boerum Hill']
NYCA id 274 (Broadway Junction) maps to neighborhoods ['Ocean Hill', 'Bushwick South']
NYCA id 471 (Grand Central - 42nd St) maps to neighborhoods ['Turtle Bay-East Midtown', 'Murray Hill-Kips Bay', 'Midtown-Midtown South']


In [13]:
def station_scores_to_neighborhood_data(station_scores, station_to_ntacodes=None):
    """Sum per-station scores into per-neighborhood totals.

    Parameters
    ----------
    station_scores : dict
        Maps a station id (anything ``int()`` accepts) to a numeric score.
    station_to_ntacodes : mapping, optional
        Maps an integer station id to an iterable of neighborhood codes.
        Defaults to the notebook-level ``nyca_station_to_ntacode``.

    Returns
    -------
    dict
        Neighborhood code -> sum of the scores of all stations mapped to it.
        A station mapped to several neighborhoods contributes its full score
        to each of them; stations without a mapping contribute nothing.
    """
    if station_to_ntacodes is None:
        station_to_ntacodes = nyca_station_to_ntacode

    neighborhood_scores = defaultdict(int)
    for station_id, station_score in station_scores.items():
        # Use .get() rather than [] so that looking up an unknown station id
        # does not insert an empty set into a defaultdict mapping as a side effect.
        for neighborhood in station_to_ntacodes.get(int(station_id), ()):
            neighborhood_scores[neighborhood] += station_score
    return dict(neighborhood_scores)

In [14]:
# Roll the per-station elevator counts up to neighborhood (NTA code) totals.
# A station mapped to several neighborhoods adds its full count to each of them.
num_elev = station_scores_to_neighborhood_data(num_elev_dict)
num_elev

{'BK09': 2.0,
 'BK23': 3.0,
 'BK28': 3.0,
 'BK37': 10.0,
 'BK38': 6.0,
 'BK41': 3.0,
 'BK42': 1.0,
 'BK43': 0.0,
 'BK44': 2.0,
 'BK50': 0.0,
 'BK60': 2.0,
 'BK61': 5.0,
 'BK68': 13.0,
 'BK69': 2.0,
 'BK72': 2.0,
 'BK77': 3.0,
 'BK78': 3.0,
 'BK82': 3.0,
 'BK95': 2.0,
 'BX05': 6.0,
 'BX10': 2.0,
 'BX17': 2.0,
 'BX27': 5.0,
 'BX29': 2.0,
 'BX34': 2.0,
 'BX44': 5.0,
 'BX49': 3.0,
 'BX63': 5.0,
 'MN01': 2.0,
 'MN03': 1.0,
 'MN11': 3.0,
 'MN12': 2.0,
 'MN13': 8.0,
 'MN14': 9.0,
 'MN17': 26.0,
 'MN19': 7.0,
 'MN20': 4.0,
 'MN23': 11.0,
 'MN24': 10.0,
 'MN25': 40.0,
 'MN27': 3.0,
 'MN31': 2.0,
 'MN34': 2.0,
 'MN35': 1.0,
 'MN36': 9.0,
 'MN40': 2.0,
 'QN10': 1.0,
 'QN15': 2.0,
 'QN17': 3.0,
 'QN20': 0.0,
 'QN22': 1.0,
 'QN29': 11.0,
 'QN31': 6.0,
 'QN60': 5.0,
 'QN61': 12.0,
 'QN63': 5.0,
 'QN68': 3.0,
 'QN98': 3.0}

In [15]:
# How many neighborhoods ended up with an elevator total.
len(num_elev)

58

In [16]:
# Attach each neighborhood's total to its GeoJSON feature (0 when the
# neighborhood has no entry in `num_elev`). This mutates `neighborhood_data` in place.
# NOTE(review): the property is called 'elevator_outage_score' although the values
# written here are elevator counts -- confirm that consumers expect this key.
for feature in neighborhood_data['features']:
    props = feature['properties']
    props['elevator_outage_score'] = num_elev.get(props['NTACode'], 0)

# Persist the enriched GeoJSON.
with open('num_elev.json', 'w') as outfile:
    json.dump(neighborhood_data, outfile)