## This notebook is to process NYCTaxiZone geojson

In [None]:
import networkx as nx
import shapely
import matplotlib.pyplot as plt
import geopandas as gpd
import tqdm
import googlemaps
import numpy as np
import pickle

In [None]:
API_KEY = 'AIzaSyB2gMJIVVvlpgRoGkHOzWqe_s6WcEXN9Eo'
gmaps = googlemaps.Client(key=API_KEY)

In [22]:
zones_gpd = 'data/NYC Taxi Zones/NYC Taxi Zones.geojson'
nycZones = gpd.read_file(zones_gpd)

In [None]:
testGeom = nycZones.loc[0]['geometry']
testGeomBuffed = testGeom.buffer(0.0005)

In [None]:
testGeomBuffed.exterior.xy

In [None]:
dir(testGeom)

In [None]:
for geom in testGeom.geoms:
    xs, ys = geom.exterior.xy
    plt.plot(xs, ys, color="blue")
    
b_xs, b_ys = testGeomBuffed.exterior.xy
plt.plot(b_xs, b_ys, color="red")
    
plt.show()

In [4]:
def Plot_Multi_Polygons(Geom, _color="blue", plt_show=False):
    for _geom in Geom.geoms:
        _xs, _ys = _geom.exterior.xy
        plt.plot(_xs, _ys, color=_color)
    if plt_show:
        plt.show()
        
def Plot_Polygon(polyGeom, _color="red", plt_show=False):
    _xs, _ys = polyGeom.exterior.xy
    plt.plot(_xs, _ys, color=_color)
    if plt_show:
        plt.show()
        
def Plot_Zone(_Geom, _color, plt_show=False):
    try:
        Plot_Multi_Polygons(_Geom, _color, plt_show)
    except AttributeError:
        Plot_Polygon(_Geom, _color, plt_show)

In [None]:
testGeom = nycZones.loc[2]['geometry']
testGeomBuffed = testGeom.buffer(0.0005)
testGeomToMatch = nycZones.loc[50]['geometry']
Plot_Zone(testGeomToMatch, "red")
Plot_Zone(testGeomBuffed, "blue")

intsec = testGeomBuffed.intersection(testGeomToMatch)
print (intsec.is_empty)

In [5]:
def Create_Proximity_Edges(zones_gpd):
    connectionList = []
    for i in tqdm.trange(len(zones_gpd)):
        currGeom = zones_gpd.loc[i]['geometry']
        currGeomBuffed = currGeom.buffer(0.0005)
        for j in range(len(zones_gpd)):
            if i == j:
                continue
            matchGeom = zones_gpd.loc[j]['geometry']
            intsec_ = currGeomBuffed.intersection(matchGeom)
            if not intsec_.is_empty:
                connectionList.append(tuple([i,j]))
    return connectionList
            

In [6]:
proxConn_ = Create_Proximity_Edges(nycZones)
proxConn_

100%|████████████████████████████████████████████████████████████████████████████████| 263/263 [00:07<00:00, 35.84it/s]


[(1, 32),
 (1, 136),
 (2, 31),
 (2, 50),
 (2, 82),
 (2, 183),
 (2, 184),
 (2, 242),
 (2, 256),
 (3, 73),
 (3, 151),
 (3, 221),
 (3, 232),
 (4, 86),
 (4, 99),
 (4, 203),
 (5, 115),
 (5, 118),
 (5, 213),
 (5, 223),
 (6, 149),
 (6, 176),
 (6, 192),
 (6, 208),
 (6, 222),
 (6, 225),
 (6, 260),
 (7, 176),
 (7, 225),
 (8, 20),
 (8, 69),
 (8, 98),
 (8, 123),
 (8, 171),
 (8, 191),
 (9, 40),
 (9, 44),
 (9, 132),
 (9, 166),
 (10, 206),
 (10, 214),
 (10, 215),
 (10, 217),
 (10, 218),
 (11, 16),
 (11, 18),
 (11, 67),
 (12, 13),
 (12, 87),
 (12, 261),
 (13, 12),
 (13, 230),
 (13, 261),
 (14, 23),
 (14, 46),
 (14, 94),
 (14, 140),
 (14, 174),
 (14, 240),
 (15, 28),
 (15, 39),
 (15, 51),
 (15, 64),
 (15, 97),
 (15, 106),
 (15, 180),
 (16, 11),
 (16, 67),
 (16, 226),
 (16, 227),
 (17, 20),
 (17, 171),
 (17, 255),
 (18, 11),
 (18, 24),
 (18, 25),
 (18, 67),
 (19, 99),
 (19, 118),
 (19, 155),
 (19, 188),
 (19, 254),
 (20, 8),
 (20, 17),
 (20, 63),
 (20, 98),
 (20, 171),
 (20, 175),
 (21, 36),
 (21, 48),


In [None]:
nycZones.loc[0]['geometry'].buffer(0.005).exterior.xy

In [None]:
lat_o = nycZones.loc[0]['geometry'].centroid.xy[1][0]
lng_o = nycZones.loc[0]['geometry'].centroid.xy[0][0]

lat_d = nycZones.loc[105]['geometry'].centroid.xy[1][0]
lng_d = nycZones.loc[105]['geometry'].centroid.xy[0][0]

In [None]:
dm = gmaps.distance_matrix((lat_d, lng_d), (lat_o, lng_o), mode='driving')
dm

In [None]:
timeString = dm['rows'][0]['elements'][0]['duration']['text']

In [None]:
extractedNums = [int(str_) for str_ in timeString.split() if str_.isdigit() ]
extractedNums

In [None]:
traveltimemins = sum([ extractedNums[-_i-1] * [60, 1][-_i-1]  for _i, timedigit in enumerate(extractedNums)  ])
traveltimemins

In [23]:
def get_minutes_from_timestring(timestring):
    extractedNums = [int(str_) for str_ in timestring.split() if str_.isdigit() ]
    traveltimemins = sum([ extractedNums[-_i-1] * [60, 1][-_i-1]  for _i, timedigit in enumerate(extractedNums)  ])    
    return traveltimemins

get_minutes_from_timestring('2 hours 29 mins')

149

In [57]:
def get_travel_time(zoneid_o, zoneid_d, mode='driving'):
    lat_o = nycZones.loc[zoneid_o]['geometry'].centroid.xy[1][0]
    lng_o = nycZones.loc[zoneid_o]['geometry'].centroid.xy[0][0]
    lat_d = nycZones.loc[zoneid_d]['geometry'].centroid.xy[1][0]
    lng_d = nycZones.loc[zoneid_d]['geometry'].centroid.xy[0][0]
    if mode == 'driving':
        dm = gmaps.distance_matrix((lat_d, lng_d), (lat_o, lng_o), mode='driving')
        stringOfTravelTime = dm['rows'][0]['elements'][0]['duration']['text']
        travelTimeMinutes = get_minutes_from_timestring(stringOfTravelTime)
    elif mode == 'transit':
#         dm_rail = gmaps.distance_matrix((lat_d, lng_d), (lat_o, lng_o), mode='transit', transit_mode='rail')
#         railTimeStr = dm_rail['rows'][0]['elements'][0]['duration']['text']
#         railMins = get_minutes_from_timestring(railTimeStr)
#         dm_bus = gmaps.distance_matrix((lat_d, lng_d), (lat_o, lng_o), mode='transit', transit_mode='bus')
#         busTimeStr = dm_bus['rows'][0]['elements'][0]['duration']['text']
#         busMins = get_minutes_from_timestring(busTimeStr)
#         travelTimeMinutes = min(railMins, busMins)
        dmTransit = gmaps.distance_matrix((lat_d, lng_d), (lat_o, lng_o), mode='transit')
        transitTimeStr = dmTransit['rows'][0]['elements'][0]['duration']['text']
        travelTimeMinutes = get_minutes_from_timestring(transitTimeStr)
    else:
        raise ('Invalid mode!')
    return travelTimeMinutes


In [56]:
''' plot distribution of inter-zone travel times by driving '''
get_travel_time(0, 38, 'transit')

2 hours 6 mins


126

In [39]:
def create_travel_time_matrix(mode='driving'):
    nZones = len(nycZones)
    matrixOfTravelTimes = np.full((nZones, nZones), float('inf'))
    for i in range(matrixOfTravelTimes.shape[0]):
        for j in tqdm.trange(matrixOfTravelTimes.shape[1], leave=False, desc='processing origin '+str(i)+"/" + str(nZones) ):
            if i == j:
                continue
            try:
                matrixOfTravelTimes[i][j] = get_travel_time(i, j, mode)
            except KeyError: 
                continue
    return matrixOfTravelTimes
    
    

In [46]:
drivingTravelTimeMatrix = create_travel_time_matrix(mode='driving')
drivingTravelTimeMatrix

                                                                                                                       

array([[inf, 55., 53., ..., 49., 28., 42.],
       [51., inf, 40., ..., 41., 41., 41.],
       [49., 41., inf, ..., 28., 36., 24.],
       ...,
       [46., 40., 26., ..., inf, 25., 19.],
       [39., 45., 36., ..., 26., inf, 16.],
       [44., 43., 25., ..., 21., 19., inf]])

In [50]:
pickle.dump(
    drivingTravelTimeMatrix,
    open('data/driving_travel_time','wb')
)

In [None]:
transitTravelTimeMatrix = create_travel_time_matrix(mode='transit')
transitTravelTimeMatrix

In [10]:
import re
from time import sleep
from tqdm import trange

class DescStr:
    def __init__(self):
        self._desc = ''

    def write(self, instr):
        self._desc += re.sub('\n|\x1b.*|\r', '', instr)

    def read(self):
        ret = self._desc
        self._desc = ''
        return ret

    def flush(self):
        pass


rng_a = trange(10)
desc = DescStr()
for x in rng_a:
    for y in trange(10, file=desc, desc="Y"):
        rng_a.set_description(desc.read())
        sleep(0.1)

Y:  90%|######### | 9/10 [00:00<00:00,  9.12it/s]: 100%|███████████████████████████████| 10/10 [00:10<00:00,  1.10s/it]
