#### Mapping the taxi zones to origin and destination lat-long for all permutations of the zones for getting the time and pricing information for walking and subway from HereMaps and the pricing info for FHVs from the Uber API.

In [151]:
import geopandas as gpd
import pandas as pd

In [152]:
zones = gpd.read_file('Data/Taxi_Zones/taxi_zones.shp')

In [154]:
zones.shape

(263, 7)

In [131]:
zones = zones.to_crs(epsg=4326)

In [132]:
zones.head()

Unnamed: 0,OBJECTID,Shape_Leng,Shape_Area,zone,LocationID,borough,geometry
0,1,0.116357,0.000782,Newark Airport,1,EWR,"POLYGON ((-74.18445299999996 40.6949959999999,..."
1,2,0.43347,0.004866,Jamaica Bay,2,Queens,(POLYGON ((-73.82337597260663 40.6389870471767...
2,3,0.084341,0.000314,Allerton/Pelham Gardens,3,Bronx,POLYGON ((-73.84792614099985 40.87134223399991...
3,4,0.043567,0.000112,Alphabet City,4,Manhattan,POLYGON ((-73.97177410965318 40.72582128133705...
4,5,0.092146,0.000498,Arden Heights,5,Staten Island,POLYGON ((-74.17421738099989 40.56256808599987...


In [133]:
centroids = zones.geometry.centroid

In [134]:
type(centroids[0])

shapely.geometry.point.Point

In [135]:
from shapely.geometry import Polygon

#### Mapping the zones to a representative point(which is better than the centroid because it always lies inside the polygon especially for polygons with a weird shape like the one below)

####                       Motivation for using a representative point

![Why_to use_representative point](Repr_Point.png)

In [136]:
zones['rps'] = zones['geometry'].representative_point()

In [137]:
type(zones.rps[0])

shapely.geometry.point.Point

#### Mapping the points to lat-long

In [138]:
zones['lon'] = zones.rps.apply(lambda p: p.x)
zones['lat'] = zones.rps.apply(lambda p: p.y)

In [139]:
zones.head()

Unnamed: 0,OBJECTID,Shape_Leng,Shape_Area,zone,LocationID,borough,geometry,rps,lon,lat
0,1,0.116357,0.000782,Newark Airport,1,EWR,"POLYGON ((-74.18445299999996 40.6949959999999,...",POINT (-74.17677842004377 40.68951499999988),-74.176778,40.689515
1,2,0.43347,0.004866,Jamaica Bay,2,Queens,(POLYGON ((-73.82337597260663 40.6389870471767...,POINT (-73.82614118027297 40.62572435264644),-73.826141,40.625724
2,3,0.084341,0.000314,Allerton/Pelham Gardens,3,Bronx,POLYGON ((-73.84792614099985 40.87134223399991...,POINT (-73.84947945770672 40.86587116199992),-73.849479,40.865871
3,4,0.043567,0.000112,Alphabet City,4,Manhattan,POLYGON ((-73.97177410965318 40.72582128133705...,POINT (-73.97702362880241 40.72415107077562),-73.977024,40.724151
4,5,0.092146,0.000498,Arden Heights,5,Staten Island,POLYGON ((-74.17421738099989 40.56256808599987...,POINT (-74.18993821824958 40.55033924549988),-74.189938,40.550339


In [140]:
# Keeping only relevant columns
zones_final = zones[['LocationID','lon','lat']]

In [155]:
zones_final.head()

Unnamed: 0,LocationID_pickup,lon_pickup,lat_pickup,LocationID_dropoff,lon_dropoff,lat_dropoff
1,1,-74.176778,40.689515,2,-73.826141,40.625724
2,1,-74.176778,40.689515,3,-73.849479,40.865871
3,1,-74.176778,40.689515,4,-73.977024,40.724151
4,1,-74.176778,40.689515,5,-74.189938,40.550339
5,1,-74.176778,40.689515,6,-74.067786,40.599053


#### Creating a new dataframe which has origin and destination lat-longs for all possible combinations

In [142]:
# Creating a new column on which the merge will take place
zones_final['key'] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [143]:
# Creating a copy that will be merged into the original data frame
zones_final2 = zones_final.copy()

In [144]:
# Merging the 2 dataframes 
zones_final = pd.merge(zones_final,zones_final2,on = 'key', suffixes=('_left','_right'))

In [145]:
zones_final.head()

Unnamed: 0,LocationID_left,lon_left,lat_left,key,LocationID_right,lon_right,lat_right
0,1,-74.176778,40.689515,1,1,-74.176778,40.689515
1,1,-74.176778,40.689515,1,2,-73.826141,40.625724
2,1,-74.176778,40.689515,1,3,-73.849479,40.865871
3,1,-74.176778,40.689515,1,4,-73.977024,40.724151
4,1,-74.176778,40.689515,1,5,-74.189938,40.550339


In [146]:
# Dropping the rows with the same location ID as the origin and destination would be the same
zones_final = zones_final[zones_final.LocationID_left!=zones_final.LocationID_right].drop('key',axis=1)

In [147]:
# Renaming the columns to make more sense
columns = ['LocationID_pickup', 'lon_pickup', 'lat_pickup', 'LocationID_dropoff',
       'lon_dropoff', 'lat_dropoff']

In [148]:
zones_final.columns = columns

In [149]:
zones_final.head()

Unnamed: 0,LocationID_pickup,lon_pickup,lat_pickup,LocationID_dropoff,lon_dropoff,lat_dropoff
1,1,-74.176778,40.689515,2,-73.826141,40.625724
2,1,-74.176778,40.689515,3,-73.849479,40.865871
3,1,-74.176778,40.689515,4,-73.977024,40.724151
4,1,-74.176778,40.689515,5,-74.189938,40.550339
5,1,-74.176778,40.689515,6,-74.067786,40.599053


In [150]:
zones_final.to_csv('Data/Taxi_Zones/lat_long_for_API.csv')