### New York
latitude = 40
<br>
longitude = -74

In [1]:
import networkx as nx
import osmnx as ox
import pandas as pd

%matplotlib inline
ox.config(log_console=True)
ox.__version__

'1.1.0'

### download/model a street network for New York City from OSMnx

In [2]:
G = ox.graph_from_place("New York, New York, USA", network_type="drive")
gdf_nodes, gdf_edges = ox.graph_to_gdfs(G)

gdf_nodes = gdf_nodes.rename(columns = {'y':'latitude', 'x':'longitude'})

gdf_nodes.head()

Unnamed: 0_level_0,latitude,longitude,ref,highway,street_count,geometry
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
39076461,40.786409,-73.794627,33,motorway_junction,3,POINT (-73.79463 40.78641)
39076490,40.762429,-73.757091,31W,motorway_junction,3,POINT (-73.75709 40.76243)
39076504,40.753467,-73.744164,30W,motorway_junction,3,POINT (-73.74416 40.75347)
42421728,40.798048,-73.960044,,traffic_signals,3,POINT (-73.96004 40.79805)
42421731,40.798645,-73.961474,,traffic_signals,4,POINT (-73.96147 40.79865)


### Create a new dataframe keeping only the columns we care about

In [3]:
gdf_nodes['osmid'] = gdf_nodes.index
nodes = gdf_nodes[['osmid', 'latitude', 'longitude']]

nodes.head()

Unnamed: 0_level_0,osmid,latitude,longitude
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
39076461,39076461,40.786409,-73.794627
39076490,39076490,40.762429,-73.757091
39076504,39076504,40.753467,-73.744164
42421728,42421728,40.798048,-73.960044
42421731,42421731,40.798645,-73.961474


### Try resetting the index to see if that fixes the merge issue below

(it doesn't)

In [4]:
nodes.reset_index(drop=True, inplace=True)
nodes.head()

Unnamed: 0,osmid,latitude,longitude
0,39076461,40.786409,-73.794627
1,39076490,40.762429,-73.757091
2,39076504,40.753467,-73.744164
3,42421728,40.798048,-73.960044
4,42421731,40.798645,-73.961474


In [5]:
dataFileName = "fullnycdata.csv"

In [6]:
csvdata = pd.read_csv(dataFileName, sep =",")

csvdata.head()

Unnamed: 0,medallion,hack_license,vendor_id,rate_code,store_and_fwd_flag,pickup_datetime,dropoff_datetime,passenger_count,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude
0,89D227B655E5C82AECF13C3F540D4CF4,BA96DE419E711691B9445D6A6307C170,CMT,1,N,01-01-13 15:11,01-01-13 15:18,4,-73.978165,40.757977,-73.989838,40.751171
1,0BD7C8F5BA12B88E0B67BED28BEA73D8,9FD8F69F0804BDB5549F40E9DA1BE472,CMT,1,N,06-01-13 0:18,06-01-13 0:22,1,-74.006683,40.731781,-73.994499,40.75066
2,0BD7C8F5BA12B88E0B67BED28BEA73D8,9FD8F69F0804BDB5549F40E9DA1BE472,CMT,1,N,05-01-13 18:49,05-01-13 18:54,1,-74.004707,40.73777,-74.009834,40.726002
3,DFD2202EE08F7A8DC9A57B02ACB81FE2,51EE87E3205C985EF8431D850C786310,CMT,1,N,07-01-13 23:54,07-01-13 23:58,2,-73.974602,40.759945,-73.984734,40.759388
4,DFD2202EE08F7A8DC9A57B02ACB81FE2,51EE87E3205C985EF8431D850C786310,CMT,1,N,07-01-13 23:25,07-01-13 23:34,1,-73.97625,40.748528,-74.002586,40.747868


### Create a new dataframe keeping only the columns we care about

In [7]:
dropoffdf = csvdata[['hack_license', 'dropoff_datetime','passenger_count','dropoff_latitude', 'dropoff_longitude']]

dropoffdf.head()

Unnamed: 0,hack_license,dropoff_datetime,passenger_count,dropoff_latitude,dropoff_longitude
0,BA96DE419E711691B9445D6A6307C170,01-01-13 15:18,4,40.751171,-73.989838
1,9FD8F69F0804BDB5549F40E9DA1BE472,06-01-13 0:22,1,40.75066,-73.994499
2,9FD8F69F0804BDB5549F40E9DA1BE472,05-01-13 18:54,1,40.726002,-74.009834
3,51EE87E3205C985EF8431D850C786310,07-01-13 23:58,2,40.759388,-73.984734
4,51EE87E3205C985EF8431D850C786310,07-01-13 23:34,1,40.747868,-74.002586


In [8]:
nodes.shape

(55314, 3)

In [9]:
nodeid = ox.distance.nearest_nodes(G, 40.757977, -73.978165, return_dist=False)
print(nodeid)

261367744


In [10]:
def latlongtonode(latitude, longitude):
    return ox.distance.nearest_nodes(G, latitude, longitude, return_dist=False)

In [11]:
dropoffdf['dropoff_node'] = latlongtonode(dropoffdf['dropoff_latitude'], dropoffdf['dropoff_longitude'])
dropoffdf.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dropoffdf['dropoff_node'] = latlongtonode(dropoffdf['dropoff_latitude'], dropoffdf['dropoff_longitude'])


Unnamed: 0,hack_license,dropoff_datetime,passenger_count,dropoff_latitude,dropoff_longitude,dropoff_node
0,BA96DE419E711691B9445D6A6307C170,01-01-13 15:18,4,40.751171,-73.989838,261367744
1,9FD8F69F0804BDB5549F40E9DA1BE472,06-01-13 0:22,1,40.75066,-73.994499,261367744
2,9FD8F69F0804BDB5549F40E9DA1BE472,05-01-13 18:54,1,40.726002,-74.009834,261367744
3,51EE87E3205C985EF8431D850C786310,07-01-13 23:58,2,40.759388,-73.984734,261367744
4,51EE87E3205C985EF8431D850C786310,07-01-13 23:34,1,40.747868,-74.002586,261367744


In [13]:
dropoffdf['dropoff_node'] = ox.distance.nearest_nodes(G, 
                       dropoffdf['dropoff_latitude'], dropoffdf['dropoff_longitude'], return_dist=False)

dropoffdf.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dropoffdf['dropoff_node'] = ox.distance.nearest_nodes(G,


Unnamed: 0,hack_license,dropoff_datetime,passenger_count,dropoff_latitude,dropoff_longitude,dropoff_node
0,BA96DE419E711691B9445D6A6307C170,01-01-13 15:18,4,40.751171,-73.989838,261367744
1,9FD8F69F0804BDB5549F40E9DA1BE472,06-01-13 0:22,1,40.75066,-73.994499,261367744
2,9FD8F69F0804BDB5549F40E9DA1BE472,05-01-13 18:54,1,40.726002,-74.009834,261367744
3,51EE87E3205C985EF8431D850C786310,07-01-13 23:58,2,40.759388,-73.984734,261367744
4,51EE87E3205C985EF8431D850C786310,07-01-13 23:34,1,40.747868,-74.002586,261367744


In [14]:
print(dropoffdf.nunique())

hack_license          9990
dropoff_datetime      1546
passenger_count          7
dropoff_latitude     27936
dropoff_longitude    14781
dropoff_node             6
dtype: int64


In [15]:
type(dropoffdf['dropoff_latitude'])

pandas.core.series.Series

In [16]:
# dataFileName = "Sample NYC Data subset rows.csv"

# csvdata = pd.read_csv(dataFileName, sep =",")

# csvdata.shape

In [17]:
# pickupdf = csvdata[['hack_license', 'pickup_datetime','passenger_count','pickup_latitude', 'pickup_longitude']]

# pickupdf.head()

In [18]:
dropoffdf.shape

(99999, 6)

In [19]:
lats = dropoffdf['dropoff_latitude'].tolist()
longs = dropoffdf['dropoff_longitude'].tolist()

In [20]:
def latlongtonode(latitude, longitude):
    return ox.distance.nearest_nodes(G, latitude, longitude, return_dist=False)

In [21]:
nodeids = latlongtonode(lats, longs)

In [22]:
nodeset = set(nodeids)
print(nodeset)

{261367744, 42850178, 42956520, 42850190, 42750232, 43018687}
