In [None]:
# Read me: 
"""
This Notebook uses 'yellow_tripdata_2013-05.csv' dataset which 
represent real taxi calls over manhetten recorded for may 2013
You can download it here: https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-05.csv

How to use : 
1) Download: the csv from the link above.
2) Location: The file must be in the same directory as this notebook.
3) Run: Run all cell consecutively.


Returned values:
1) "clean_2013.csv" : all valid trips from 0000 hours Sunday, May 5,2013, to 2359 hours, Saturday May 11, 2013.
    This used to create "Request nodes" in the project , and containes 4 columns:
    
    a)   request_Index: Natural .
    b)   pickup_datetime : pd.Timestamp .
    c&d) src and dst: both : Natural .
    
    !!!! IMPORTANT !!!!
   (src and dst node ID's are matching this graph: 
    G = ox.graph_from_place('Manhattan, New York City, New York, USA', network_type='drive'))
    
2) "all_paths_nyc.pickle" : Dictionary, keyed by source and target, of shortest paths.
    display the path as a list of node ID's.


3) "all_travel_times.pickle ": (source, dictionary) iterator with dictionary keyed by target and shortest path length as the key value.

How to use : 
1) Download: the csv from the link above.
2) Location: The file must be in the same directory as this notebook.
3) Run: Run all cell consecutively.


Enjoy (=
"""


In [1]:
import pandas as pd
import collections
import networkx as nx
import osmnx as ox
import requests
import matplotlib.cm as cm
import matplotlib.colors as colors
ox.config(use_cache=True, log_console=True)
ox.__version__
import pandas as pd
import datetime
from datetime import datetime
from datetime import timedelta
import pickle
from platform import python_version



In [2]:
print(python_version())

3.8.12


In [None]:
# Making request from real nyc taxi data , for Simulation

In [None]:
# Getting the Data
df2013 = pd.read_csv('yellow_tripdata_2013-05.csv')
# Sorting by pickupo time
df2013 = df2013.sort_values(by=['pickup_datetime'])
print(df2013.head())

In [None]:
# Parser func
def str_to_time(s):
    t = pd.Timestamp(s)
    tt = t.to_pydatetime()
    return tt
    
def str_to_time_list(s):
    time_list = []
    for t_string in s :
        time_list.append(str_to_time(t_string))
    return time_list

In [None]:
# Parse pickup_datetime string to pd.Timestamp()
df2013['pickup_datetime'] =str_to_time_list(df2013['pickup_datetime'])
df2013['dropoff_datetime'] =str_to_time_list(df2013['dropoff_datetime']) 

In [None]:
# Filter only the requests from 0000 hours Sunday, May 5,2013, to 2359 hours, Saturday May 11, 2013,
df2013 = df2013[df2013.pickup_datetime > pd.Timestamp(2013, 5, 5, 0)  ]
df2013 = df2013[df2013.dropoff_datetime < pd.Timestamp(2013, 5, 11, 23,59) ]


In [None]:
# Drops rows with bad data (bad ccordinates)

df2013 = df2013[df2013.pickup_longitude != 0]
df2013 = df2013[df2013.pickup_latitude != 0]
df2013 = df2013[df2013.dropoff_longitude != 0]
df2013 = df2013[df2013.dropoff_latitude != 0]
df2013 = df2013[df2013.passenger_count != 0]

df2013.dropna()

In [None]:
#Drop out of bound pick up and dropoff
df2013 = df2013[df2013.pickup_longitude > -74.022 ]
df2013 = df2013[df2013.pickup_latitude > 40.69]
df2013 = df2013[df2013.dropoff_longitude > -74.022]
df2013 = df2013[df2013.dropoff_latitude > 40.69]
df2013 = df2013[df2013.pickup_longitude < -73.906 ]
df2013 = df2013[df2013.pickup_latitude < 40.88]
df2013 = df2013[df2013.dropoff_longitude < -73.906]
df2013 = df2013[df2013.dropoff_latitude < 40.88]


In [None]:
# Drop unwanted columns
df2013=df2013.drop(columns=['vendor_id','trip_distance','rate_code','store_and_fwd_flag','payment_type','fare_amount','surcharge','mta_tax','tip_amount','tolls_amount','total_amount','passenger_count'])

In [None]:
df2013.head()

In [None]:
df2013.tail()

In [3]:
# New York map for processing
G = ox.graph_from_place('Manhattan, New York City, New York, USA', network_type='drive')
print(type(G))
# fig, ax = ox.plot_graph(G, node_zorder=2, node_color='w', bgcolor='k')


<class 'networkx.classes.multidigraph.MultiDiGraph'>


In [3]:
# impute missing edge speeds then calculate edge travel times
G = ox.add_edge_speeds(G)
G = ox.add_edge_travel_times(G)

In [None]:
ox.stats.basic_stats(G)

In [4]:
if 370897167 in G.nodes():
    print('370897167 in graph')
else:
    print('370897167 not in graph')

370897167 not in graph


In [8]:
nx.__version__

'2.6.3'

In [9]:
ox.stats.basic_stats(G)

{'n': 4601,
 'm': 9903,
 'k_avg': 4.30471636600739,
 'edge_length_total': 1174033.5900000012,
 'edge_length_avg': 118.55332626476837,
 'streets_per_node_avg': 3.5544446859378396,
 'streets_per_node_counts': {0: 0,
  1: 101,
  2: 28,
  3: 1767,
  4: 2635,
  5: 64,
  6: 6},
 'streets_per_node_proportions': {0: 0.0,
  1: 0.021951749619647902,
  2: 0.006085633557922191,
  3: 0.38404694631601827,
  4: 0.5727015866116062,
  5: 0.013910019560965008,
  6: 0.0013040643338404696},
 'intersection_count': 4500,
 'street_length_total': 988419.4999999977,
 'street_segment_count': 8159,
 'street_length_avg': 121.14468684887825,
 'circuity_avg': 1.021382666104237,
 'self_loop_proportion': 0.0008579482779752421}

In [15]:
map_graph = ox.graph_from_place('Manhattan, New York City, New York, USA', network_type='drive')
if 370897167 in map_graph.nodes():
    print('370897167 in map graph')
else:
    print('370897167 not here bitch')

370897167 not here bitch


In [16]:
ox.stats.basic_stats(map_graph)

{'n': 4601,
 'm': 9903,
 'k_avg': 4.30471636600739,
 'edge_length_total': 1174033.5900000012,
 'edge_length_avg': 118.55332626476837,
 'streets_per_node_avg': 3.5544446859378396,
 'streets_per_node_counts': {0: 0,
  1: 101,
  2: 28,
  3: 1767,
  4: 2635,
  5: 64,
  6: 6},
 'streets_per_node_proportions': {0: 0.0,
  1: 0.021951749619647902,
  2: 0.006085633557922191,
  3: 0.38404694631601827,
  4: 0.5727015866116062,
  5: 0.013910019560965008,
  6: 0.0013040643338404696},
 'intersection_count': 4500,
 'street_length_total': 988419.4999999977,
 'street_segment_count': 8159,
 'street_length_avg': 121.14468684887825,
 'circuity_avg': 1.021382666104237,
 'self_loop_proportion': 0.0008579482779752421}

In [None]:
# Adding df2013['src'] & df2013['dst']
# get the nearest network nodes to two points
df2013['src'] = ox.distance.nearest_nodes(G, df2013['pickup_longitude'], df2013['pickup_latitude']) 
df2013['dst'] = ox.distance.nearest_nodes(G, df2013['dropoff_longitude'], df2013['dropoff_latitude'])

In [None]:
df2013.head()

In [None]:
# Drop unwanted columns
df2013=df2013.drop(columns=['pickup_longitude','pickup_latitude','dropoff_longitude','dropoff_latitude','dropoff_datetime'])

In [None]:
# Checking if every src node and every dst node are indeed in our graph G
validity = True
graph_nodes = G.nodes()
for s_id in df2013['src']:
    if s_id not in graph_nodes:
        validity = Flase  
    
for s_id in df2013['dst']:
    if s_id not in graph_nodes:
        validity = Flase
if validity:
    print('src and dst are valid')
else:
    print('There is a bad src or dst')

In [None]:
graph_nodes = G.nodes()
if 

In [None]:
# Saving the df as csv
df2013.to_csv('clean_2013.csv' , index=True )

In [None]:
if 1775176474 in G.nodes():
    print("yeah")
else:
    print("nah")

In [None]:
temp = nx.single_source_dijkstra_path(G, 42440754, weight='travel_times')


In [None]:
print(temp)

In [None]:
temp2 = nx.single_source_dijkstra_path(G, 42440754, weight='travel_times')

In [None]:
print(temp2)

In [7]:
if 370897167 in G.nodes():
    print("yeah")
else:
    print("nah")

nah


In [4]:
temp3 = nx.shortest_path(G, 370892861, 370897167)

NodeNotFound: Either source 370892861 or target 370897167 is not in G

In [None]:
print(temp3)

In [None]:
temp4 = nx.shortest_path(G, 42442469, 7684225787)

In [None]:
# Shortest paths map generator

In [None]:

#Testing if shortest paths match
#route = nx.shortest_path(G, 1773121034, 42445413, 'travel_time')
#print(route)
#route_map = ox.plot_route_folium(G, route)

In [None]:
#travel_times = dict(nx.all_pairs_bellman_ford_path_length(G,weight='travel_times'))

In [None]:
#path = dict(nx.all_pairs_dijkstra_path(G),weight='travel_times')


In [None]:
#print(path[1773121034][42445413])
#print(type(path))

In [None]:
#Saving the path map
#with open('all_paths_nyc.pickle ', 'wb') as handle:
  #  pickle.dump(path, handle, protocol=pickle.HIGHEST_PROTOCOL)



In [None]:
#Saving the travel_times map
#with open('all_travel_times.pickle ', 'wb') as handle:
   # pickle.dump(travel_times, handle, protocol=pickle.HIGHEST_PROTOCOL)


In [None]:
#with open('all_paths_nyc.pickle ', 'rb') as handle:
   # b = pickle.load(handle)
