# Load Trip Dataset

In [25]:
import pandas as pd

# Read the trip records from the CSV file into a DataFrame.
trips_csv = 'gkne-dk5s.csv'
df = pd.read_csv(trips_csv)

In [26]:
# Keep only the rows where all four pickup/dropoff coordinates are present.
coordinate_columns = [
    'dropoff_longitude',
    'dropoff_latitude',
    'pickup_latitude',
    'pickup_longitude',
]
df = df.dropna(subset=coordinate_columns)

# Obtain Road Network

In [27]:
import osmnx as ox
# Download the drivable road network for Manhattan (the place queried below).
place_query = 'Manhattan, New York City, New York, USA'
G = ox.graph_from_place(place_query, network_type='drive')
# NOTE(review): to_latlong=True requests latitude/longitude coordinates, so
# G_proj is presumably not in a metric CRS despite its name -- confirm intent.
G_proj = ox.project_graph(G, to_latlong=True)

In [28]:
#len(G_proj.nodes), len(G_proj.edges)

In [29]:
# Check the first few edges of the projected graph
# list(G_proj.edges(data=True))[0:5]

# Set Speed Limits for Simple Fastest-Route Recovery

In [30]:
# Fixed speed assigned to each OSM `highway` road type. The travel-time
# computation multiplies these by 1000 / 3600, i.e. it treats them as km/h.
road_class_speeds = {
    'motorway': 70,
    'trunk': 60,
    'primary': 50,
    'secondary': 40,
    'tertiary': 30,
    'residential': 20,
    'unclassified': 15,
}
# Link roads (ramps / connectors) for the classes above.
link_class_speeds = {
    'secondary_link': 30,
    'primary_link': 40,
    'trunk_link': 50,
    'motorway_link': 60,
}
# Merge into the single lookup table; insertion order is roads, then links.
dict_speed = {**road_class_speeds, **link_class_speeds}

In [31]:
# Annotate every edge of G_proj with a speed and an estimated travel time in
# seconds: driving time at that speed plus a fixed 2-second penalty per edge.
# NOTE(review): the previous comment here said 5 seconds per junction, but the
# code adds 2 -- confirm which value is intended.
for _, _, attrs in G_proj.edges(data=True):
    # Edges without a 'highway' tag are treated as residential.
    highway = attrs.get('highway', 'residential')
    # The tag can be a list of types; only the first one is used.
    if isinstance(highway, list):
        highway = highway[0]
    # Road types missing from dict_speed fall back to 20.
    speed_kmh = dict_speed.get(highway, 20)
    attrs['speed_limit'] = speed_kmh
    # km/h -> m/s, so that length / speed comes out in seconds.
    speed_ms = speed_kmh * 1000 / 3600
    attrs['travel_time'] = attrs['length'] / speed_ms + 2

# Pickle Projected Network

In [32]:
import pickle
# Serialize both graphs to disk with pickle: G_proj first, then G.
for graph, pkl_name in ((G_proj, 'g_proj.pkl'), (G, 'g.pkl')):
    with open(pkl_name, 'wb') as f:
        pickle.dump(graph, f)