In [None]:
import itertools
import random
from pathlib import Path

import geopandas as gpd
import matplotlib.pyplot as plt
import networkx as nx
import osmnx as ox
import pandas as pd
from shapely.geometry import LineString, Point, MultiPoint, Polygon
from tqdm import tqdm

#### Read in Datasets and Files

* entrances contains a shapefile of all of the entrances to metro stations

In [None]:
# Load the rail station entrance layer from the zipped shapefile.
# The path is relative, so it resolves against the notebook's working directory.
entrances = gpd.read_file("../../Data/RailStationEntrances2023.zip")

#### Preprocessing


* dissolve the metro entrances into multipoints of stations based on the station name
* Make the geometry of the multipoint the centroid
* Change the CRS 

In [None]:
# Collapse the entrances into one record per station name, swap each station's
# geometry for its centroid, then reproject the result to EPSG:4326.
stations = (
    entrances
    .dissolve(by='STATIONNM')
    .assign(geometry=lambda dissolved: dissolved.centroid)
    .to_crs("EPSG:4326")
)
stations.head(2)

* Create an OSMNX graph of the network that contains all of the stops in the DC metro system and reproject the graph into the coordinate system that matches the metro stations
* Get the nearest nodes on the network to the metro station
* Create all of the possible combinations of station pairs

In [None]:
# Download the drivable street network inside a bounding box. The four values
# are the north, south, east and west edges in degrees; naming them avoids any
# ambiguity about their order.
graph = ox.graph_from_bbox(north=39.2, south=38.5, east=-76.7, west=-77.6, network_type="drive")

# Reproject to the CRS used by the stations layer. The CRS is written in the
# authority-qualified "EPSG:4326" form, matching the stations cell, rather than
# as a bare "4326" string.
graph = ox.projection.project_graph(graph, to_crs="EPSG:4326")

In [None]:
# Snap every station centroid to its closest node in the street graph.
# The stations are in EPSG:4326, so x is longitude and y is latitude.
station_lons = stations.geometry.x
station_lats = stations.geometry.y
stations['nearest_node'] = ox.distance.nearest_nodes(graph, X=station_lons, Y=station_lats)
stations['nearest_node'].head(2)

In [None]:
def _station_point(labelled_row):
    """Return the geometry from one `(index label, row)` tuple yielded by `iterrows()`."""
    return labelled_row[1]['geometry']


# Every unordered pair of stations; each cell of 'a' and 'b' holds the
# (index label, row) tuple produced by stations.iterrows().
station_pairs = list(itertools.combinations(stations.iterrows(), 2))
od_pairs = gpd.GeoDataFrame(station_pairs, columns=['a', 'b'])
od_pairs['geometry_a'] = od_pairs['a'].apply(_station_point)
od_pairs['geometry_b'] = od_pairs['b'].apply(_station_point)

* Using the OD pairs, get the x and y coordinates at the origin (a) and destination (b) stations
* Using the x and y values, find the nearest node on the OSMNX graph

In [None]:
# Pull the x and y coordinates out of the origin (a) and destination (b) points.
x_coordsa = [origin.x for origin in od_pairs['geometry_a']]
y_coordsa = [origin.y for origin in od_pairs['geometry_a']]
x_coordsb = [destination.x for destination in od_pairs['geometry_b']]
y_coordsb = [destination.y for destination in od_pairs['geometry_b']]

# Store the coordinates as plain numeric columns for the nearest-node lookup.
od_pairs['xa'] = x_coordsa
od_pairs['ya'] = y_coordsa
od_pairs['xb'] = x_coordsb
od_pairs['yb'] = y_coordsb

In [None]:
# Closest street-graph node to each pair's origin (a) and destination (b).
od_pairs['node_a'] = ox.distance.nearest_nodes(graph, X=od_pairs['xa'], Y=od_pairs['ya'])
od_pairs['node_b'] = ox.distance.nearest_nodes(graph, X=od_pairs['xb'], Y=od_pairs['yb'])

# Give the station-tuple columns their descriptive names.
od_pairs = od_pairs.rename(columns={'a': 'origins', 'b': 'destinations'})
od_pairs.head(2)

* Make a copy of the dataframe and change the column names so that the origins are now the destinations (to get the travel times in the opposite direction)
* Organize the columns in the same order as in the original dataframe
* Concatenate od_pairs and od_pairs2 and reset the index

In [None]:
# Reverse-direction copy of the OD pairs. A single rename applies the whole
# mapping at once, so the origin/destination labels and the node_a/node_b
# labels are swapped without needing temporary column names. The geometry and
# coordinate columns keep their original labels.
od_pairs2 = od_pairs.rename(columns={
    'origins': 'destinations',
    'destinations': 'origins',
    'node_a': 'node_b',
    'node_b': 'node_a',
})
od_pairs2.head(2)

In [None]:
# Put the reversed frame's columns back into the same order as od_pairs so the
# two frames line up when they are concatenated.
cols = list(od_pairs.columns)
od_pairs2 = od_pairs2.loc[:, cols]

In [None]:
# Stack the reversed pairs on top of the originals. reset_index() renumbers the
# rows 0..n-1 and keeps the previous labels in a new 'index' column.
od_pairs3 = pd.concat([od_pairs2, od_pairs], sort=False).reset_index()
od_pairs3.head(2)

* Clean Up the names of the stations
* There are issues with the Herndon, VA station. Create a network graph for Herndon
* Replace Herndon's old node with the new node
* Check to make sure there are two records (one per direction) for each pair of stations

In [None]:
# Each origins/destinations cell is still an (index label, row) tuple from
# iterrows(); keep only the label, which is the station name.
origins = [labelled_row[0] for labelled_row in od_pairs3['origins']]
destinations = [labelled_row[0] for labelled_row in od_pairs3['destinations']]
od_pairs3['origins'] = origins
od_pairs3['destinations'] = destinations

In [None]:
# Separate drivable street network for Herndon only, used to re-snap that station.
herndon = ox.graph_from_place("Herndon, Virginia", network_type='drive')

In [None]:
# Snap the Herndon station (longitude, latitude below) to the Herndon-only
# street network. The result is the same for every row, so look it up once
# instead of once per matching row.
# NOTE(review): this node id comes from the `herndon` graph but is later routed
# on `graph` — confirm the node exists in both networks.
herndon_node = ox.distance.nearest_nodes(herndon, -77.3847417, 38.9524369)

# Assign through .loc with a boolean mask so the write lands on od_pairs3
# itself. The earlier `od_pairs3[col].iloc[i] = ...` form is chained assignment,
# which may update a temporary copy and has no effect under copy-on-write.
od_pairs3.loc[od_pairs3['destinations'] == 'HERNDON', 'node_b'] = herndon_node
od_pairs3.loc[od_pairs3['origins'] == 'HERNDON', 'node_a'] = herndon_node

od_pairs3.head(2)

In [None]:
# Spot check: the Herndon/Anacostia pair should appear once in each direction.
herndon_to_anacostia = (od_pairs3['origins'] == 'HERNDON') & (od_pairs3['destinations'] == 'ANACOSTIA')
anacostia_to_herndon = (od_pairs3['origins'] == 'ANACOSTIA') & (od_pairs3['destinations'] == 'HERNDON')
od_pairs3[herndon_to_anacostia | anacostia_to_herndon]

# Calculate the travel distance and time for each OD pair

* Create a list of random numbers between 25 and 35 to represent average speeds
* For every record in the od_pairs dataframe calculate the shortest path between origin and destination
* Calculate the total distance for the route in miles
* Calculate the travel time for the route based on your average speed
* Add the distance and travel time to the OD pairs GeoDataFrame

In [None]:
# Metres in one international mile. Edge 'length' values are treated as metres,
# and the previous divisor of 1609 slightly overstated every distance.
METERS_PER_MILE = 1609.344

n_pairs = len(od_pairs3)

# One simulated average speed (mph) per OD pair, drawn uniformly from 25-35.
# The list is sized from the frame itself: a hard-coded count breaks both the
# per-row speed lookup and the 'speed_mph' assignment as soon as the number of
# station pairs changes. The seed keeps the draws reproducible.
random.seed(0)
random_numbers = [random.uniform(25, 35) for _ in range(n_pairs)]

distances = []
travel_times = []

od_rows = zip(od_pairs3['node_a'], od_pairs3['node_b'], random_numbers)
for origin_node, destination_node, speed_mph in tqdm(od_rows, total=n_pairs):
    # Shortest path between origin and destination, minimising edge length
    route = ox.shortest_path(graph, origin_node, destination_node, weight='length')

    if route is None:
        # No route was returned: keep the row, with missing distance and time
        distances.append(None)
        travel_times.append(None)
        continue

    # Total route length, converted from metres to miles
    route_edges = ox.utils_graph.route_to_gdf(graph, route, 'length')
    distance = route_edges['length'].sum() / METERS_PER_MILE

    # Travel time in hours = miles / mph
    distances.append(distance)
    travel_times.append(distance / speed_mph)

# Add the distance, travel time and simulated speed to the OD pairs GeoDataFrame
od_pairs3['distance'] = distances
od_pairs3['total_travel_time_hrs'] = travel_times
od_pairs3['speed_mph'] = random_numbers
od_pairs3.head()


* Filter wanted columns
* Multiply the total travel time (in hours) by 60 to get the travel time in minutes
* Divide the travel time in minutes by the route distance to get travel minutes per mile
* Export


In [None]:
# Keep only the columns needed for the export. The travel-time column is named
# 'total_travel_time_hrs' when it is created, so it must be selected under that
# name. .copy() makes the result an independent frame, so the derived columns
# added afterwards do not trigger SettingWithCopyWarning.
od_pairs3 = od_pairs3[['origins', 'destinations', 'speed_mph', 'distance', 'total_travel_time_hrs']].copy()
od_pairs3.head(2)

In [None]:
# Convert the travel time from hours to minutes.
od_pairs3['total_travel_time_min'] = od_pairs3['total_travel_time_hrs'].mul(60)

In [None]:
# Minutes of travel per mile of route distance.
od_pairs3['min_p_mile'] = od_pairs3['total_travel_time_min'].div(od_pairs3['distance'])

In [None]:
# Write the results to Excel. to_excel does not create missing folders, so make
# sure the output directory exists first; otherwise a fresh checkout fails here
# after the long routing run.
output_path = Path("output/25_35_drive_times_distances.xlsx")
output_path.parent.mkdir(parents=True, exist_ok=True)
od_pairs3.to_excel(output_path)