In [None]:
"""
Imports
"""
import requests
import pandas as pd
import json
import time

In [None]:
"""Load Station List from XML"""
# Read every <Station> element (WMATA XML namespace) from the station list
# file into a DataFrame, then keep the 'Code' column as a plain list.
station_list_path = 'station_list.xml'
wmata_namespaces = {'wm': 'http://www.wmata.com'}
station_df = pd.read_xml(
    station_list_path,
    xpath='./wm:Stations/wm:Station',
    namespaces=wmata_namespaces,
)
station_codes = station_df['Code'].to_list()

In [None]:
# Quick sanity check: report how many station codes were read from the XML.
number_of_stations = len(station_codes)
print(f"Number of stations: {number_of_stations}")

In [None]:
def send_request_retry(url, headers, max_retries=3, backoff_factor=1.0, timeout=30.0):
    """GET ``url`` and retry with exponential backoff when the request fails.

    Each attempt is preceded by a short fixed pause to throttle the request
    rate. The first successful response is returned immediately, so a healthy
    endpoint costs exactly one request.

    Args:
        url: Full URL to request.
        headers: HTTP headers sent with every attempt.
        max_retries: Maximum number of attempts; must be at least 1.
        backoff_factor: Base delay in seconds; the wait after failed attempt
            ``k`` (0-based) is ``backoff_factor * 2 ** k``.
        timeout: Per-request timeout in seconds, so a stalled connection
            counts as a failed attempt instead of hanging forever.

    Returns:
        The first response whose status check passes. If every attempt fails
        but at least one produced an HTTP response, the most recent such
        response is returned so the caller can inspect ``status_code``.

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        requests.exceptions.RequestException: If every attempt failed without
            producing any response (e.g. the host was unreachable).
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    last_response = None
    last_error = None
    for attempt in range(max_retries):
        time.sleep(0.33)  # To avoid hitting API rate limits
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
            # Remember the response before the status check so an HTTP error
            # response can still be handed back to the caller.
            last_response = response
            response.raise_for_status()
            return response
        except requests.exceptions.RequestException as e:
            last_error = e
            if attempt + 1 < max_retries:
                delay = backoff_factor * (2 ** attempt)
                print(f"Request failed: {e}. Retrying in {delay} seconds...")
                time.sleep(delay)
            else:
                # Nothing left to retry, so do not sleep again.
                print(f"Request failed: {e}. No retries left.")

    if last_response is not None:
        return last_response
    raise last_error

In [None]:
"""Grab station distances from WMATA API"""
# Query the station-to-station endpoint for every ordered pair of distinct
# station codes and save the miles / minutes it reports to a CSV file.
# HTTPS is used so the API key in the request headers is not sent in clear text.
url = 'https://api.wmata.com/Rail.svc/json'
endpoint = '/jSrcStationToDstStationInfo?FromStationCode={}&ToStationCode={}'
with open('../../api_creds.json') as f:
    api_keys = json.load(f)
api_key = api_keys['primary_metro_api_key']

headers = {'api_key': api_key}
distance_columns = ['FromStationCode', 'ToStationCode', 'DistanceMiles', 'DistanceMinutes']
# Rows are gathered in a plain list and turned into a DataFrame once at the
# end; concatenating a one-row frame per request re-copies the whole table
# on every iteration.
distance_records = []
num_stations = len(station_codes)
# 1-based position of the origin station before which the long pause is taken.
# Rounding up makes the pause happen for odd station counts as well.
halfway_position = (num_stations + 1) // 2
for i, from_code in enumerate(station_codes):
    print("Getting stations for ", from_code)
    if i + 1 == halfway_position:
        print("Taking a 20 second break to avoid API rate limits...")
        time.sleep(20)
    time.sleep(1)
    for to_code in station_codes:
        if from_code == to_code:
            continue
        full_endpoint = endpoint.format(from_code, to_code)
        response = send_request_retry(url + full_endpoint, headers)

        # Default to missing values; they are only filled in when the API
        # answers 200 with at least one entry.
        distance_miles = None
        distance_minutes = None
        if response.status_code == 200:
            station_infos = response.json().get('StationToStationInfos') or []
            if station_infos:
                data = station_infos[0]
                distance_miles = data.get('CompositeMiles', None)
                distance_minutes = data.get('RailTime', None)
            else:
                print(f"No station-to-station info returned for {from_code} to {to_code}")
        else:
            print(f"Failed to retrieve data for {from_code} to {to_code}: ", response.text)

        distance_records.append({
            'FromStationCode': from_code,
            'ToStationCode': to_code,
            'DistanceMiles': distance_miles,
            'DistanceMinutes': distance_minutes,
        })

distance_data = pd.DataFrame(distance_records, columns=distance_columns)
distance_data.to_csv('station_distances.csv', index=False)

In [None]:
# Write the current contents of distance_data to a second CSV file, without the index column.
# NOTE(review): looks like a leftover manual checkpoint from a partial run — confirm it is still needed.
distance_data.to_csv('station_distances_p2.csv', index=False)

In [None]:
# Spot-check a single station pair (B01 -> B02) and print the raw JSON payload.
# The result of this request is kept under its own name and printed, so the
# output reflects this call rather than whatever `response` the scraping loop
# last left behind.
full_endpoint = endpoint.format("B01", "B02")
sample_response = requests.get(url + full_endpoint, headers=headers)
print(sample_response.json())

In [None]:
# Write distance_data to station_distances.csv, without the index column.
# NOTE(review): the scraping cell already ends with this same write to the same path — confirm the repeat is intentional.
distance_data.to_csv('station_distances.csv', index=False)