In [5]:
import pandas as pd
import json

# Location of the station-information snapshot (a JSON file)
file_path = "station_information.json"

# Read the data. The encoding is given explicitly because JSON text is UTF-8,
# while open() would otherwise fall back to the platform's locale encoding.
with open(file_path, encoding="utf-8") as file:
    feed = json.load(file)

# feed is a dictionary with keys ['data', 'last_updated', 'ttl', 'version'];
# feed['data'] is a dictionary with the single key ['stations'];
# feed['data']['stations'] is a list of dictionaries, where each dict is a station.
# `data` is bound exactly once, to that list, instead of being re-assigned
# to three different kinds of object on the way down.
data = feed['data']['stations']

# Field names are read off the first station record. An empty station list
# yields an empty set of fields rather than raising IndexError.
fields = data[0].keys() if data else {}.keys()


print(f"There are {len(data)} stations in NYC.")
print("Information available for each station:")
print(fields)

There are 2209 stations in NYC.
Information available for each station:
dict_keys(['external_id', 'region_id', 'lon', 'capacity', 'eightd_station_services', 'has_kiosk', 'name', 'electric_bike_surcharge_waiver', 'rental_methods', 'rental_uris', 'short_name', 'station_type', 'lat', 'station_id', 'eightd_has_key_dispenser'])


In [6]:
from helper import Station

# Only these attributes of a station record are used by the simulation.
# They are handed to Station positionally, in exactly this order.
interesting_fields = ['station_id', 'name', 'lat', 'lon', 'capacity', 'short_name']

# One Station object per record in the feed, in feed order
stations = [
    Station(*[record[field] for field in interesting_fields])
    for record in data
]

def find_first_with_attr(attr, value):
    """
    Look up a station by one of its attributes.

    Scans the module-level ``stations`` list in order and returns the first
    station whose attribute named ``attr`` equals ``value``. Returns None
    when no station matches.
    """
    matches = (candidate for candidate in stations if getattr(candidate, attr) == value)
    return next(matches, None)

In [8]:
# Show the short name of every station, in the order the stations were loaded
print([st.short_name for st in stations])

['5216.06', '5430.10', '7116.04', '4962.02', '5167.06', '5914.08', '7840.11', '6854.05', '8528.05', '8496.01', '4862.04', '6524.09', '5794.05', '5321.08', '5340.01', '8717.07', '3428.04', '7504.19', '4982.02', '4060.09', '8163.05', '8016.07', '5288.09', '7740.08', '6255.02', '4971.01', '7844.05', '8319.01', '7032.06', '5225.02', '7146.04', '8428.05', '6529.02', '4474.01', '7823.03', '4614.04', '6659.01', '8320.01', '7327.01', '4476.03', '7286.02', '3845.05', '7898.03', '8054.04', '7047.07', '4953.04', '6227.02', '8191.01', '3865.05', '7018.01', '5540.06', '6433.01', '7910.10', '7901.04', '4968.03', '3246.06', '8430.03', '4470.09', '4990.02', '6230.02', '4199.13', '4072.02', '6885.01', '4371.01', '3593.01', '8586.09', '5181.04', '7057.07', '8665.09', '6836.07', '5584.04', '3704.01', '6322.01', '7293.10', '3408.05', '6240.05', '6070.05', '7883.09', '8778.01', '8005.08', '7821.01', '8555.05', '7983.02', '5388.01', '8033.09', '8752.01', '6789.20', '6981.16', '4680.05', '8472.06', '4792.03'

In [8]:
import pickle

# Two stations are adjacent when Station.distance() between them is below
# this threshold. Units are whatever that helper returns
# (presumably metres — TODO confirm in helper.py).
ADJACENCY_RADIUS = 350

try:
    # Reuse the adjacency lists saved by a previous run, if any.
    # NOTE(review): pickle.load can execute arbitrary code; only load a cache
    # file that this notebook wrote itself.
    # NOTE(review): the cache is not checked against the current `stations`;
    # delete the file after changing the station data or the radius.
    with open('station_adjacency.pickle', 'rb') as file:
        adj = pickle.load(file)
except (FileNotFoundError, EOFError, pickle.UnpicklingError):
    # No usable cache (missing, truncated or corrupt): rebuild it. Any other
    # exception — KeyboardInterrupt, NameError, ... — now propagates instead
    # of silently triggering the quadratic rebuild below.
    adj = { st.id: [] for st in stations }

    # Visit every unordered pair of stations once and record the edge in
    # both directions, so adj is a symmetric adjacency list keyed by id.
    for ix1,station1 in enumerate(stations):
        st1 = station1.id
        for station2 in stations[ix1+1:]:
            st2 = station2.id
            if station1.distance(station2) < ADJACENCY_RADIUS:
                adj[st1].append(st2)
                adj[st2].append(st1)

    # Save the result so later runs can skip the pairwise scan
    with open('station_adjacency.pickle', 'wb') as file:
        pickle.dump(adj, file)

In [9]:
from helper import Dfs

# Best result so far as (component size, station id). (0, -1) is the
# "nothing found yet" starting value.
connected_component = (0,-1)
dfs = Dfs(adj)

# The loop variable is not called `id`: at notebook top level it would stay
# bound after the loop and shadow the builtin id() for every later cell.
for station_id in adj:
    # run_dfs is expected to return the size of the component reached from
    # station_id (see helper.Dfs — TODO confirm what it returns for a node
    # that an earlier call already visited).
    component_size = dfs.run_dfs(station_id)
    # Tuples compare by size first, then by id, so the largest size wins
    connected_component = max(connected_component, (component_size, station_id))

print(f"The maximum connected component has size {connected_component[0]}")

The maximum connected component has size 1040
