In [5]:
import pickle
import xml.etree.ElementTree as ET
import pandas as pd

In [6]:
import math

def distance(origin, destination):
    """
    Calculate the Haversine distance.

    Parameters
    ----------
    origin : tuple of float
        (lat, long)
    destination : tuple of float
        (lat, long)

    Returns
    -------
    distance_in_km : float

    Examples
    --------
    >>> origin = (48.1372, 11.5756)  # Munich
    >>> destination = (52.5186, 13.4083)  # Berlin
    >>> round(distance(origin, destination), 1)
    504.2
    """
    lat1, lon1 = origin
    lat2, lon2 = destination
    radius = 6371  # km

    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = (math.sin(dlat / 2) * math.sin(dlat / 2) +
         math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
         math.sin(dlon / 2) * math.sin(dlon / 2))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    d = radius * c

    return d

distance((52.2296756, 21.0122287), (52.406374, 16.9251681))

278.45817507541943

In [7]:
stations = pd.read_csv("data/Capital_Bikeshare_Locations.csv")
df = stations.copy()

In [8]:
df = df.rename(columns={
    'NAME': 'name',
    'CAPACITY': 'nbBikes', 
    'LATITUDE': 'lat',
    'LONGITUDE': 'long'
})

In [9]:
display(df.head())

Unnamed: 0,X,Y,name,STATION_TYPE,STATION_ID,STATION_STATUS,LAST_REPORTED,NUM_DOCKS_AVAILABLE,NUM_DOCKS_DISABLED,NUM_BIKES_AVAILABLE,...,EIGHTD_HAS_KEY_DISPENSER,nbBikes,RENTAL_METHODS,REGION_ID,REGION_NAME,GIS_ID,lat,long,GIS_LAST_MOD_DTTM,OBJECTID
0,-76.980708,38.972511,Riggs Rd & East West Hwy,classic,f0d65475-a505-474b-a40f-2488459105ac,active,2023/03/15 11:16:00+00,12,0,5,...,NO,17,"KEY,CREDITCARD",133,Prince George's County,,38.9725,-76.9807,2023/03/15 11:17:28+00,93817883
1,-77.026066,38.895921,10th & E St NW,classic,08253e7e-1f3f-11e7-bf6b-3863bb334450,active,2023/03/15 11:16:00+00,5,0,10,...,NO,15,"KEY,CREDITCARD",42,"Washington, DC",,38.895914,-77.026064,2023/03/15 11:17:28+00,93817884
2,-77.359717,38.957042,W&OD Trail & Explorer St/Reston Bus Station,classic,08263a37-1f3f-11e7-bf6b-3863bb334450,active,2023/03/15 11:16:03+00,8,0,9,...,NO,17,"KEY,CREDITCARD",104,"Fairfax, VA",,38.957037,-77.359718,2023/03/15 11:17:28+00,93817885
3,-77.044394,38.893898,Virginia Ave & C St NW,classic,0824f16b-1f3f-11e7-bf6b-3863bb334450,active,2023/03/15 11:16:08+00,6,0,9,...,NO,15,"KEY,CREDITCARD",42,"Washington, DC",,38.893888,-77.044393,2023/03/15 11:17:28+00,93817886
4,-76.981162,39.044955,Stewart & April,classic,52ba8179-5dda-404f-abba-796b3f645434,active,2023/03/15 11:16:05+00,15,0,4,...,NO,19,"KEY,CREDITCARD",44,"Montgomery County, MD (South)",,39.044944,-76.981155,2023/03/15 11:17:28+00,93817887


In [10]:
df[df['nbBikes'] == max(df['nbBikes'])]

Unnamed: 0,X,Y,name,STATION_TYPE,STATION_ID,STATION_STATUS,LAST_REPORTED,NUM_DOCKS_AVAILABLE,NUM_DOCKS_DISABLED,NUM_BIKES_AVAILABLE,...,EIGHTD_HAS_KEY_DISPENSER,nbBikes,RENTAL_METHODS,REGION_ID,REGION_NAME,GIS_ID,lat,long,GIS_LAST_MOD_DTTM,OBJECTID
184,-77.004936,38.896969,Columbus Circle / Union Station,classic,0824d0c6-1f3f-11e7-bf6b-3863bb334450,active,2023/03/15 11:16:49+00,1,0,54,...,NO,55,"KEY,CREDITCARD",42,"Washington, DC",,38.89696,-77.00493,2023/03/15 11:17:28+00,93818067


In [12]:
#df.CAPACITY.sum()

In [14]:
df.describe()

Unnamed: 0,X,Y,NUM_DOCKS_AVAILABLE,NUM_DOCKS_DISABLED,NUM_BIKES_AVAILABLE,NUM_EBIKES_AVAILABLE,NUM_BIKES_DISABLED,nbBikes,REGION_ID,GIS_ID,lat,long,OBJECTID
count,720.0,720.0,720.0,720.0,720.0,720.0,720.0,720.0,720.0,0.0,720.0,720.0,720.0
mean,-77.062205,38.91173,9.348611,0.008333,7.694444,0.380556,0.086111,17.188889,51.641667,,38.911722,-77.062203,93818240.0
std,0.090417,0.063192,5.799801,0.105152,5.852472,0.801362,0.322242,5.186952,26.199395,,0.063192,0.090416,207.9904
min,-77.368419,38.782642,0.0,0.0,0.0,0.0,0.0,9.0,40.0,,38.782633,-77.368416,93817880.0
25%,-77.088994,38.878876,5.0,0.0,3.0,0.0,0.0,15.0,42.0,,38.878866,-77.08899,93818060.0
50%,-77.043132,38.900802,9.0,0.0,7.0,0.0,0.0,15.0,42.0,,38.900796,-77.043128,93818240.0
75%,-77.008049,38.936241,13.0,0.0,11.0,1.0,0.0,19.0,43.0,,38.936234,-77.00805,93818420.0
max,-76.825536,39.125838,31.0,2.0,54.0,8.0,3.0,55.0,152.0,,39.125828,-76.825535,93818600.0


In [15]:
#df[df.name == "Greenbelt Station Parkway"]
df[df.name.str.contains("Largo")]

Unnamed: 0,X,Y,name,STATION_TYPE,STATION_ID,STATION_STATUS,LAST_REPORTED,NUM_DOCKS_AVAILABLE,NUM_DOCKS_DISABLED,NUM_BIKES_AVAILABLE,...,EIGHTD_HAS_KEY_DISPENSER,nbBikes,RENTAL_METHODS,REGION_ID,REGION_NAME,GIS_ID,lat,long,GIS_LAST_MOD_DTTM,OBJECTID
192,-76.844603,38.899821,Largo Town Center Metro,classic,29b65d73-fc99-42e3-9007-2175402a5c56,active,2023/03/15 11:09:44+00,8,0,8,...,NO,17,"KEY,CREDITCARD",133,Prince George's County,,38.899811,-76.844604,2023/03/15 11:17:28+00,93818075
260,-76.825536,38.889948,Largo Rd & Campus Way / Prince Georges's Comm Col,classic,5945c731-a300-46e6-8e51-3236ad1f65f2,active,2023/03/15 11:16:02+00,5,0,10,...,NO,15,"KEY,CREDITCARD",133,Prince George's County,,38.88994,-76.825535,2023/03/15 11:17:28+00,93818143


In [16]:
df[df.OBJECTID == 93818279]
 

Unnamed: 0,X,Y,name,STATION_TYPE,STATION_ID,STATION_STATUS,LAST_REPORTED,NUM_DOCKS_AVAILABLE,NUM_DOCKS_DISABLED,NUM_BIKES_AVAILABLE,...,EIGHTD_HAS_KEY_DISPENSER,nbBikes,RENTAL_METHODS,REGION_ID,REGION_NAME,GIS_ID,lat,long,GIS_LAST_MOD_DTTM,OBJECTID
396,-77.135273,38.881,Kennebec St & 11th St N,classic,405,active,2023/03/15 10:02:13+00,5,0,10,...,NO,15,"CREDITCARD,KEY",41,"Arlington, VA",,38.880992,-77.135271,2023/03/15 11:17:28+00,93818279


# Trainstations

In [17]:
# Set the path to the XML file
xml_path = 'trainstations.xml'
tree = ET.parse(xml_path)
root = tree.getroot()

# Extract the data for each station and store it in a list of dictionaries
data = []
#print(root.findall('*/*'))
print(root.tag)
for idx, station in enumerate(root.findall('*/*')):

    d = {
        'name': station.find('Name').text,
        'terminalName': station.find('Code').text,
        'lat': station.find('Lat').text,
        'long': station.find('Lon').text,
        #'nbBikes': ( int(station.find('nbBikes').text) + int(station.find('nbEmptyDocks').text) )
    }

    #print(data)
    #print(idx, d['name'])
    if not any(t['name'] == d['name'] for t in data):
        print("not exists")
        data.append(d)
    else:
        print("duplicate")
        


# Convert the list of dictionaries to a DataFrame
dft = pd.DataFrame(data)
print(dft)

StationsResp
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
duplicate
duplicate
not exists
not exists
duplicate
duplicate
duplicate
duplicate
duplicate
duplicate
not exists
not exists
not exists
not exists
duplicate
duplicate
duplicate
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
duplicate
not exists
not exists
not exists
not exists
not exists
duplicate
duplicate
duplicate
duplicate
duplicate
duplicate
not exists
not exists
duplicate
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not e

In [18]:
import pandas as pd
import numpy as np
import plotly.express as px

In [19]:
fig = px.density_mapbox(df, lat='lat', lon='long', z='nbBikes',
                        mapbox_style="stamen-terrain", width=1500, height=1000)
 
fig

In [20]:
pickle.dump(df, open( "stations2.pkl", "wb" ) )