In [2]:
import pickle
import xml.etree.ElementTree as ET
import urllib.request
import pandas as pd

In [24]:
# Set the path to the XML file
xml_path = 'https://feeds.capitalbikeshare.com/stations/stations.xml'
tree = ET.parse(urllib.request.urlopen(xml_path))
root = tree.getroot()

# Extract the data for each station and store it in a list of dictionaries
data = []
for station in root.findall('station'):
    d = {
        'name': station.find('name').text,
        'terminalName': station.find('terminalName').text,
        'lat': station.find('lat').text,
        'long': station.find('long').text,
        'nbBikes': ( int(station.find('nbBikes').text) + int(station.find('nbEmptyDocks').text) )
    }
    data.append(d)

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(data)

# Convert lat and long columns to numeric values
df['lat'] = pd.to_numeric(df['lat'])
df['long'] = pd.to_numeric(df['long'])

# Convert nbBikes and nbEmptyDocks columns to integer values
df['nbBikes'] = pd.to_numeric(df['nbBikes'], errors='coerce').fillna(0).astype(int)


In [4]:
display(df.head())

Unnamed: 0,name,terminalName,lat,long,nbBikes
0,Eads St & 15th St S,31000,38.858971,-77.05323,15
1,18th St & S Eads St,31001,38.85725,-77.05332,9
2,Crystal Dr & 20th St S,31002,38.856425,-77.049232,15
3,Crystal Dr & 15th St S,31003,38.86017,-77.049593,10
4,Aurora Hills Cmty Ctr / 18th St & S Hayes St,31004,38.857866,-77.05949,11


In [25]:
df[df['nbBikes'] == max(df['nbBikes'])]

Unnamed: 0,name,terminalName,lat,long,nbBikes
110,Columbus Circle / Union Station,31623,38.89696,-77.00493,54


In [26]:
df.nbBikes.sum()

10044

In [27]:
# df=df.append({'name': "Test", 'terminalName': "11111", 'lat':39.000000, 'long':-76.91000, 'nbBikes':999 }, ignore_index=True)
#df[df.name=="Test"]
df

Unnamed: 0,name,terminalName,lat,long,nbBikes
0,Eads St & 15th St S,31000,38.858971,-77.053230,15
1,18th St & S Eads St,31001,38.857250,-77.053320,9
2,Crystal Dr & 20th St S,31002,38.856425,-77.049232,15
3,Crystal Dr & 15th St S,31003,38.860170,-77.049593,10
4,Aurora Hills Cmty Ctr / 18th St & S Hayes St,31004,38.857866,-77.059490,11
...,...,...,...,...,...
611,Fern St & Army Navy Dr,31936,38.864838,-77.056873,15
612,Ft Myer Dr & Arlington Blvd,31937,38.890863,-77.074893,15
613,23rd St S & Hayes St,31938,38.853002,-77.059496,14
614,Wilson Blvd & N Oak St,31939,38.895068,-77.073860,17


In [8]:
df.describe()

Unnamed: 0,lat,long,nbBikes
count,617.0,617.0,617.0
mean,38.915068,-77.055721,17.897893
std,0.063771,0.079524,39.910667
min,38.782633,-77.368416,2.0
25%,38.882788,-77.084636,13.0
50%,38.90093,-77.041779,15.0
75%,38.9375,-77.008133,19.0
max,39.123513,-76.825535,999.0


In [30]:
#df[df.name == "Greenbelt Station Parkway"]
df[df.name.str.contains("Solutions")]

Unnamed: 0,name,terminalName,lat,long,nbBikes


In [29]:
df['terminalName'].values

array(['31000', '31001', '31002', '31003', '31004', '31005', '31006',
       '31007', '31009', '31010', '31011', '31012', '31100', '31101',
       '31102', '31103', '31104', '31105', '31106', '31107', '31108',
       '31201', '31202', '31203', '31204', '31205', '31400', '31401',
       '31502', '31600', '31601', '31602', '31700', '31305', '31702',
       '31703', '31704', '31801', '31802', '31206', '31500', '31111',
       '31207', '31209', '31110', '31109', '31013', '31208', '31200',
       '31603', '31212', '31213', '31604', '31605', '31606', '31607',
       '31214', '31300', '31503', '31608', '31301', '31302', '31402',
       '31804', '31805', '31610', '31216', '31705', '31217', '31215',
       '31501', '31220', '31218', '31219', '31211', '31613', '31221',
       '31303', '31611', '31620', '31222', '31223', '31112', '31224',
       '31225', '31609', '31612', '31226', '31227', '31228', '31504',
       '31505', '31615', '31616', '31617', '31618', '31619', '31701',
       '31622', '312

# Trainstations

In [119]:
# Set the path to the XML file
xml_path = 'trainstations.xml'
tree = ET.parse(xml_path)
root = tree.getroot()

# Extract the data for each station and store it in a list of dictionaries
data = []
#print(root.findall('*/*'))
print(root.tag)
for idx, station in enumerate(root.findall('*/*')):

    d = {
        'name': station.find('Name').text,
        'terminalName': station.find('Code').text,
        'lat': station.find('Lat').text,
        'long': station.find('Lon').text,
        #'nbBikes': ( int(station.find('nbBikes').text) + int(station.find('nbEmptyDocks').text) )
    }

    #print(data)
    #print(idx, d['name'])
    if not any(t['name'] == d['name'] for t in data):
        print("not exists")
        data.append(d)
    else:
        print("duplicate")
        
# Convert the list of dictionaries to a DataFrame
dft = pd.DataFrame(data)
print(dft)

StationsResp
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
duplicate
duplicate
not exists
not exists
duplicate
duplicate
duplicate
duplicate
duplicate
duplicate
not exists
not exists
not exists
not exists
duplicate
duplicate
duplicate
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
duplicate
not exists
not exists
not exists
not exists
not exists
duplicate
duplicate
duplicate
duplicate
duplicate
duplicate
not exists
not exists
duplicate
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not exists
not e

In [26]:
import pandas as pd
import numpy as np
import plotly.express as px

In [129]:
fig = px.density_mapbox(df, lat='lat', lon='long', z='nbBikes',
                        mapbox_style="stamen-terrain", width=1500, height=1500)
 
fig

In [4]:
#pickle.dump(df, open( "stations.pkl", "wb" ) )