In [None]:
import pandas as pd
import datetime as dt 
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.geometry import Point
import requests

In [None]:
def main_request(base_url = 'https://gbfs.capitalbikeshare.com/gbfs/gbfs.json', timeout = 30):
    """Download and decode the GBFS index document.

    Parameters
    ----------
    base_url : str, optional
        URL of the index document to fetch.
    timeout : float, optional
        Seconds to wait for the server. Without a timeout ``requests.get``
        can block indefinitely on an unresponsive host.

    Returns
    -------
    dict
        The decoded JSON body of the response.
    """
    r = requests.get(base_url, timeout=timeout)

    return r.json()


def station_info(data, timeout = 30):
    """Download the station-information feed advertised by the GBFS index.

    Parameters
    ----------
    data : dict
        Decoded index document; the feed list is read from
        ``data['data']['en']['feeds']``.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    dict
        The decoded JSON body of the feed.
    """
    feeds = data['data']['en']['feeds']

    # Locate the feed by name rather than trusting its position in the list;
    # fall back to the historical position (index 1) when no name matches.
    feed = next((f for f in feeds if f.get('name') == 'station_information'), None)
    if feed is None:
        feed = feeds[1]

    r = requests.get(feed['url'], timeout=timeout)

    return r.json()


def station_status(data, timeout = 30):
    """Download the station-status feed advertised by the GBFS index.

    Parameters
    ----------
    data : dict
        Decoded index document; the feed list is read from
        ``data['data']['en']['feeds']``.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    dict
        The decoded JSON body of the feed.
    """
    feeds = data['data']['en']['feeds']

    # Locate the feed by name rather than trusting its position in the list;
    # fall back to the historical position (index 2) when no name matches.
    feed = next((f for f in feeds if f.get('name') == 'station_status'), None)
    if feed is None:
        feed = feeds[2]

    r = requests.get(feed['url'], timeout=timeout)

    return r.json()


def parse_station_status(station_status_json):
    """Flatten a station-status payload into one dict per station.

    Parameters
    ----------
    station_status_json : dict
        Decoded feed with a top-level ``'last_updated'`` epoch timestamp and
        the station records under ``['data']['stations']``.

    Returns
    -------
    list of dict
        One dict per station record, in feed order. A field that is absent
        from a record is reported as ``None``; it is never filled with the
        value of a previously parsed station.

    Raises
    ------
    KeyError
        If ``'last_updated'`` or ``['data']['stations']`` is missing.
    """

    def _to_datetime(epoch_seconds):
        # A missing timestamp stays None instead of crashing fromtimestamp().
        if epoch_seconds is None:
            return None
        return dt.datetime.fromtimestamp(epoch_seconds)

    last_update = station_status_json['last_updated']
    station_status_list = []

    for station in station_status_json['data']['stations']:
        # .get() keeps every record independent: a missing key yields None
        # for that field only and cannot leak values from an earlier station.
        station_status_list.append({
            'station_status' : station.get('station_status'),
            'last_reported' : _to_datetime(station.get('last_reported')),
            'last_updated' : _to_datetime(last_update),
            'available_scooters': station.get('num_scooters_available'),
            'num_bikes_available' : station.get('num_bikes_available'),
            'num_bikes_disabled' : station.get('num_bikes_disabled'),
            'num_ebikes_available' : station.get('num_ebikes_available'),
            'station_id' : station.get('station_id'),
            'is_renting' : station.get('is_renting'),
            'is_returning' : station.get('is_returning'),
            'num_docks_available' : station.get('num_docks_available'),
            'num_docks_disabled' : station.get('num_docks_disabled')
        })

    return station_status_list


def parse_station_info(stations_json):
    """Flatten a station-information payload into one dict per station.

    Parameters
    ----------
    stations_json : dict
        Decoded feed with a top-level ``'last_updated'`` epoch timestamp and
        the station records under ``['data']['stations']``.

    Returns
    -------
    list of dict
        One dict per station, in feed order. The station's ``'name'`` is
        stored under the key ``'adress'`` and its ``'station_type'`` under
        ``'type'``; every dict carries the feed-level timestamp converted
        with ``datetime.fromtimestamp`` as ``'last_updated'``.

    Raises
    ------
    KeyError
        If a station record lacks any of the fields read below.
    """
    feed_timestamp = stations_json['last_updated']

    return [
        {
            'region_id' : entry['region_id'],
            'adress' : entry['name'],
            'latitude': entry['lat'],
            'longitude' : entry['lon'],
            'type' : entry['station_type'],
            'has_kiosk' : entry['has_kiosk'],
            'capacity' : entry['capacity'],
            'rental_methods' : entry['rental_methods'],
            'station_id' : entry['station_id'],
            'legacy_id' : entry['legacy_id'],
            'last_updated' : dt.datetime.fromtimestamp(feed_timestamp)
        }
        for entry in stations_json['data']['stations']
    ]


def get_bike_info(data, timeout = 30):
    """Download the free-bike-status feed advertised by the GBFS index.

    Parameters
    ----------
    data : dict
        Decoded index document; the feed list is read from
        ``data['data']['en']['feeds']``.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    dict
        The decoded JSON body of the feed.
    """
    feeds = data['data']['en']['feeds']

    # Locate the feed by name rather than trusting its position in the list;
    # fall back to the historical position (index 3) when no name matches.
    feed = next((f for f in feeds if f.get('name') == 'free_bike_status'), None)
    if feed is None:
        feed = feeds[3]

    resp = requests.get(feed['url'], timeout=timeout)

    return resp.json()


def parse_bike_info(bike_json):
    """Flatten a bike-status payload into one dict per bike.

    Parameters
    ----------
    bike_json : dict
        Decoded feed with a top-level ``'last_updated'`` epoch timestamp and
        the bike records under ``['data']['bikes']``.

    Returns
    -------
    list of dict
        One dict per bike, in feed order, with keys ``bike_id``,
        ``latitude``, ``longitude``, ``bike_type``, ``reserved``,
        ``disabled`` and ``last_updated``.

    Raises
    ------
    KeyError
        If a bike record lacks any of the fields read below.
    """
    last_update = bike_json['last_updated']
    bike_list = []

    for bike in bike_json['data']['bikes']:
        # The bike's 'type' field is read straight into the dict so the
        # builtin ``type`` is not shadowed by a local variable.
        bike_list.append({
            'bike_id' : bike['bike_id'],
            'latitude': bike['lat'],
            'longitude' : bike['lon'],
            'bike_type' : bike['type'],
            'reserved' : bike['is_reserved'],
            'disabled' : bike['is_disabled'],
            'last_updated' : dt.datetime.fromtimestamp(last_update)
        })

    return bike_list

In [None]:
# Fetch the feed index, download the station-information feed it points to,
# and tabulate the parsed records (one row per station).
df_stations = pd.DataFrame(parse_station_info(station_info(main_request())))

In [None]:
# Number of stations per region_id value.
df_stations['region_id'].value_counts()

### Limit Data to the D.C. Area

In [None]:
# Keep only the stations whose region_id is the string '42'.
df_dc_area_stations = df_stations.loc[df_stations['region_id'].eq('42')]

In [None]:
# One shapely Point per station, in (x=longitude, y=latitude) order.
geometry = [
    Point(lon, lat)
    for lon, lat in zip(df_dc_area_stations['longitude'], df_dc_area_stations['latitude'])
]
# WGS84 lon/lat. The authority string replaces the deprecated
# {'init': 'epsg:4326'} dict form, which triggers a FutureWarning.
crs = 'EPSG:4326'
geometry[:3]

In [None]:
# Attach the point geometry and CRS to the filtered station table.
geo_df = gpd.GeoDataFrame(df_dc_area_stations, geometry=geometry, crs=crs)
geo_df.crs

In [None]:
# Feed timestamp of the station table. Every row carries the same feed-level
# value, so it is read by column name from the first row instead of through a
# positional (row, column) index that silently breaks if columns move.
df_dc_area_stations['last_updated'].iloc[0]

In [None]:
# Road shapefile used as the base layer of the map.
washington = gpd.read_file('datasets/tl_2018_11001_roads/tl_2018_11001_roads.shp')

fig, ax = plt.subplots(figsize=(14, 14))

# Roads underneath (zorder 1), station markers on top (zorder 2).
washington.plot(ax=ax, zorder=1)
geo_df.plot(ax=ax, marker='o', color='red', markersize=7, zorder=2)

# Crop the axes to the bounding box of the road layer.
ax.set_xlim(washington.total_bounds[0], washington.total_bounds[2])
ax.set_ylim(washington.total_bounds[1], washington.total_bounds[3])

# Every row carries the same feed-level timestamp; read it by column name
# rather than through a hard-coded (row, column) position.
updated_time = df_dc_area_stations['last_updated'].iloc[0]

ax.set_title(f"Bike-Sharing Stations in Washington, D.C. at {updated_time}")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")

plt.show()

In [None]:
# Fetch the feed index again, download the bike feed it points to, and
# tabulate the parsed records (one row per bike).
df_bikes = pd.DataFrame(parse_bike_info(get_bike_info(main_request())))

In [None]:
# Preview the first rows of the bike table.
df_bikes.head()

In [None]:
# Turn the reserved flag into a 'Yes'/'No' label. An existing 'No' label is
# kept as 'No', so re-running this cell no longer flips every row to 'Yes'
# (the original compared only against 0, which the string 'No' never equals).
df_bikes['reserved'] = df_bikes['reserved'].map(lambda flag: 'No' if flag in (0, 'No') else 'Yes')

In [None]:
# One shapely Point per bike, in (x=longitude, y=latitude) order.
geometry = [
    Point(lon, lat)
    for lon, lat in zip(df_bikes['longitude'], df_bikes['latitude'])
]
# WGS84 lon/lat. The authority string replaces the deprecated
# {'init': 'epsg:4326'} dict form, which triggers a FutureWarning.
crs = 'EPSG:4326'
geometry[:3]

In [None]:
# Rebind geo_df to the bike table with its point geometry and CRS attached.
geo_df = gpd.GeoDataFrame(df_bikes, geometry=geometry, crs=crs)

In [None]:
# Road shapefile used as the base layer of the map.
washington = gpd.read_file('datasets/tl_2018_11001_roads/tl_2018_11001_roads.shp')

fig, ax = plt.subplots(figsize=(12, 12))

# Roads underneath (zorder 1), bike markers on top (zorder 2).
washington.plot(ax=ax, zorder=1)
geo_df.plot(ax=ax, marker='o', color='red', markersize=5, zorder=2)

# Crop the axes to the bounding box of the road layer.
ax.set_xlim(washington.total_bounds[0], washington.total_bounds[2])
ax.set_ylim(washington.total_bounds[1], washington.total_bounds[3])

# Every row carries the same feed-level timestamp; read it by column name
# rather than through a hard-coded (row, column) position.
updated_time = df_bikes['last_updated'].iloc[0]

ax.set_title(f"Available Bikes in Washington, D.C. at {updated_time}")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")

# Display the map
plt.show()

In [None]:
# Fetch the feed index, download the station-status feed, and tabulate it.
# main_request() is called with its default URL: no `base_url` variable is
# defined at notebook level, so passing it raised NameError on a fresh kernel.
df_station_status = pd.DataFrame(parse_station_status(station_status(main_request())))

In [None]:
# Display the station-status table.
df_station_status

In [None]:
# Join station metadata with live status on station_id. Both inputs have a
# 'last_updated' column, which the merge suffixes as _x (left) and _y (right).
df_stations_merged = df_dc_area_stations.merge(df_station_status, on='station_id')

In [None]:
# Display the merged station table.
df_stations_merged

In [None]:
# One shapely Point per merged station row, in (x=longitude, y=latitude) order.
geometry = [
    Point(lon, lat)
    for lon, lat in zip(df_stations_merged['longitude'], df_stations_merged['latitude'])
]
# WGS84 lon/lat. The authority string replaces the deprecated
# {'init': 'epsg:4326'} dict form, which triggers a FutureWarning.
crs = 'EPSG:4326'
geometry[:3]

In [None]:
# Rebind geo_df to the merged station table, minus its three timestamp columns.
geo_df = gpd.GeoDataFrame(
    df_stations_merged.drop(columns=['last_updated_y', 'last_updated_x', 'last_reported']),
    geometry=geometry,
    crs=crs,
)

geo_df.crs

In [None]:
fig, ax = plt.subplots(figsize=(15,15))
washington.plot(ax=ax, alpha=0.4, color='grey')

# The two groups partition the stations at 3 vs. 4 available bikes; the legend
# label and title now state that threshold (they previously said "more than 5"
# and "more than 4", which matched neither filter).
geo_df[geo_df['num_bikes_available'] <= 3].plot(ax=ax,
                                       markersize=15,
                                       color='red',
                                       marker='o',
                                       label='Low on Bikes')
geo_df[geo_df['num_bikes_available'] >= 4].plot(ax=ax,
                                       markersize=20,
                                       color='green',
                                       marker='^',
                                       label='At Least 4 Bikes')

# Crop the axes to the bounding box of the road layer.
ax.set_xlim(washington.total_bounds[0], washington.total_bounds[2])
ax.set_ylim(washington.total_bounds[1], washington.total_bounds[3])

# Same value the positional iloc[1,10] picked (the left frame's timestamp),
# now read by column name from the first row.
# NOTE(review): 'last_updated_y' is the status feed's timestamp and may be the
# more appropriate one for an availability plot; confirm which is intended.
updated_time = df_stations_merged['last_updated_x'].iloc[0]


ax.set_title(f"Stations with at least 4 available bikes in Washington, D.C. at {updated_time}")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")

ax.legend(prop={'size':15})

plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(15,15))
washington.plot(ax=ax, alpha=0.4, color='grey')

# Split stations by whether any e-bike is currently available.
geo_df[geo_df['num_ebikes_available'] == 0].plot(ax=ax,
                                       markersize=15,
                                       color='red',
                                       marker='o',
                                       label='No E-Bikes')
geo_df[geo_df['num_ebikes_available'] > 0].plot(ax=ax,
                                       markersize=20,
                                       color='green',
                                       marker='^',
                                       label='E-Bikes available')

# Crop the axes to the bounding box of the road layer.
ax.set_xlim(washington.total_bounds[0], washington.total_bounds[2])
ax.set_ylim(washington.total_bounds[1], washington.total_bounds[3])

# Same value the positional iloc[1,10] picked (the left frame's timestamp),
# now read by column name from the first row.
updated_time = df_stations_merged['last_updated_x'].iloc[0]


ax.set_title(f"Stations with available ebikes in Washington, D.C. at {updated_time}")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")

ax.legend(prop={'size':15})

plt.show()

In [None]:
# Point geometry for every bike, then a GeoDataFrame without the timestamp column.
geometry = [
    Point(lon, lat)
    for lon, lat in zip(df_bikes['longitude'], df_bikes['latitude'])
]
geo_df_bikes = gpd.GeoDataFrame(
    df_bikes.drop(columns='last_updated'),
    geometry=geometry,
    crs=crs,
)

In [None]:
# Load the trip-history CSV (2011, going by the file name) and show its
# (rows, columns) shape.
trips_11 = pd.read_csv('datasets/2011-capitalbikeshare-tripdata.csv')

trips_11.shape

In [None]:
# Strip the live-status and timestamp columns, leaving the station attributes
# that are joined onto the trip tables.
df_tobemerged = df_stations_merged.drop(
    columns=[
        'type', 'has_kiosk', 'available_scooters',
        'num_bikes_available', 'num_bikes_disabled', 'num_ebikes_available',
        'is_renting', 'is_returning',
        'num_docks_available', 'num_docks_disabled',
        'last_updated_x', 'last_updated_y', 'last_reported',
    ]
)

In [None]:
# Name the station-name column like the trip tables' 'Start station' column
# so the two can be merged on it.
df_tobemerged = df_tobemerged.rename(columns={'adress': 'Start station'})

In [None]:
# Attach station attributes to each trip via its start station, and label
# the joined coordinates as the trip's start coordinates.
df_trip_with_stations = (
    df_tobemerged
    .merge(trips_11, on='Start station')
    .rename(columns={'latitude': 'start_latitude', 'longitude': 'start_longitude'})
)

In [None]:
# {column -> {station name -> coordinate}}, built from the stations that
# appear as a trip start in df_trip_with_stations.
start_station_coordinates = (
    df_trip_with_stations
    .groupby("Start station")
    .first()[["start_latitude", "start_longitude"]]
    .to_dict()
)

def get_end_coordinates(start_station):
    """Return ``(latitude, longitude)`` for a station name.

    The name is looked up in ``start_station_coordinates``; a coordinate is
    ``None`` when the station never appears as a trip start.
    """
    latitudes = start_station_coordinates["start_latitude"]
    longitudes = start_station_coordinates["start_longitude"]
    return latitudes.get(start_station), longitudes.get(start_station)

# Resolve each trip's end station and unzip the pairs into two new columns.
df_trip_with_stations["end_latitude"], df_trip_with_stations["end_longitude"] = zip(
    *df_trip_with_stations["End station"].map(get_end_coordinates)
)

In [None]:
from shapely.geometry import LineString

# Count trips per (start station, end station) pair, carrying the coordinates
# along as grouping keys.
station_pairs = (
    df_trip_with_stations
    .groupby(['Start station', 'start_latitude', 'start_longitude', 'End station', 'end_latitude', 'end_longitude'])
    .size()
    .reset_index(name='count')
)

# The 100 most frequent pairs.
sorted_pairs = station_pairs.sort_values('count', ascending=False).head(100)

# One straight segment per pair, from start to end, in (lon, lat) order.
lines = [
    LineString([(start_lon, start_lat), (end_lon, end_lat)])
    for start_lon, start_lat, end_lon, end_lat in zip(
        sorted_pairs['start_longitude'],
        sorted_pairs['start_latitude'],
        sorted_pairs['end_longitude'],
        sorted_pairs['end_latitude'],
    )
]

geo_df_trips = gpd.GeoDataFrame(sorted_pairs, geometry=lines, crs=crs)

In [None]:
# Load the trip-history CSV (2015 Q2, going by the file name) and preview it.
trips_15 = pd.read_csv('datasets/2015Q2-capitalbikeshare-tripdata.csv')
trips_15.head()

In [None]:
# Attach station attributes to each trip via its start station, and label
# the joined coordinates as the trip's start coordinates.
df_trip_with_stations_15 = (
    df_tobemerged
    .merge(trips_15, on='Start station')
    .rename(columns={'latitude': 'start_latitude', 'longitude': 'start_longitude'})
)

In [None]:
# Rebind the lookup to this trip table:
# {column -> {station name -> coordinate}}, built from the stations that
# appear as a trip start in df_trip_with_stations_15.
start_station_coordinates = (
    df_trip_with_stations_15
    .groupby("Start station")
    .first()[["start_latitude", "start_longitude"]]
    .to_dict()
)

def get_end_coordinates(start_station):
    """Return ``(latitude, longitude)`` for a station name.

    The name is looked up in ``start_station_coordinates``; a coordinate is
    ``None`` when the station never appears as a trip start.
    """
    latitudes = start_station_coordinates["start_latitude"]
    longitudes = start_station_coordinates["start_longitude"]
    return latitudes.get(start_station), longitudes.get(start_station)

# Resolve each trip's end station and unzip the pairs into two new columns.
df_trip_with_stations_15["end_latitude"], df_trip_with_stations_15["end_longitude"] = zip(
    *df_trip_with_stations_15["End station"].map(get_end_coordinates)
)

In [None]:
# Count trips per (start station, end station) pair, carrying the coordinates
# along as grouping keys.
station_pairs_15 = (
    df_trip_with_stations_15
    .groupby(['Start station', 'start_latitude', 'start_longitude', 'End station', 'end_latitude', 'end_longitude'])
    .size()
    .reset_index(name='count')
)

# The 100 most frequent pairs.
sorted_pairs_15 = station_pairs_15.sort_values('count', ascending=False).head(100)

# One straight segment per pair, from start to end, in (lon, lat) order.
lines = [
    LineString([(start_lon, start_lat), (end_lon, end_lat)])
    for start_lon, start_lat, end_lon, end_lat in zip(
        sorted_pairs_15['start_longitude'],
        sorted_pairs_15['start_latitude'],
        sorted_pairs_15['end_longitude'],
        sorted_pairs_15['end_latitude'],
    )
]

geo_df_trips_15 = gpd.GeoDataFrame(sorted_pairs_15, geometry=lines, crs=crs)

In [None]:
# Load the trip-history CSV (June 2019, going by the file name) and preview it.
trips_19 = pd.read_csv('datasets/201906-capitalbikeshare-tripdata.csv')
trips_19.head()

In [None]:
# Attach station attributes to each trip via its start station, and label
# the joined coordinates as the trip's start coordinates.
df_trip_with_stations_19 = (
    df_tobemerged
    .merge(trips_19, on='Start station')
    .rename(columns={'latitude': 'start_latitude', 'longitude': 'start_longitude'})
)

In [None]:
# Rebind the lookup to this trip table:
# {column -> {station name -> coordinate}}, built from the stations that
# appear as a trip start in df_trip_with_stations_19.
start_station_coordinates = (
    df_trip_with_stations_19
    .groupby("Start station")
    .first()[["start_latitude", "start_longitude"]]
    .to_dict()
)

def get_end_coordinates(start_station):
    """Return ``(latitude, longitude)`` for a station name.

    The name is looked up in ``start_station_coordinates``; a coordinate is
    ``None`` when the station never appears as a trip start.
    """
    latitudes = start_station_coordinates["start_latitude"]
    longitudes = start_station_coordinates["start_longitude"]
    return latitudes.get(start_station), longitudes.get(start_station)

# Resolve each trip's end station and unzip the pairs into two new columns.
df_trip_with_stations_19["end_latitude"], df_trip_with_stations_19["end_longitude"] = zip(
    *df_trip_with_stations_19["End station"].map(get_end_coordinates)
)

In [None]:
# Count trips per (start station, end station) pair, carrying the coordinates
# along as grouping keys.
station_pairs_19 = (
    df_trip_with_stations_19
    .groupby(['Start station', 'start_latitude', 'start_longitude', 'End station', 'end_latitude', 'end_longitude'])
    .size()
    .reset_index(name='count')
)

# The 100 most frequent pairs.
sorted_pairs_19 = station_pairs_19.sort_values('count', ascending=False).head(100)

# One straight segment per pair, from start to end, in (lon, lat) order.
lines = [
    LineString([(start_lon, start_lat), (end_lon, end_lat)])
    for start_lon, start_lat, end_lon, end_lat in zip(
        sorted_pairs_19['start_longitude'],
        sorted_pairs_19['start_latitude'],
        sorted_pairs_19['end_longitude'],
        sorted_pairs_19['end_latitude'],
    )
]

geo_df_trips_19 = gpd.GeoDataFrame(sorted_pairs_19, geometry=lines, crs=crs)

In [None]:
# Load the trip-history CSV (June 2023, going by the file name) and preview it.
trips_23 = pd.read_csv('datasets/202306-capitalbikeshare-tripdata.csv')
trips_23.head()

In [None]:
# This trip table already carries start/end coordinates, so the pairs are
# counted directly on it without joining the station table.
station_pairs_23 = (
    trips_23
    .groupby(['start_station_name', 'start_lat', 'start_lng', 'end_station_name', 'end_lat', 'end_lng'])
    .size()
    .reset_index(name='count')
)

# The 100 most frequent pairs.
sorted_pairs_23 = station_pairs_23.sort_values('count', ascending=False).head(100)

# One straight segment per pair, from start to end, in (lng, lat) order.
lines = [
    LineString([(start_lng, start_lat), (end_lng, end_lat)])
    for start_lng, start_lat, end_lng, end_lat in zip(
        sorted_pairs_23['start_lng'],
        sorted_pairs_23['start_lat'],
        sorted_pairs_23['end_lng'],
        sorted_pairs_23['end_lat'],
    )
]

geo_df_trips_23 = gpd.GeoDataFrame(sorted_pairs_23, geometry=lines, crs=crs)

In [None]:
import folium
from folium import plugins

# Base map: the road layer. Each later .explore(m=m, ...) call adds one more
# named layer to this same map object.
m = washington.explore(
    scheme="naturalbreaks",
    legend=True,
    k=10,
    tooltip=False,
    popup=["FULLNAME"],
    legend_kwds=dict(colorbar=False),
    name="Washington D.C",
    overlay = False
)

# geo_df is whatever that name was last bound to; the tooltip columns below
# exist on the merged station GeoDataFrame.
geo_df.explore(
    m=m,
    color="blue",
    marker_kwds=dict(radius=5, fill=True),
    tooltip=['adress','capacity','num_bikes_available','num_ebikes_available'],
    tooltip_kwds=dict(labels=True),
    name="Bike Stations",
)

geo_df_bikes.explore(
    m=m,
    color="green",
    marker_type = 'circle_marker',
    marker_kwds=dict(radius=4, fill=True),
    tooltip=['bike_type','reserved'],
    tooltip_kwds=dict(labels=True),
    name="Available Bikes",
)

# Route layers are added with show=False so they start switched off.
geo_df_trips.explore(
    m=m,  # pass the map object
    color="purple",
    marker_kwds=dict(radius=8, fill=True),
    tooltip=['count', 'Start station','End station'],
    tooltip_kwds=dict(labels=True),
    name="100 Most Popular Routes in 2011",
    show = False
)

geo_df_trips_15.explore(
    m=m,
    color="brown",
    marker_kwds=dict(radius=8, fill=True),
    tooltip=['count', 'Start station','End station'],
    tooltip_kwds=dict(labels=True),
    name="100 Most Popular Routes in 2015",
    show = False
)

geo_df_trips_19.explore(
    m=m,
    color="black",
    marker_kwds=dict(radius=8, fill=True),
    tooltip=['count', 'Start station','End station'],
    tooltip_kwds=dict(labels=True),
    name="100 Most Popular Routes in 2019",
    show = False
)

geo_df_trips_23.explore(
    m=m,
    color="red",
    marker_kwds=dict(radius=8, fill=True),
    tooltip=['count', 'start_station_name','end_station_name'],
    tooltip_kwds=dict(labels=True),
    name="100 Most Popular Routes in 2023",
    show = False
)

# Alternative base layer. "Stamen Terrain" is no longer a built-in tileset in
# current folium releases (passing the bare name raises an error about custom
# tiles needing an attribution), so a built-in CartoDB layer is used instead.
folium.TileLayer("CartoDB positron", show=False).add_to(
    m
)

folium.LayerControl().add_to(m)
m.save("bike_trips_map.html")
m