# Getting the journey data

The TfL bike usage data is hosted as a number of CSV files on their website. I looped through each of these CSVs, aggregated them together into one dataset, and cleaned it all up.

In [4]:
#This is our code 

import pandas as pd
import urllib

# Change with the most up to date dataset from superset
all_data = pd.read_csv("Dataset_map.csv")

# Quick sanity check of what was loaded: (rows, columns) and the first rows.
print(all_data.shape)
print(all_data.head())

# Coerce NbEmptyDocks to a numeric dtype.
# The whole column is reassigned instead of written through `.loc[:, col] = ...`:
# from pandas 2.0 that form tries to set the values in place, which can leave a
# non-numeric (e.g. object) column with its original dtype.
all_data["NbEmptyDocks"] = pd.to_numeric(all_data["NbEmptyDocks"])

(1000, 8)
               id                     commonName  NbBikes  NbEmptyDocks  \
0  BikePoints_204    Margery Street, Clerkenwell        3            16   
1  BikePoints_206       New Road 1 , Whitechapel       36             0   
2  BikePoints_207  Grosvenor Crescent, Belgravia        2            15   
3  BikePoints_208     Mallory Street, Marylebone        0            20   
4  BikePoints_209   Denyer Street, Knightsbridge        3            27   

   NbDocks        lat      lon    proc_timestamp  
0       19  51.526599 -0.11243  10/05/2021 10:51  
1       36  51.518154 -0.06270  10/05/2021 10:51  
2       18  51.501352 -0.15319  10/05/2021 10:51  
3       20  51.525051 -0.16630  10/05/2021 10:51  
4       30  51.493583 -0.16510  10/05/2021 10:51  


# Getting the bike station locations

TfL have a live "cycle hire updates" feed which lists information for each cycle hire station, updated once every minute or so. I don't utilise this live data - instead I just take the name, ID, lat/lon, and capacity for each bike station.

**Original code to get the data**

import requests
from xml.etree import ElementTree as ET
import pandas as pd

# TfL live cycle-hire feed (XML). Only a handful of per-station fields are kept.
site = "https://tfl.gov.uk/tfl/syndication/feeds/cycle-hire/livecyclehireupdates.xml"

# Bound the wait so a stalled connection cannot hang the kernel, and fail on an
# HTTP error status instead of trying to parse an error page as XML.
response = requests.get(site, timeout=30)
response.raise_for_status()
root = ET.fromstring(response.content)

# One entry per child element of the feed root. Fields are read by position
# (children 0, 1, 3, 4 and 12) and stored as id, name, lat, lon and capacity.
# NOTE(review): the positions depend on the feed's element order - verify against the live XML.
id_list = [int(station[0].text) for station in root]
name_list = [station[1].text for station in root]
lat_list = [float(station[3].text) for station in root]
lon_list = [float(station[4].text) for station in root]
capacity_list = [int(station[12].text) for station in root]

# Column order matches the saved CSV: name, id, lat, lon, capacity.
all_locs = pd.DataFrame({
    "name": name_list,
    "id": id_list,
    "lat": lat_list,
    "lon": lon_list,
    "capacity": capacity_list,
})

# Persist without the row index so the file contains only the five columns.
all_locs.to_csv('bike_point_locations_saved.csv', header=True, index=False)

print(all_locs.shape)
all_locs.head()

## Plotting all the bike stations in bokeh

Once I've got all the bike station locations, I generate a quick interactive bokeh plot of all of them. For the backgrounds I use two separate shapefiles I downloaded - one of all the buildings in London, and one of all the roads.

In [5]:
from bokeh.models import GeoJSONDataSource, ColumnDataSource
from bokeh.models.tools import PanTool, HoverTool, ResetTool, WheelZoomTool
from bokeh.io import output_notebook, output_file, save, show
from bokeh.sampledata.sample_geojson import geojson
import bokeh.plotting as bp
import json

In [6]:
## Load both roads and buildings geojson files into correct format

# Read each basemap layer in full (no truncation is applied here).
# The encoding is given explicitly because open() otherwise falls back to the
# platform's locale encoding, while GeoJSON files are UTF-8.
with open('Basemaps/London_buildings.geojson', 'r', encoding='utf-8') as f:
    geojson_buildings = f.read()

with open('Basemaps/London_roads.geojson', 'r', encoding='utf-8') as f:
    geojson_roads = f.read()

# Round-trip through json (parse, then re-serialise) so malformed input fails
# here, then wrap each layer in a bokeh GeoJSONDataSource for plotting.
json_buildings = GeoJSONDataSource(geojson=json.dumps(json.loads(geojson_buildings)))
json_roads = GeoJSONDataSource(geojson=json.dumps(json.loads(geojson_roads)))

In [7]:
# Prepare everything the plot needs: the per-station frame, the bounding box
# of the map, the bokeh data source and the figure size in pixels.

# Keep only the columns used for drawing and for the hover tooltips, and add a
# constant marker size for every station.
point_columns = ['commonName', 'NbEmptyDocks', 'lat', 'lon', 'proc_timestamp']
df_points = all_data.loc[:, point_columns].assign(size=5)

# Figure dimensions in pixels.
plot_w, plot_h = 900, 600

# Bounding box: the extent of the station coordinates plus a small margin
# (note the western margin is 0.001 while the other three are 0.003).
x_range = (df_points['lon'].min() - 0.001, df_points['lon'].max() + 0.003)
y_range = (df_points['lat'].min() - 0.003, df_points['lat'].max() + 0.003)

print(x_range)
print(y_range)

# Column-oriented copy of the frame in the form bokeh glyphs consume.
points_source = ColumnDataSource(ColumnDataSource.from_df(df_points))

df_points.head()

(-0.23777, 0.0007200000000000002)
(51.451752, 51.552369)


Unnamed: 0,commonName,NbEmptyDocks,lat,lon,proc_timestamp,size
0,"Margery Street, Clerkenwell",16,51.526599,-0.11243,10/05/2021 10:51,5
1,"New Road 1 , Whitechapel",0,51.518154,-0.0627,10/05/2021 10:51,5
2,"Grosvenor Crescent, Belgravia",15,51.501352,-0.15319,10/05/2021 10:51,5
3,"Mallory Street, Marylebone",20,51.525051,-0.1663,10/05/2021 10:51,5
4,"Denyer Street, Knightsbridge",27,51.493583,-0.1651,10/05/2021 10:51,5


In [8]:
## Set up the bokeh plot: building footprints as the backdrop, one circle per
## bike station on top, and a hover tooltip restricted to the station circles.

tools = [PanTool(), WheelZoomTool(), ResetTool()]

# Borderless figure fitted to the station bounding box.
# NOTE(review): plot_width/plot_height were replaced by width/height in
# Bokeh 3.0 - rename these two arguments if the environment is upgraded.
p = bp.figure(tools=tools, plot_width=plot_w, plot_height=plot_h,
    x_range=x_range, y_range=y_range, outline_line_color=None,
    min_border=0, min_border_left=0, min_border_right=0,
    min_border_top=0, min_border_bottom=0, title = 'London bike map')

# Backdrop: translucent building polygons with no outline.
# NOTE(review): json_roads is loaded earlier but is not drawn in this cell.
p.patches(xs='xs', ys='ys', fill_alpha=0.3, fill_color='#0C090A',
                   line_alpha=0, source=json_buildings)

# One marker per station; the renderer is kept so the hover tool can target it.
circles = p.circle(x='lon', y='lat', size='size', color='blue', alpha=1, source=points_source)

# The second field shows NbEmptyDocks, so it is labelled as empty docks rather
# than as the station's capacity.
tooltips=[
            ("Name", "@commonName"),
            ("Empty docks", "@NbEmptyDocks"),
            ("Processed", "@proc_timestamp")
        ]

# Limit hovering to the station circles so the building patches stay silent.
p.add_tools(HoverTool(tooltips=tooltips, renderers = [circles]))

# Dark map styling with axes and grid lines hidden.
p.background_fill_color = '#2C3539'
p.xaxis.visible = False
p.yaxis.visible = False
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

show(p)