In [16]:
import requests
import json
import zipfile, io
import pandas as pd
import folium
import os

Define the API endpoints and data locations.  
Define a helper function to pretty-print the JSON responses.

In [12]:
# Local data folders, expressed relative to the working directory.
DATA_PATH = '../data'
RAW_DATA_PATH = os.path.join(DATA_PATH, 'raw')
# NOTE(review): the intermediate folder is nested under the raw folder — confirm this is intended.
INT_DATA_PATH = os.path.join(RAW_DATA_PATH, 'intermediate')

# Base URL of the TfL API and the endpoints built on top of it.
TFL_API = 'https://api.tfl.gov.uk'
STATION_DATA_API = f'{TFL_API}/stationdata/tfl-stationdata-detailed.zip'
SHPBUM_STATION_QUERY = f'{TFL_API}/Stoppoint/Search/Shepherd?modes=tube'
STOPPOINT_TYPES = f'{TFL_API}/StopPoint/Meta/StopTypes'
SHPBUM_ARRIVALS = f'{TFL_API}/StopPoint/940GZZLUSBM/Arrivals'

def jprint(obj):
    """Print a JSON-serialisable object as indented text.

    Args:
        obj: Any value accepted by ``json.dumps`` (e.g. a decoded API response).

    Keys are sorted and nesting is indented by two spaces so that
    responses are easy to read and compare.
    """
    print(json.dumps(obj, sort_keys=True, indent=2))

# Get Shepherd's Bush Market Arrivals

In [6]:
# Request the arrivals endpoint for stop 940GZZLUSBM. The timeout stops the
# cell from hanging indefinitely if the API never answers.
response = requests.get(SHPBUM_ARRIVALS, timeout=30)
print(response.status_code)

200


In [7]:
# Pretty-print the decoded JSON body of the arrivals response.
jprint(response.json())

[
  {
    "$type": "Tfl.Api.Presentation.Entities.Prediction, Tfl.Api.Presentation.Entities",
    "bearing": "",
    "currentLocation": "At Hammersmith Platform 1",
    "destinationName": "Edgware Road (Circle Line) Underground Station",
    "destinationNaptanId": "940GZZLUERC",
    "direction": "outbound",
    "expectedArrival": "2023-05-07T10:32:56Z",
    "id": "-1952687116",
    "lineId": "circle",
    "lineName": "Circle",
    "modeName": "tube",
    "naptanId": "940GZZLUSBM",
    "operationType": 1,
    "platformName": "Eastbound - Platform 1",
    "stationName": "Shepherd's Bush Market Underground Station",
    "timeToLive": "2023-05-07T10:32:56Z",
    "timeToStation": 443,
    "timestamp": "2023-05-07T10:25:33.1801317Z",
    "timing": {
      "$type": "Tfl.Api.Presentation.Entities.PredictionTiming, Tfl.Api.Presentation.Entities",
      "countdownServerAdjustment": "00:00:00",
      "insert": "0001-01-01T00:00:00",
      "read": "2023-05-07T10:25:47.447Z",
      "received": "000

# Station Data Files
Download the files from the station data API and store them in the raw data folder.

In [17]:
# Only download and extract the archive when the target folder holds no files.
station_output_folder = os.path.join(RAW_DATA_PATH, 'StationDataOutput')
# Create the folder on a first run; os.listdir raises if it does not exist.
os.makedirs(station_output_folder, exist_ok=True)
if len(os.listdir(station_output_folder)) == 0:
    r = requests.get(STATION_DATA_API, timeout=60)
    # Fail with a clear HTTP error instead of trying to unzip an error page.
    r.raise_for_status()
    # The archive is unpacked straight from memory; the context manager closes it.
    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        z.extractall(station_output_folder)

Load some of the extracted files into DataFrames.

In [18]:
# Read the CSVs from the folder the station archive is extracted into.
# os.path.join supplies the path separator that plain string concatenation
# dropped (RAW_DATA_PATH + 'Stations.csv' resolves to '../data/rawStations.csv').
# NOTE(review): assumes the CSVs sit at the top level of the extracted archive — confirm.
station_data_dir = os.path.join(RAW_DATA_PATH, 'StationDataOutput')
modes_and_lines = pd.read_csv(os.path.join(station_data_dir, 'ModesAndLines.csv'))
station_points = pd.read_csv(os.path.join(station_data_dir, 'StationPoints.csv'))
stations = pd.read_csv(os.path.join(station_data_dir, 'Stations.csv'))

In [19]:
# Preview the first five rows (five is the default for head()).
modes_and_lines.head()

Unnamed: 0,Mode,Name
0,cableCar,london-cable-car
1,dlr,dlr
2,nationalRail,thameslink
3,nationalRail,national-rail
4,overground,london-overground


In [20]:
# Preview the first five rows (five is the default for head()).
stations.head()

Unnamed: 0,UniqueId,Name,FareZones,HubNaptanCode,Wifi,OutsideStationUniqueId,BlueBadgeCarParking,BlueBadgeCarParkSpaces,TaxiRanksOutsideStation,MainBusInterchange,PierInterchange,NationalRailInterchange,AirportInterchange,EmiratesAirLineInterchange
0,HUBABW,Abbey Wood,4,HUBABW,False,HUBABW-Outside,False,,False,,,,,
1,910GACTNCTL,Acton Central,3,,True,910GACTNCTL-Outside,False,,False,,,,,
2,910GACTONML,Acton Main Line,3,,False,910GACTONML-Outside,False,,False,,,,,
3,910GANERLEY,Anerley,4,,True,910GANERLEY-Outside,False,,False,,,,,
4,910GBCKNHMH,Beckenham Hill,4,,False,910GBCKNHMH-Outside,False,,False,,,,,


In [21]:
# Show the column labels of the stations table.
stations.columns

Index(['UniqueId', 'Name', 'FareZones', 'HubNaptanCode', 'Wifi',
       'OutsideStationUniqueId', 'BlueBadgeCarParking',
       'BlueBadgeCarParkSpaces', 'TaxiRanksOutsideStation',
       'MainBusInterchange', 'PierInterchange', 'NationalRailInterchange',
       'AirportInterchange', 'EmiratesAirLineInterchange'],
      dtype='object')

In [22]:
# Preview the first five rows (five is the default for head()).
station_points.head()

Unnamed: 0,UniqueId,StationUniqueId,AreaName,AreaId,Level,Lat,Lon,FriendlyName
0,910GACTNCTL-1001002-AC-3,910GACTNCTL,AC,3,0,51.50862,-0.26351,AC
1,910GACTNCTL-1001002-Bus-1,910GACTNCTL,Bus,1,0,51.50651,-0.26372,Bus
2,910GACTNCTL-1001002-ENTR-7,910GACTNCTL,ENTR,7,0,51.50892,-0.2626,ENTR
3,910GACTNCTL-1001002-RLY-4,910GACTNCTL,RLY,4,1,51.50865,-0.26293,RLY
4,910GACTNCTL-1001002-RPL-2,910GACTNCTL,RPL,2,1,51.50865,-0.26306,RPL


In [23]:
# Show the column labels of the station points table.
station_points.columns

Index(['UniqueId', 'StationUniqueId', 'AreaName', 'AreaId', 'Level', 'Lat',
       'Lon', 'FriendlyName'],
      dtype='object')

In [24]:
# Columns to keep from each table before joining.
st_cols = ['UniqueId', 'Name', 'FareZones']
stpts_cols = ['StationUniqueId', 'AreaName', 'AreaId', 'Level', 'Lat', 'Lon', 'FriendlyName']

# Join each station to its points on the station id, then discard the two
# id columns, which were only needed as join keys.
st_all = (
    stations[st_cols]
    .merge(station_points[stpts_cols], left_on='UniqueId', right_on='StationUniqueId')
    .drop(columns=['UniqueId', 'StationUniqueId'])
)
st_all

Unnamed: 0,Name,FareZones,AreaName,AreaId,Level,Lat,Lon,FriendlyName
0,Abbey Wood,4,Bus,5,1,51.49238,0.12128,Bus
1,Abbey Wood,4,EL EB,8,0,51.49128,0.12109,EL EB
2,Abbey Wood,4,EL WB,9,0,51.49122,0.12108,EL WB
3,Abbey Wood,4,FELIX,6,0,51.49143,0.12007,FELIX
4,Abbey Wood,4,HARRO,2,1,51.49163,0.12185,HARRO
...,...,...,...,...,...,...,...,...
3878,Barking Riverside,4,FCEnt,3,0,51.51935,0.11663,FCEnt
3879,Barking Riverside,4,RLY,5,0,51.51981,0.11653,RLY
3880,Barking Riverside,4,RPL-1,4,1,51.52052,0.11691,RPL-1
3881,Barking Riverside,4,RPL-2,2,1,51.52076,0.11692,RPL-2


In [26]:
# Count how often each area name occurs and write the counts to the file 'area_names'.
area_name_counts = st_all['AreaName'].value_counts()
area_name_counts.to_csv('area_names')

Bus      372
MET      269
BookH    232
RLY      190
Entr      99
        ... 
A-K-E      1
A-BAT      1
A-SHO      1
B-K-N      1
RREnt      1
Name: AreaName, Length: 1185, dtype: int64

In [68]:
# Average the Lat/Lon of all points sharing a station name, giving one row per name.
station_coords = st_all[['Name', 'Lat', 'Lon']]
station_coords.groupby('Name').mean().reset_index()

Unnamed: 0,Name,Lat,Lon
0,Abbey Road,51.532377,0.003720
1,Abbey Wood,51.491309,0.121233
2,Acton Central,51.508366,-0.263111
3,Acton Main Line,51.517070,-0.267873
4,Acton Town,51.502984,-0.280167
...,...,...,...
500,Woodgrange Park,51.549260,0.044200
501,Woodside,51.387410,-0.064697
502,Woodside Park,51.617900,-0.185438
503,Woolwich,51.491890,0.070613


In [10]:
# Centre point passed as the map's initial location.
COORDS = (51.505742, -0.108622)
# NOTE(review): the name `map` shadows Python's built-in map(); kept here
# because other cells may refer to it — consider renaming everywhere.
map = folium.Map(
    location=COORDS,
    zoom_start=12.58,
)

In [73]:
# Print the mean Lat/Lon of every station name.
# iterrows() yields (index label, row Series) pairs; only the Series is printed.
for station_name, mean_coords in st_all[['Name', 'Lat', 'Lon']].groupby('Name').mean().iterrows():
    print(mean_coords)

Lat    51.532377
Lon     0.003720
Name: Abbey Road, dtype: float64
Lat    51.491309
Lon     0.121233
Name: Abbey Wood, dtype: float64
Lat    51.508366
Lon    -0.263111
Name: Acton Central, dtype: float64
Lat    51.517070
Lon    -0.267873
Name: Acton Main Line, dtype: float64
Lat    51.502984
Lon    -0.280167
Name: Acton Town, dtype: float64
Lat    51.356465
Lon    -0.032435
Name: Addington Village, dtype: float64
Lat    51.379897
Lon    -0.073132
Name: Addiscombe, dtype: float64
Lat    51.514099
Lon    -0.075446
Name: Aldgate, dtype: float64
Lat    51.515457
Lon    -0.071142
Name: Aldgate East, dtype: float64
Lat    51.511010
Lon    -0.012936
Name: All Saints, dtype: float64
Lat    51.540670
Lon    -0.299382
Name: Alperton, dtype: float64
Lat    51.674039
Lon    -0.607539
Name: Amersham, dtype: float64
Lat    51.382203
Lon    -0.123650
Name: Ampere Way, dtype: float64
Lat    51.412380
Lon    -0.065581
Name: Anerley, dtype: float64
Lat    51.532376
Lon    -0.105929
Name: Angel, dtype: f