In [93]:
#Importing libraries
import pandas as pd
import requests
import plotly.express as px
from shapely.geometry import Point, Polygon
import numpy as np

In [94]:
#Access API with list of all lines of a mode of transport, tube as the selected mode as specified in the url

url_tube_lines = "https://api.tfl.gov.uk/Line/Mode/tube"

#timeout stops the cell from hanging indefinitely on a stalled connection;
#raise_for_status fails loudly on an HTTP error instead of passing an error payload on
response_tl = requests.get(url_tube_lines, timeout=30)
response_tl.raise_for_status()
data_tl = response_tl.json()


#Create data frame of the information
df_tl = pd.DataFrame(data_tl)

#create a list of the tubelines from the "id" column of the dataframe
TubeLines = df_tl["id"].tolist()

print(f"There are {len(TubeLines)} tube lines in the TfL underground network.")
print(f"These lines are: {', '.join([x.title() for x in TubeLines])}.")



There are 11 tube lines in the TfL underground network.
These lines are: Bakerloo, Central, Circle, District, Hammersmith-City, Jubilee, Metropolitan, Northern, Piccadilly, Victoria, Waterloo-City.


In [151]:
#line colours, one per entry and in the same order as the TubeLines list
tfl_colours = [
    'rgb(137, 78, 36)',    #bakerloo
    'rgb(220, 36, 31)',    #central
    'rgb(255, 206, 0)',    #circle
    'rgb(0, 114, 41)',     #district
    'rgb(215, 153, 175)',  #hammersmith-city
    'rgb(134, 143, 152)',  #jubilee
    'rgb(117, 16, 86)',    #metropolitan
    'rgb(0, 0, 0)',        #northern
    'rgb(0, 25, 168)',     #piccadilly
    'rgb(0, 160, 226)',    #victoria
    'rgb(118, 208, 189)',  #waterloo-city
]

#pairing each line id with its colour; zip stops at the shorter of the two lists
colours_dict = dict(zip(TubeLines, tfl_colours))

print(colours_dict)

{'bakerloo': 'rgb(137, 78, 36)', 'central': 'rgb(220, 36, 31)', 'circle': 'rgb(255, 206, 0)', 'district': 'rgb(0, 114, 41)', 'hammersmith-city': 'rgb(215, 153, 175)', 'jubilee': 'rgb(134, 143, 152)', 'metropolitan': 'rgb(117, 16, 86)', 'northern': 'rgb(0, 0, 0)', 'piccadilly': 'rgb(0, 25, 168)', 'victoria': 'rgb(0, 160, 226)', 'waterloo-city': 'rgb(118, 208, 189)'}


In [158]:
#looking up the colour of the first line in the TubeLines list
first_line_id = TubeLines[0]

print(colours_dict[first_line_id])

rgb(137, 78, 36)


In [None]:
#looking up a colour directly by its line id
bakerloo_colour = colours_dict['bakerloo']
print(bakerloo_colour)

In [95]:
#Collating data frames for tube analysis

#Accessing the API showing tube stations on one line
url_tube_stations = "https://api.tfl.gov.uk/Line/Victoria/StopPoints"

#timeout stops the cell from hanging indefinitely on a stalled connection;
#raise_for_status fails loudly on an HTTP error instead of building a data frame from an error payload
response_ts = requests.get(url_tube_stations, timeout=30)
response_ts.raise_for_status()
data_ts = response_ts.json()

#Create data frame from the information from the API about the tube stops

df_ts = pd.DataFrame(data_ts)


#Creating a new data frame using only the columns I need for analysis
#.copy() makes df_og an independent frame, so later edits to it cannot trigger
#pandas copy-vs-view warnings against df_ts

df_og = df_ts[["naptanId", "commonName", "lat", "lon"]].copy()


df_og.head(10)


Unnamed: 0,naptanId,commonName,lat,lon
0,940GZZLUBLR,Blackhorse Road Underground Station,51.586919,-0.04115
1,940GZZLUBXN,Brixton Underground Station,51.462618,-0.114888
2,940GZZLUEUS,Euston Underground Station,51.527824,-0.131846
3,940GZZLUFPK,Finsbury Park Underground Station,51.564158,-0.106825
4,940GZZLUGPK,Green Park Underground Station,51.506947,-0.142787
5,940GZZLUHAI,Highbury & Islington Underground Station,51.54635,-0.103324
6,940GZZLUKSX,King's Cross St. Pancras Underground Station,51.530663,-0.123194
7,940GZZLUOXC,Oxford Circus Underground Station,51.515224,-0.141903
8,940GZZLUPCO,Pimlico Underground Station,51.489097,-0.133761
9,940GZZLUSKW,Stockwell Underground Station,51.472184,-0.122644


In [4]:
#Loading the station facilities CSV so it can be joined to the stop data by naptanId
df_Station_Info = pd.read_csv("Stations.csv")

#Keeping only the columns needed for the analysis
station_info_columns = [
    "UniqueId",
    "FareZones",
    "Wifi",
    "BlueBadgeCarParking",
    "BlueBadgeCarParkSpaces",
    "TaxiRanksOutsideStation",
]
df_si = df_Station_Info.loc[:, station_info_columns]

#UniqueId is renamed to naptanId so the key column matches the stop data frame in the merge
df_si_nap = df_si.rename(columns={"UniqueId": "naptanId"})

df_si_nap.head(10)

Unnamed: 0,naptanId,FareZones,Wifi,BlueBadgeCarParking,BlueBadgeCarParkSpaces,TaxiRanksOutsideStation
0,HUBABW,4,False,False,,False
1,910GACTNCTL,3,True,False,,False
2,910GACTONML,3,False,False,,False
3,910GANERLEY,4,True,False,,False
4,910GBCKNHMH,4,False,False,,False
5,910GBELNGHM,3,False,False,,False
6,910GBHILLPK,5,True,True,2.0,False
7,910GBICKLEY,5,False,False,,False
8,910GBNHAM,Outside,False,False,,False
9,910GBRBY,2,True,False,,False


In [5]:
#Left-joining the Stations.csv columns onto the tube stops by naptanId,
#so every stop is kept even when it has no match in the CSV
df_og_si = df_og.merge(df_si_nap, how="left", on="naptanId")

df_og_si.head(10)


Unnamed: 0,naptanId,commonName,lat,lon,FareZones,Wifi,BlueBadgeCarParking,BlueBadgeCarParkSpaces,TaxiRanksOutsideStation
0,940GZZLUBLR,Blackhorse Road Underground Station,51.586919,-0.04115,,,,,
1,940GZZLUBXN,Brixton Underground Station,51.462618,-0.114888,,,,,
2,940GZZLUEUS,Euston Underground Station,51.527824,-0.131846,,,,,
3,940GZZLUFPK,Finsbury Park Underground Station,51.564158,-0.106825,,,,,
4,940GZZLUGPK,Green Park Underground Station,51.506947,-0.142787,1.0,True,False,,False
5,940GZZLUHAI,Highbury & Islington Underground Station,51.54635,-0.103324,,,,,
6,940GZZLUKSX,King's Cross St. Pancras Underground Station,51.530663,-0.123194,,,,,
7,940GZZLUOXC,Oxford Circus Underground Station,51.515224,-0.141903,1.0,True,False,,False
8,940GZZLUPCO,Pimlico Underground Station,51.489097,-0.133761,1.0,True,False,,False
9,940GZZLUSKW,Stockwell Underground Station,51.472184,-0.122644,2.0,True,False,,False


In [6]:
#Checking the merged frame's size: shape is (rows, columns)
row_total, column_total = df_og_si.shape
print(f"The dataset contains {row_total} rows and {column_total} columns.")

The dataset contains 16 rows and 9 columns.


In [87]:
#Loading the custom toilet data CSV (created in TfLCSVs.ipynb)
df_Toilets = pd.read_csv("CustomTfLToilet.csv")

#Left-joining the toilet columns onto the station data by naptanId
df_line_data = df_og_si.merge(df_Toilets, how="left", on="naptanId")

df_line_data.head()


Unnamed: 0,naptanId,commonName,lat,lon,FareZones,Wifi,BlueBadgeCarParking,BlueBadgeCarParkSpaces,TaxiRanksOutsideStation,NumToilets,HasAccessibleToilets
0,940GZZLUBLR,Blackhorse Road Underground Station,51.586919,-0.04115,,,,,,,
1,940GZZLUBXN,Brixton Underground Station,51.462618,-0.114888,,,,,,,
2,940GZZLUEUS,Euston Underground Station,51.527824,-0.131846,,,,,,,
3,940GZZLUFPK,Finsbury Park Underground Station,51.564158,-0.106825,,,,,,,
4,940GZZLUGPK,Green Park Underground Station,51.506947,-0.142787,1.0,True,False,,False,2.0,False


5


In [88]:
#replacing data values to show up in hover template

#label set shared by the columns that use the same Yes/None/n/a wording
yes_none_labels = {True: "Yes", False: "None", np.nan: "n/a"}

#one replacement mapping per column, applied in this order
hover_replacements = {
    "FareZones": {np.nan: "n/a"},
    "Wifi": {True: "Yes", False: "No", np.nan: "n/a"},
    "BlueBadgeCarParking": yes_none_labels,
    "BlueBadgeCarParkSpaces": {False: "None", np.nan: "n/a"},
    "TaxiRanksOutsideStation": yes_none_labels,
    "NumToilets": yes_none_labels,
    "HasAccessibleToilets": yes_none_labels,
}

for column_name, labels in hover_replacements.items():
    df_line_data[column_name] = df_line_data[column_name].replace(labels)

df_line_data


Unnamed: 0,naptanId,commonName,lat,lon,FareZones,Wifi,BlueBadgeCarParking,BlueBadgeCarParkSpaces,TaxiRanksOutsideStation,NumToilets,HasAccessibleToilets
0,940GZZLUBLR,Blackhorse Road Underground Station,51.586919,-0.04115,,,,,,,
1,940GZZLUBXN,Brixton Underground Station,51.462618,-0.114888,,,,,,,
2,940GZZLUEUS,Euston Underground Station,51.527824,-0.131846,,,,,,,
3,940GZZLUFPK,Finsbury Park Underground Station,51.564158,-0.106825,,,,,,,
4,940GZZLUGPK,Green Park Underground Station,51.506947,-0.142787,1.0,Yes,,,,2.0,
5,940GZZLUHAI,Highbury & Islington Underground Station,51.54635,-0.103324,,,,,,,
6,940GZZLUKSX,King's Cross St. Pancras Underground Station,51.530663,-0.123194,,,,,,,
7,940GZZLUOXC,Oxford Circus Underground Station,51.515224,-0.141903,1.0,Yes,,,,,
8,940GZZLUPCO,Pimlico Underground Station,51.489097,-0.133761,1.0,Yes,,,,,
9,940GZZLUSKW,Stockwell Underground Station,51.472184,-0.122644,2.0,Yes,,,,,


In [98]:
#counting the number of stations with wifi by comparing the whole column to "Yes" at once
wifi_count = int(df_line_data["Wifi"].eq("Yes").sum())

print(wifi_count)

5


In [108]:
#counting the "Yes" entries in the wifi column to get the number of stations with wifi
wifi_flags = df_line_data["Wifi"].tolist()
wifi_count_3 = wifi_flags.count("Yes")
print(wifi_count_3)

5


In [109]:
#counting the "Yes" entries in the blue badge parking column to get the number of stations with blue badge parking
bb_parking_count = len([flag for flag in df_line_data["BlueBadgeCarParking"] if flag == "Yes"])
print(bb_parking_count)

0


In [114]:
#counting every station whose NumToilets entry is anything other than the "n/a" placeholder
toilet_count = sum(1 for entry in df_line_data["NumToilets"] if entry != "n/a")
print(toilet_count)

1


In [115]:
#counting the "Yes" entries to get the number of stations with accessible toilets
accessible_flags = df_line_data["HasAccessibleToilets"].tolist()
acc_toilet_count = accessible_flags.count("Yes")
print(acc_toilet_count)

0


In [116]:
#listing the distinct values of the taxi rank column in the original csv, in first-seen order,
#to check whether it holds numbers or just true/false
taxi_values_check = list(dict.fromkeys(df_Station_Info["TaxiRanksOutsideStation"]))

print(taxi_values_check)

[False, True]


In [117]:
#Counting the "Yes" entries to get the number of stations with a taxi rank
taxi_rank_count = len([flag for flag in df_line_data["TaxiRanksOutsideStation"] if flag == "Yes"])
print(taxi_rank_count)

0


In [123]:
#creating a list of the distinct fare zones (first-seen order, "n/a" placeholder skipped)
#to be used later in an f string
stop_zones = [zone for zone in dict.fromkeys(df_line_data["FareZones"]) if zone != "n/a"]

print(stop_zones)

['1', '2']


In [138]:
#simplifying the for loop: dict.fromkeys keeps one entry per zone in first-seen order,
#and the comprehension builds the list directly instead of calling append as a side effect
stop_zones_2 = [zone for zone in dict.fromkeys(df_line_data["FareZones"]) if zone != "n/a"]

#str() keeps the join working even if a zone was read as a number rather than text
print(", ".join(str(zone) for zone in stop_zones_2))

1, 2


In [160]:
def line_stats(data_frame):
    """Summarise the facility counts for the stations in ``data_frame``.

    Every figure is computed from the ``data_frame`` argument, so the function
    can be reused for any line rather than only the notebook's global frame.

    Parameters
    ----------
    data_frame
        Table indexable by column name (e.g. a pandas DataFrame) holding the
        columns "Wifi", "BlueBadgeCarParking", "NumToilets",
        "HasAccessibleToilets", "TaxiRanksOutsideStation" and "FareZones",
        where "Yes" marks a present facility and "n/a" marks missing data.

    Returns
    -------
    dict
        ``wifi_count``, ``bb_parking_count``, ``acc_toilet_count`` and
        ``taxi_rank_count`` are the number of "Yes" entries in their columns;
        ``toilet_count`` is the number of "NumToilets" entries other than
        "n/a"; ``stop_zones`` is the list of distinct fare zones in first-seen
        order with "n/a" left out.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    def _count_yes(column):
        #number of rows in the column that carry the "Yes" label
        return sum(value == "Yes" for value in data_frame[column])

    #any entry other than the "n/a" placeholder counts as a station with toilet data
    toilet_count = sum(value != "n/a" for value in data_frame["NumToilets"])

    #dict.fromkeys keeps one entry per zone while preserving first-seen order
    stop_zones = [zone for zone in dict.fromkeys(data_frame["FareZones"]) if zone != "n/a"]

    return {
        "wifi_count": _count_yes("Wifi"),
        "bb_parking_count": _count_yes("BlueBadgeCarParking"),
        "toilet_count": toilet_count,
        "acc_toilet_count": _count_yes("HasAccessibleToilets"),
        "taxi_rank_count": _count_yes("TaxiRanksOutsideStation"),
        "stop_zones": stop_zones,
    }

    

#Running the statistics helper on the Victoria line data frame
line_stats(df_line_data)

In [None]:
#Counting the stations with a taxi rank outside. The "Yes" labels for taxi ranks are in
#TaxiRanksOutsideStation; BlueBadgeCarParkSpaces is never relabelled to "Yes" in the
#relabelling cell, so counting "Yes" there would always give 0.
taxi_ranks = sum(x == "Yes" for x in df_line_data["TaxiRanksOutsideStation"])

In [None]:
#Note to self, copied from a pandas warning about in-place changes on a selected column: try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


In [159]:
print(f"The Victoria line has {df_line_data.shape[0]} stops.")

#Columns passed to the figure as customdata; each column's position in this list is the
#index used for it in the hover template below
hover_columns = [
    'commonName',
    'Wifi',
    'NumToilets',
    'HasAccessibleToilets',
    'FareZones',
    'BlueBadgeCarParking',
    'BlueBadgeCarParkSpaces',
    'TaxiRanksOutsideStation',
]

#Hover text: the station name in bold, then one labelled line per facility
hover_template = "".join([
    "<b>%{customdata[0]}</b><br>",
    "<br><b>Wifi:</b> %{customdata[1]}",
    "<br><b>Toilets:</b> %{customdata[2]}",
    "<br><b>Accessible toilets:</b> %{customdata[3]}",
    "<br><b>Zone:</b> %{customdata[4]}",
    "<br><b>Accessible parking:</b> %{customdata[5]}",
    "<br><b>Accessible parking spaces:</b> %{customdata[6]}",
    "<br><b>Accessible Taxi rank:</b> %{customdata[7]}",
])

fig_ld2 = px.scatter_map(
    df_line_data,
    lat="lat",
    lon="lon",
    hover_name="commonName",
    zoom=9.5,
    custom_data=hover_columns,
)
fig_ld2.update_layout(
    title='Victoria line stations',
    hovermode='closest',
    map_style="light",
    hoverlabel_bgcolor='rgb(0, 25, 168)',
)
#Marker styling and hover text are set in a single update_traces call
fig_ld2.update_traces(
    marker={"color": 'rgb(0, 160, 226)', "size": 9},
    hovertemplate=hover_template,
)
fig_ld2.show()




The Victoria line has 16 stops.
