# Connecting to the Transport for London (TfL) BikePoint API

In [1]:
import requests
import json
import pandas as pd
# Fetch the BikePoint list from the TfL API.
# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error response into an immediate exception
# instead of letting an error payload be parsed as if it were station data.
reply = requests.get("https://api.tfl.gov.uk/BikePoint/", timeout=30)
reply.raise_for_status()
data = reply.json()

In [2]:
# Number of entries returned by the API: there are almost 800 stations in London
len(data)

798

In [3]:
# Build one list per DataFrame column from the API payload, in a single pass.
#
# The bike counts live in each station's 'additionalProperties' list. They are
# looked up by their 'key' field rather than by list position, so a reordered,
# added or removed property cannot silently put the wrong figure in a column.


def _property_value(station, key):
    """Return the 'value' of the additional property named `key`.

    Parameters:
        station: one entry of the API response (a dict with 'additionalProperties').
        key: name of the property to read, e.g. "NbBikes".

    Returns:
        The property's 'value' as delivered by the API, or None when the station
        has no property with that key.
    """
    for prop in station['additionalProperties']:
        if prop.get('key') == key:
            return prop.get('value')
    return None


latitudes_list = []
longitudes_list = []
location_list = []
nb_ebikes_list = []
nb_stdbikes_list = []
total_bikes_list = []
time = []

for station in data:
    latitudes_list.append(station["lat"])
    longitudes_list.append(station["lon"])
    location_list.append(station["commonName"])

    nb_ebikes_list.append(_property_value(station, "NbEBikes"))
    nb_stdbikes_list.append(_property_value(station, "NbStandardBikes"))
    total_bikes_list.append(_property_value(station, "NbBikes"))

    # Timestamp carried by the station's first additional property
    time.append(station['additionalProperties'][0]["modified"])


In [4]:
# Assemble the per-station lists into a single table, one column per list
column_data = {
    'location': location_list,
    'latitude': latitudes_list,
    'longitude': longitudes_list,
    'ebike_amount': nb_ebikes_list,
    'std_bike_amount': nb_stdbikes_list,
    'total_bikes': total_bikes_list,
    'time': time,
}
real_time_df = pd.DataFrame(column_data)

In [5]:
# Preview the assembled table (pandas elides the middle rows of a long frame)
real_time_df

Unnamed: 0,location,latitude,longitude,ebike_amount,std_bike_amount,total_bikes,time
0,"River Street , Clerkenwell",51.529163,-0.109970,0,12,12,2023-12-12T16:21:37.32Z
1,"Phillimore Gardens, Kensington",51.499606,-0.197574,0,22,22,2023-12-12T16:33:57.54Z
2,"Christopher Street, Liverpool Street",51.521283,-0.084605,1,23,24,2023-12-12T16:21:37.32Z
3,"St. Chad's Street, King's Cross",51.530059,-0.120973,1,12,13,2023-12-12T16:28:35.23Z
4,"Sedding Street, Sloane Square",51.493130,-0.156876,0,25,25,2023-12-12T16:31:57.05Z
...,...,...,...,...,...,...,...
793,"Crimscott Street, Bermondsey",51.495598,-0.078893,0,2,2,2023-12-12T16:07:29.183Z
794,"Brandon Street, Walworth",51.489102,-0.091548,0,9,9,2023-12-12T14:10:13.54Z
795,"The Blue, Bermondsey",51.492221,-0.062513,2,14,16,2023-12-12T16:20:23.867Z
796,"Coomer Place, West Kensington",51.483570,-0.202038,0,13,13,2023-12-12T16:31:57.05Z


In [6]:
# Cleaning the date column: parse the ISO-8601 strings into real timestamps,
# drop the timezone marker (the wall-clock time is kept) and truncate to whole
# seconds. The values still print as 'YYYY-MM-DD HH:MM:SS' but remain datetimes,
# so the index below really is a datetime index rather than an index of strings.
#
# The guard makes the cell safe to re-run: once 'time' has become the index
# there is no 'time' column left to convert.
if 'time' in real_time_df.columns:
    real_time_df = (
        real_time_df
        .assign(
            time=lambda df: pd.to_datetime(df['time'], format='ISO8601')
            .dt.tz_localize(None)
            .dt.floor('s')
        )
        # Set datetime index
        .set_index('time')
    )

# Display the DataFrame
print(real_time_df)

                                                 location   latitude  \
time                                                                   
2023-12-12 16:21:37            River Street , Clerkenwell  51.529163   
2023-12-12 16:33:57        Phillimore Gardens, Kensington  51.499606   
2023-12-12 16:21:37  Christopher Street, Liverpool Street  51.521283   
2023-12-12 16:28:35       St. Chad's Street, King's Cross  51.530059   
2023-12-12 16:31:57         Sedding Street, Sloane Square  51.493130   
...                                                   ...        ...   
2023-12-12 16:07:29          Crimscott Street, Bermondsey  51.495598   
2023-12-12 14:10:13              Brandon Street, Walworth  51.489102   
2023-12-12 16:20:23                  The Blue, Bermondsey  51.492221   
2023-12-12 16:31:57         Coomer Place, West Kensington  51.483570   
2023-12-12 15:56:53                        Strand, Strand  51.512582   

                     longitude ebike_amount std_bike_amount tot

In [15]:
# Inspect the column dtypes: the bike-count columns must be numeric to be able to make aggregations
real_time_df.dtypes

location            object
latitude           float64
longitude          float64
ebike_amount         Int64
std_bike_amount      Int64
total_bikes          Int64
dtype: object

In [10]:
# Convert the three bike-count columns to nullable integers. Values that cannot
# be parsed as numbers become missing (<NA>) instead of raising.
for count_column in ('ebike_amount', 'std_bike_amount', 'total_bikes'):
    numeric_counts = pd.to_numeric(real_time_df[count_column], errors='coerce')
    real_time_df[count_column] = numeric_counts.astype('Int64')


In [18]:
# Re-check the dtypes after the numeric conversion of the count columns
real_time_df.dtypes

location            object
latitude           float64
longitude          float64
ebike_amount         Int64
std_bike_amount      Int64
total_bikes          Int64
dtype: object

In [19]:
# Add up the total_bikes column over every row of the table
total_bikes_sum = real_time_df.total_bikes.sum()
print("Total Bikes Sum:", total_bikes_sum)


Total Bikes Sum: 10526


In [20]:
# Persist the table, index included, as the input for the visualizations
csv_path = 'bikes_12-12-23.csv'
real_time_df.to_csv(csv_path, index=True)