In [3]:
import pandas as pd
import numpy as np
import sqlalchemy
# Notebook-wide pandas display settings.
# Use the exact registered option key: the previous 'display.max.columns' spelling only
# resolved because pandas treats option names as regex patterns ('.' matched '_').
pd.set_option('display.max_columns', None)   # None: show every column of wide frames
pd.set_option('display.max_colwidth', 100)   # cap the rendered width of a single cell at 100 chars

In [4]:
# Load the Edinburgh bikes data file.
# Fields are separated by ';', decimals are written with ',' and the first
# CSV column is used as the row index.
edbikes_df = pd.read_csv(
    'edinburgh_bikes_202205172048.csv',
    sep=';',
    decimal=',',
    index_col=0,
)
edbikes_df.head()

Unnamed: 0_level_0,started_at,ended_at,duration,start_station_id,start_station_name,start_station_description,start_station_latitude,start_station_longitude,end_station_id,end_station_name,end_station_description,end_station_latitude,end_station_longitude
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,2018-09-15 08:52:05,2018-09-15 09:11:48,1182,247,Charlotte Square,North Corner of Charlotte Square,55.95233546161639,-3.207101172107286,259,St Andrew Square,North East corner,55.95472778937899,-3.192652969253117
1,2018-09-15 09:24:33,2018-09-15 09:41:09,995,259,St Andrew Square,North East corner,55.95474881217642,-3.192773668658787,262,Canonmills,near Tesco's,55.96280408759764,-3.196283585062929
2,2018-09-15 09:48:54,2018-09-15 10:46:40,3466,262,Canonmills,near Tesco's,55.96280408759764,-3.196283585062929,250,Victoria Quay,Entrance to Scottish Government Office,55.97763778772698,-3.174115590621568
3,2018-09-16 12:01:36,2018-09-16 12:25:26,1430,255,Kings Buildings 4,X-Y Cafe,55.92200087060626,-3.176901814164921,254,Kings Building 3,Kings Building House,55.92347880784664,-3.175384584907078
4,2018-09-16 12:03:43,2018-09-16 12:11:16,452,255,Kings Buildings 4,X-Y Cafe,55.92200087060626,-3.176901814164921,253,Kings Building 2,Sanderson Building,55.92320215516664,-3.17164624536349


In [5]:
# Data exploration: time frame.
# The table contains Edinburgh bike borrowings from 15 September 2018
# until the end of June 2021 (see the first and last rows of the sorted output).
edbikes_df.loc[:, ['started_at', 'ended_at']].sort_values(by='started_at')

Unnamed: 0_level_0,started_at,ended_at
index,Unnamed: 1_level_1,Unnamed: 2_level_1
0,2018-09-15 08:52:05,2018-09-15 09:11:48
1,2018-09-15 09:24:33,2018-09-15 09:41:09
2,2018-09-15 09:48:54,2018-09-15 10:46:40
3,2018-09-16 12:01:36,2018-09-16 12:25:26
4,2018-09-16 12:03:43,2018-09-16 12:11:16
...,...,...
12636,2021-06-30 23:30:31,2021-07-01 00:06:10
12637,2021-06-30 23:36:16,2021-07-01 00:05:40
12638,2021-06-30 23:49:03,2021-07-01 00:11:25
12639,2021-06-30 23:49:03,2021-07-01 00:11:52


In [7]:
# Start stations: one row per distinct start station id, keeping the first name
# recorded for that id. The recorded run lists 198 stations used at least once
# to start a borrowing.
start = (
    edbikes_df[['start_station_id', 'start_station_name']]
    .rename(columns={
        'start_station_id': 'station_id',
        'start_station_name': 'station_name',
    })
    .drop_duplicates(subset='station_id')
)
print('Start stations:')
print('\n', start)

Start stations:

        station_id              station_name
index                                      
0             247          Charlotte Square
1             259          St Andrew Square
2             262                Canonmills
3             255         Kings Buildings 4
5             253          Kings Building 2
...           ...                       ...
4659         1860     Ingliston Park & Ride
579          2259          Leith Walk North
15127        2263          Musselburgh Lidl
21035        2265  Musselburgh Brunton Hall
5853         2268              Picady Place

[198 rows x 2 columns]


In [9]:
# End stations: one row per distinct end station id, keeping the first name
# recorded for that id. The recorded run lists 199 stations used at least once
# to end a borrowing.
end = (
    edbikes_df[['end_station_id', 'end_station_name']]
    .rename(columns={
        'end_station_id': 'station_id',
        'end_station_name': 'station_name',
    })
    .drop_duplicates(subset='station_id')
)
print("End stations:")
print('\n', end)

End stations:

        station_id              station_name
index                                      
0             259          St Andrew Square
1             262                Canonmills
2             250             Victoria Quay
3             254          Kings Building 3
4             253          Kings Building 2
...           ...                       ...
12197        1859    Edinburgh Park Central
855          2259          Leith Walk North
14980        2263          Musselburgh Lidl
21035        2265  Musselburgh Brunton Hall
5806         2268              Picady Place

[199 rows x 2 columns]


In [13]:
# All stations: union of the start and end station lists, one row per station_id,
# i.e. every station used at least once to start or at least once to end a borrowing.
# De-duplicate on station_id rather than on the whole row: `start` and `end` each keep
# the first name seen for an id, so an id whose kept names differ between the two frames
# would otherwise be listed (and counted) twice. The name from `end` wins on conflict
# because `end` comes first in the concat.
# NOTE(review): the originally recorded run reported 200 stations with whole-row
# de-duplication — re-check the count after re-running this cell.
stations = (
    pd.concat([end, start])
    .drop_duplicates(subset='station_id')
    .sort_values('station_id')
)
# Raise the display row limit, presumably so the full station listing prints untruncated.
pd.options.display.max_rows = 200
print("All stations at least once used to start or once to end:")
print('\n', stations)

All stations at least once used to start or once to end:

        station_id                                               station_name
index                                                                       
27            171                                              George Square
48            183                                            Waverley Bridge
14            189                                              City Chambers
135           225                                             Waverley Court
1278          241                                                      Depot
1126          242                                              Virtual Depot
10            246                                    Royal Commonwealth Pool
28            247                                           Charlotte Square
12            248                                              Bristo Square
66            249                                             Fountainbridge
2             250

Judging by their names, some of these stations appear to be temporary: Launch Day Event, Royal Highland Show - East Gate (19th to 23rd June), Royal Highland Show - West Gate (19th to 23rd June),
Ingliston Park and Ride (19th to 23rd June), Meadows - Edinburgh Climate Festival, Holyrood Park - Woman's Tour Of Scotland (Event 11/08/19), Pleasance - Edinburgh University Sports Fair,
HSBC UK Lets Ride - Meadows Event, Cycling Scotland Conference.

