In [7]:
import pandas as pd
import numpy as np
from sklearn.metrics import DistanceMetric
from math import radians

In [3]:
df_bikes = pd.read_csv('edinburgh_bikes.csv')
df_bikes = df_bikes.drop(columns=['Unnamed: 0'])
df_bikes

Unnamed: 0,index,started_at,ended_at,duration,start_station_id,start_station_name,start_station_description,start_station_latitude,start_station_longitude,end_station_id,end_station_name,end_station_description,end_station_latitude,end_station_longitude
0,0,2018-09-15 08:52:05,2018-09-15 09:11:48,1182,247,Charlotte Square,North Corner of Charlotte Square,55.952335,-3.207101,259,St Andrew Square,North East corner,55.954728,-3.192653
1,1,2018-09-15 09:24:33,2018-09-15 09:41:09,995,259,St Andrew Square,North East corner,55.954749,-3.192774,262,Canonmills,near Tesco's,55.962804,-3.196284
2,2,2018-09-15 09:48:54,2018-09-15 10:46:40,3466,262,Canonmills,near Tesco's,55.962804,-3.196284,250,Victoria Quay,Entrance to Scottish Government Office,55.977638,-3.174116
3,3,2018-09-16 12:01:36,2018-09-16 12:25:26,1430,255,Kings Buildings 4,X-Y Cafe,55.922001,-3.176902,254,Kings Building 3,Kings Building House,55.923479,-3.175385
4,4,2018-09-16 12:03:43,2018-09-16 12:11:16,452,255,Kings Buildings 4,X-Y Cafe,55.922001,-3.176902,253,Kings Building 2,Sanderson Building,55.923202,-3.171646
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
438254,12636,2021-06-30 23:30:31,2021-07-01 00:06:10,2139,1090,Hillside Crescent,East end of Hillside Crescent,55.957872,-3.175888,1728,Portobello - Kings Road,Foot of Kings Road next to the promenade,55.957915,-3.118332
438255,12637,2021-06-30 23:36:16,2021-07-01 00:05:40,1763,1814,Abbeyhill,Near Abbey Mount,55.955248,-3.172216,1728,Portobello - Kings Road,Foot of Kings Road next to the promenade,55.957915,-3.118332
438256,12638,2021-06-30 23:49:03,2021-07-01 00:11:25,1342,256,St Andrews House,beside Jacobs ladder,55.953164,-3.181682,1091,Holyrood Road,Opposite St Leonards Land,55.949560,-3.180413
438257,12639,2021-06-30 23:49:03,2021-07-01 00:11:52,1369,256,St Andrews House,beside Jacobs ladder,55.953164,-3.181682,1091,Holyrood Road,Opposite St Leonards Land,55.949560,-3.180413


In [22]:
#Get the list of all stations and their coordinates
df_stations = df_bikes[
    ['start_station_name', 
     'start_station_latitude', 
     'start_station_longitude']
    ].drop_duplicates()

# Convert the Lat/Long degress in Radians
df_stations['start_station_latitude'] = np.radians(df_stations['start_station_latitude'])
df_stations['start_station_longitude'] = np.radians(df_stations['start_station_longitude'])

# Scipy get_metrics()
dist = DistanceMetric.get_metric('haversine')

# Scipy Pairwise()
df_stations[['start_station_latitude','start_station_longitude']].to_numpy()

# dist.pairwise(df_stations [['start_station_latitude','start_station_longitude']].to_numpy())*6373

distances = pd.DataFrame(
    dist.pairwise(
        df_stations[
            ['start_station_latitude',
            'start_station_longitude']
            ].to_numpy())*6373000,  
            columns=df_stations.start_station_name, 
            index=df_stations.start_station_name).astype(int)

distances

start_station_name,Charlotte Square,St Andrew Square,Canonmills,Kings Buildings 4,Kings Building 2,Kings Buildings 1,Kings Building 3,Pollock Halls,Royal Commonwealth Pool,Meadows East,...,Waverley Bridge,Western General Hospital,Leith Walk North,Pleasance Courtyard,Musselburgh Lidl,Musselburgh Brunton Hall,Musselburgh Brunton Hall,Picardy Place,Leith Walk,Edinburgh Royal Infirmary
start_station_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Charlotte Square,0,931,1345,3863,3921,3754,3769,2605,2543,2060,...,948,2049,2712,1705,8791,9301,9313,1380,2397,5564
St Andrew Square,931,0,922,3774,3747,3598,3642,2108,2108,1775,...,312,2728,1890,1085,7941,8447,8460,452,1553,5087
Canonmills,1345,922,0,4696,4664,4517,4563,2969,2991,2693,...,1234,2356,1523,1947,8336,8831,8844,936,1279,5908
Kings Buildings 4,3863,3774,4696,0,353,309,189,2025,1899,2013,...,3462,5774,5111,2847,7281,7773,7782,3885,4802,2489
Kings Building 2,3921,3747,4664,353,0,174,235,1864,1762,1972,...,3440,5881,4975,2765,6928,7420,7429,3817,4677,2170
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Musselburgh Brunton Hall,9301,8447,8831,7773,7420,7516,7632,7059,7212,7753,...,8355,11136,7644,7647,514,0,12,8077,7697,5500
Musselburgh Brunton Hall,9313,8460,8844,7782,7429,7526,7642,7070,7223,7764,...,8367,11149,7657,7659,526,12,0,8089,7710,5508
Picardy Place,1380,452,936,3885,3817,3680,3738,2057,2096,1873,...,616,3063,1491,1055,7573,8077,8089,0,1147,4973
Leith Walk,2397,1553,1279,4802,4677,4561,4638,2818,2914,2850,...,1763,3617,345,1991,7214,7697,7710,1147,0,5451


In [24]:
distances = distances.loc[:,~distances.columns.duplicated()]
distances

start_station_name,Charlotte Square,St Andrew Square,Canonmills,Kings Buildings 4,Kings Building 2,Kings Buildings 1,Kings Building 3,Pollock Halls,Royal Commonwealth Pool,Meadows East,...,Forth Bridge Visitors Centre,Hawes Pier,Scotstoun House,Tesco Ferrymuir,Port Edgar Marina,Ingliston Park & Ride,Leith Walk North,Musselburgh Lidl,Musselburgh Brunton Hall,Picardy Place
start_station_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Charlotte Square,0,931,1345,3863,3921,3754,3769,2605,2543,2060,...,12858,11894,12079,12587,13246,9368,2712,8791,9301,1380
St Andrew Square,931,0,922,3774,3747,3598,3642,2108,2108,1775,...,13635,12644,12877,13378,14003,10293,1890,7941,8447,452
Canonmills,1345,922,0,4696,4664,4517,4563,2969,2991,2693,...,13207,12179,12486,12975,13545,10272,1523,8336,8831,936
Kings Buildings 4,3863,3774,4696,0,353,309,189,2025,1899,2013,...,15906,15065,15045,15574,16366,11287,5111,7281,7773,3885
Kings Building 2,3921,3747,4664,353,0,174,235,1864,1762,1972,...,16138,15282,15283,15811,16590,11589,4975,6928,7420,3817
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Musselburgh Brunton Hall,9301,8447,8831,7773,7420,7516,7632,7059,7212,7753,...,22037,21009,21304,21798,22376,18514,7644,514,0,8077
Musselburgh Brunton Hall,9313,8460,8844,7782,7429,7526,7642,7070,7223,7764,...,22049,21022,21317,21811,22389,18525,7657,526,12,8089
Picardy Place,1380,452,936,3885,3817,3680,3738,2057,2096,1873,...,13976,12971,13230,13727,14333,10728,1491,7573,8077,0
Leith Walk,2397,1553,1279,4802,4677,4561,4638,2818,2914,2850,...,14384,13334,13685,14166,14702,11549,345,7214,7697,1147


In [29]:
# DataFrame with start and end stations latitudes and longitudes
df_start_end_stations = df_bikes[
    ['start_station_name', 
     'start_station_latitude', 
     'start_station_longitude',
    'end_station_name', 
     'end_station_latitude', 
     'end_station_longitude']
    ].drop_duplicates()


# Let’s create a haversine function using numpy
def haversine_vectorize(lon1, lat1, lon2, lat2):

    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])

    newlon = lon2 - lon1
    newlat = lat2 - lat1

    haver_formula = np.sin(newlat/2.0)**2 + np.cos(lat1) \
                    * np.cos(lat2) * np.sin(newlon/2.0)**2

    dist = 2 * np.arcsin(np.sqrt(haver_formula ))
    km = 6367000 * dist #6367 for distance in KM (6367000 in metres) for miles use 3958
    return km


# Let’s calculate the haversine distance between origin and destination city using numpy vectorize haversine function
haversine_vectorize(df_start_end_stations['start_station_longitude'],
                    df_start_end_stations['start_station_latitude'],
                    df_start_end_stations['end_station_longitude'], 
                    df_start_end_stations['end_station_latitude']
                   )


# Let’s create a new column called 'start-end-distance-metres' and add to the original dataframe
df_start_end_stations['distance-between-them-in-metres'] = haversine_vectorize(df_start_end_stations['start_station_longitude'],
                                                                               df_start_end_stations['start_station_latitude'],
                                                                               df_start_end_stations['end_station_longitude'], 
                                                                               df_start_end_stations['end_station_latitude']
                                                                              ).astype(int)

# Show new DataFrame
df_start_end_stations

IndentationError: unexpected indent (Temp/ipykernel_33292/1787129495.py, line 21)