# Import

In [1]:
import pandas as pd
import numpy as np

from robusta.metrics.pairwise import paired_distances
from robusta.preprocessing import ColumnGrouper, ColumnRenamer

# Data

In [2]:
# Relative path to the countries table; the loaded frame has the columns
# Code, Country, Latitude and Longitude.
PATH = './countries/countries.csv'

# Read the whole CSV into memory; later cells sample rows from `data`.
data = pd.read_csv(PATH)

In [3]:
# Show the loaded table (pandas elides the middle rows of a long frame).
data

Unnamed: 0,Code,Country,Latitude,Longitude
0,AD,Andorra,42.50,1.50
1,AE,United Arab Emirates,24.00,54.00
2,AF,Afghanistan,33.00,65.00
3,AG,Antigua and Barbuda,17.05,-61.80
4,AI,Anguilla,18.25,-63.17
...,...,...,...,...
235,YE,Yemen,15.00,48.00
236,YT,Mayotte,-12.83,45.17
237,ZA,South Africa,-29.00,24.00
238,ZM,Zambia,-15.00,30.00


# Random Lat/Lon pairs

In [4]:
# Coordinate columns that are fed to the distance functions below.
features = ['Latitude', 'Longitude']

# Size of each random draw and the seed that makes the draw repeatable.
# Without a fixed seed every run of the notebook picks different countries,
# so none of the tables shown below could be reproduced.
N_PAIRS = 100
SEED = 42

# X and Y are drawn independently; row i of X is paired with row i of Y.
# The two draws must use different seeds: with the same seed both samples
# would contain the same rows in the same order and every distance would be 0.
# reset_index(drop=True) replaces the original row labels with 0..N_PAIRS-1 so
# the two frames (and the distance series) line up when concatenated later.
X = data.sample(N_PAIRS, random_state=SEED).reset_index(drop=True)
Y = data.sample(N_PAIRS, random_state=SEED + 1).reset_index(drop=True)

X

Unnamed: 0,Code,Country,Latitude,Longitude
0,PS,Palestinian Territory,32.00,35.25
1,BZ,Belize,17.25,-88.75
2,LT,Lithuania,56.00,24.00
3,IT,Italy,42.83,12.83
4,VC,Saint Vincent and the Grenadines,13.25,-61.20
...,...,...,...,...
95,FJ,Fiji,-18.00,175.00
96,TM,Turkmenistan,40.00,60.00
97,CK,Cook Islands,-21.23,-159.77
98,SE,Sweden,62.00,15.00


In [5]:
# Show the second sample; row i here is the partner of row i in X.
Y

Unnamed: 0,Code,Country,Latitude,Longitude
0,NO,Norway,62.00,10.0
1,NG,Nigeria,10.00,8.0
2,CN,China,35.00,105.0
3,SK,Slovakia,48.67,19.5
4,HR,Croatia,45.17,15.5
...,...,...,...,...
95,AP,Asia/Pacific Region,35.00,105.0
96,KE,Kenya,1.00,38.0
97,VC,Saint Vincent and the Grenadines,13.25,-61.2
98,EG,Egypt,27.00,30.0


# Distances

## Manhattan

In [6]:
# Row-wise L1 distance between the paired coordinates, computed directly on
# the latitude/longitude values (so the result is in degrees, not kilometres).
d1 = pd.Series(paired_distances(X[features], Y[features], 'manhattan'))
d1

0      55.25
1     104.00
2     102.00
3      12.51
4     108.62
       ...  
95    123.00
96     61.00
97    133.05
98     50.00
99     51.00
Length: 100, dtype: float64

## Euclidean

In [7]:
# Row-wise L2 (straight-line) distance between the paired coordinates, again
# computed on the raw latitude/longitude values in degrees.
d2 = pd.Series(paired_distances(X[features], Y[features], 'euclidean'))
d2

0      39.211765
1      97.021261
2      83.677954
3       8.865354
4      83.076931
         ...    
95     87.800911
96     44.777226
97    104.426602
98     38.078866
99     42.953463
Length: 100, dtype: float64

## Radian

In [8]:
# Row-wise distance using the 'radian' metric of paired_distances.
# NOTE(review): presumably a great-circle distance in kilometres (the
# comparison table labels this column 'km') -- confirm against robusta.
d3 = pd.Series(paired_distances(X[features], Y[features], 'radian'))
d3

0      3793.785582
1     10387.238091
2      6320.822952
3       829.964749
4      7931.998005
          ...     
95     9441.397263
96     4875.421416
97    11412.571528
98     4048.712831
99     2450.403022
Length: 100, dtype: float64

# Distance Comparison

In [17]:
# Place the three distance series side by side and label the columns
# l1 (Manhattan), l2 (Euclidean) and km ('radian' metric).
D = ColumnRenamer(['l1', 'l2', 'km']).fit_transform(
    pd.concat([d1, d2, d3], axis=1)
)

# Put each table under its own top-level column group, then join the three
# tables column-wise so every row shows both countries next to their distances.
pd.concat(
    [
        ColumnGrouper(group).fit_transform(table)
        for group, table in (('COUNTRY1', X), ('COUNTRY2', Y), ('DISTANCE', D))
    ],
    axis=1,
)

Unnamed: 0_level_0,COUNTRY1,COUNTRY1,COUNTRY1,COUNTRY1,COUNTRY2,COUNTRY2,COUNTRY2,COUNTRY2,DISTANCE,DISTANCE,DISTANCE
Unnamed: 0_level_1,Code,Country,Latitude,Longitude,Code,Country,Latitude,Longitude,l1,l2,km
0,PS,Palestinian Territory,32.00,35.25,NO,Norway,62.00,10.0,55.25,39.211765,3793.785582
1,BZ,Belize,17.25,-88.75,NG,Nigeria,10.00,8.0,104.00,97.021261,10387.238091
2,LT,Lithuania,56.00,24.00,CN,China,35.00,105.0,102.00,83.677954,6320.822952
3,IT,Italy,42.83,12.83,SK,Slovakia,48.67,19.5,12.51,8.865354,829.964749
4,VC,Saint Vincent and the Grenadines,13.25,-61.20,HR,Croatia,45.17,15.5,108.62,83.076931,7931.998005
...,...,...,...,...,...,...,...,...,...,...,...
95,FJ,Fiji,-18.00,175.00,AP,Asia/Pacific Region,35.00,105.0,123.00,87.800911,9441.397263
96,TM,Turkmenistan,40.00,60.00,KE,Kenya,1.00,38.0,61.00,44.777226,4875.421416
97,CK,Cook Islands,-21.23,-159.77,VC,Saint Vincent and the Grenadines,13.25,-61.2,133.05,104.426602,11412.571528
98,SE,Sweden,62.00,15.00,EG,Egypt,27.00,30.0,50.00,38.078866,4048.712831
