In [1]:
## import data manipulation packages
import pandas as pd
import numpy as np
from math import radians

## DistanceMetric moved from sklearn.neighbors to sklearn.metrics in newer
## scikit-learn releases and the old alias was later removed, so try the new
## location first and fall back to the old one on older installations.
try:
    from sklearn.metrics import DistanceMetric
except ImportError:
    from sklearn.neighbors import DistanceMetric

## load the world-cities spreadsheet as 'cities' and drop every row that has a NaN in any column
## NOTE(review): pandas parses the literal text 'NA' as missing by default; if the sheet stores
## an iso2 code as 'NA', those rows are silently dropped here - confirm against the data
cities = pd.read_excel('data/worldcities.xlsx')
ct = cities.dropna(axis = 'rows', how = 'any')
## optional filter (disabled): keep only the cities on or above the equator
## ct = ct[ct.lat >= 0]

In [2]:
#add a London start row (shifting its longitude slightly, so that it lies further east than London)
#London_st = ct.loc[(ct['city'] == 'London') & (ct['iso3'] == 'GBR')]
#London_st['city']='London_st' 
#London_st['lng'] = London_st['lng'] + 0.2

In [3]:
## append the London_st row to the dataframe
#ct = ct.append(London_st)

In [4]:
## rebuild a consecutive 0..n-1 row index; the previous labels are kept as a new 'index' column
ct = ct.reset_index(drop = False)
print(ct)

      index          city    city_ascii      lat       lng      country iso2  \
0         0         Tokyo         Tokyo  35.6897  139.6922        Japan   JP   
1         1       Jakarta       Jakarta  -6.2146  106.8451    Indonesia   ID   
2         2         Delhi         Delhi  28.6600   77.2300        India   IN   
3         3        Mumbai        Mumbai  18.9667   72.8333        India   IN   
4         4        Manila        Manila  14.5958  120.9772  Philippines   PH   
...     ...           ...           ...      ...       ...          ...  ...   
6617  26058      Greytown      Greytown  10.9167  -83.7000    Nicaragua   NI   
6618  26061        Sayḩūt        Sayhut  15.2105   51.2454        Yemen   YE   
6619  26062    Río Cuarto    Rio Cuarto -33.1230  -64.3478    Argentina   AR   
6620  26063  La Esmeralda  La Esmeralda   3.1738  -65.5466    Venezuela   VE   
6621  26073       Hödrögö       Hodrogo  48.9664   96.7833     Mongolia   MN   

     iso3    admin_name  capital    pop

In [5]:
## build the ID column by gluing the city name and the iso2 country code together
## NOTE(review): two cities sharing a name inside one country would get the same ID - confirm uniqueness
ct = ct.assign(ID = ct['city'].map(str) + ct['iso2'].map(str))

In [6]:
## remove the columns that are not used by the rest of the notebook
ct = ct.drop(columns = ['city_ascii', 'country', 'iso2', 'admin_name', 'capital', 'id'])

In [7]:
## remove the leftover 'index' column created by reset_index
ct = ct.drop(columns = 'index')

In [8]:
# population weight - 0 for cities below 200,000 inhabitants, 2 for all the others
pop = np.where(ct['population'].lt(200000), 0, 2)
# country weight - pairwise matrix holding 0 where two cities share the iso3 code and 2 where they differ
i = ct['iso3'].to_numpy()
st = np.where(i[:, None] == i, 0, 2)

In [9]:
## inspect the pairwise same-country weight matrix (0 on matching iso3 codes, 2 otherwise)
print(st)

[[0 2 2 ... 2 2 2]
 [2 0 2 ... 2 2 2]
 [2 2 0 ... 2 2 2]
 ...
 [2 2 2 ... 0 2 2]
 [2 2 2 ... 2 0 2]
 [2 2 2 ... 2 2 0]]


In [10]:
# direction - put every longitude on a 0..360 scale: non-negative values are kept as they are,
# negative ones get 180 + 180 = 360 added (a full turn, not the 150 mentioned in an earlier note)

dr_x = np.where(ct['lng'].ge(0), ct['lng'], ct['lng'].add(180).add(180))
x = dr_x
# drdf[r, c] is 1 when the value of row r is strictly smaller than the value of column c, else 0
drdf = np.where(x[:, None] < x, 1, 0)

In [11]:
## inspect the longitudes after shifting the negative ones onto the 0..360 scale
print(x)

[139.6922 106.8451  77.23   ... 295.6522 294.4534  96.7833]


In [12]:
# wrap the raw arrays in DataFrames labelled with the city IDs
direction = pd.DataFrame(data = drdf, index = ct['ID'], columns = ct['ID'])
same_state = pd.DataFrame(data = st, index = ct['ID'], columns = ct['ID'])
population = pd.DataFrame(data = pop, index = ct['ID'])

In [13]:
## inspect the labelled 0/1 direction matrix
print(direction)

ID              TokyoJP  JakartaID  DelhiIN  MumbaiIN  ManilaPH  ShanghaiCN  \
ID                                                                            
TokyoJP               0          0        0         0         0           0   
JakartaID             1          0        0         0         1           1   
DelhiIN               1          1        0         0         1           1   
MumbaiIN              1          1        1         0         1           1   
ManilaPH              1          0        0         0         0           1   
...                 ...        ...      ...       ...       ...         ...   
GreytownNI            0          0        0         0         0           0   
SayḩūtYE              1          1        1         1         1           1   
Río CuartoAR          0          0        0         0         0           0   
La EsmeraldaVE        0          0        0         0         0           0   
HödrögöMN             1          1        0         

In [14]:
## conversion of the coordinates from degrees to radians
## (lat/lng are overwritten, so running this cell twice converts twice; the direction
## cell reads lng while it is still in degrees and therefore has to run before this one)
ct[['lat', 'lng']] = np.radians(ct[['lat', 'lng']])

In [15]:
## get the haversine metric object from the imported DistanceMetric class (scikit-learn, not scipy)
dist = DistanceMetric.get_metric('haversine')

In [16]:
## preview the (lat, lng) array, already in radians, that is fed to the distance calculation
## (display only: the result is not stored)
ct[['lat','lng']].to_numpy()

array([[ 0.62290277,  2.43808883],
       [-0.10846523,  1.86479878],
       [ 0.50021136,  1.34791778],
       ...,
       [-0.57810541, -1.12308098],
       [ 0.05539326, -1.14400398],
       [ 0.8546249 ,  1.68918725]])

In [17]:
## preview the pairwise haversine distances, multiplied by 6373 (the Earth's spherical radius in km
## used in this notebook) to turn the result into kilometres; display only, the result is not stored
## NOTE(review): the mean Earth radius is usually quoted as about 6371 km - confirm the constant
dist.pairwise(ct [['lat','lng']].to_numpy())*6373

array([[    0.        ,  5787.90376514,  5835.24850156, ...,
        17802.44164785, 14963.27221178,  3763.88990049],
       [ 5787.90376514,     0.        ,  5011.26756513, ...,
        15548.07263324, 19112.74637668,  6215.36837225],
       [ 5835.24850156,  5011.26756513,     0.        , ...,
        16340.95395707, 14699.22965152,  2805.95968927],
       ...,
       [17802.44164785, 15548.07263324, 16340.95395707, ...,
            0.        ,  4039.25704802, 17665.78252814],
       [14963.27221178, 19112.74637668, 14699.22965152, ...,
         4039.25704802,     0.        , 13975.81579146],
       [ 3763.88990049,  6215.36837225,  2805.95968927, ...,
        17665.78252814, 13975.81579146,     0.        ]])

In [18]:
## labelled distance matrix: pairwise haversine result scaled by 6373, with the city IDs on both axes
distance = pd.DataFrame(
    data = dist.pairwise(ct[['lat','lng']].to_numpy()) * 6373,
    index = ct['ID'],
    columns = ct['ID'],
)

In [19]:
## look up the row of the arrival city 'London' (GBR), in case its ID is needed as an index
ct[ct['city'].eq('London') & ct['iso3'].eq('GBR')]

Unnamed: 0,city,lat,lng,iso3,population,ID
31,London,0.89897,-0.002225,GBR,10979000.0,LondonGB


In [20]:
## identify 'London_st' ID start (se dovesse servire come indice)
#ct.loc[(ct['city'] == 'London_st')]

In [21]:
## get starting line from distance matrix
#distance.loc[['London_stGB']]

In [22]:
from pandas import DataFrame

In [23]:
# create the graph: element-wise product of the distance matrix and the 0/1 direction mask,
# so every pair whose direction flag is 0 ends up with weight 0.
# A single vectorised multiplication replaces the former zero-filled frame that was
# overwritten one column at a time in a Python loop; the values are the same.
graph = distance * direction

print(graph)

ID                  TokyoJP    JakartaID      DelhiIN     MumbaiIN  \
ID                                                                   
TokyoJP            0.000000     0.000000     0.000000     0.000000   
JakartaID       5787.903765     0.000000     0.000000     0.000000   
DelhiIN         5835.248502  5011.267565     0.000000     0.000000   
MumbaiIN        6736.368099  4662.287622  1166.997459     0.000000   
ManilaPH        2997.194340     0.000000     0.000000     0.000000   
...                     ...          ...          ...          ...   
GreytownNI         0.000000     0.000000     0.000000     0.000000   
SayḩūtYE        8894.127127  6573.440879  3059.249501  2331.235749   
Río CuartoAR       0.000000     0.000000     0.000000     0.000000   
La EsmeraldaVE     0.000000     0.000000     0.000000     0.000000   
HödrögöMN       3763.889900  6215.368372     0.000000     0.000000   

ID                 ManilaPH   ShanghaiCN   São PauloBR      SeoulKR  \
ID                

In [24]:
## force "LondonGB" to be west of "London_stGB" - this still has to be solved properly
## (disabled, like the cells that would add the London_st row)
#graph.loc['London_stGB']['LondonGB'] = 0
print(graph)

ID                  TokyoJP    JakartaID      DelhiIN     MumbaiIN  \
ID                                                                   
TokyoJP            0.000000     0.000000     0.000000     0.000000   
JakartaID       5787.903765     0.000000     0.000000     0.000000   
DelhiIN         5835.248502  5011.267565     0.000000     0.000000   
MumbaiIN        6736.368099  4662.287622  1166.997459     0.000000   
ManilaPH        2997.194340     0.000000     0.000000     0.000000   
...                     ...          ...          ...          ...   
GreytownNI         0.000000     0.000000     0.000000     0.000000   
SayḩūtYE        8894.127127  6573.440879  3059.249501  2331.235749   
Río CuartoAR       0.000000     0.000000     0.000000     0.000000   
La EsmeraldaVE     0.000000     0.000000     0.000000     0.000000   
HödrögöMN       3763.889900  6215.368372     0.000000     0.000000   

ID                 ManilaPH   ShanghaiCN   São PauloBR      SeoulKR  \
ID                

In [25]:
## transpose so that the following steps work on rows
## (re-running this cell flips the matrix back again)
graph = graph.transpose()
print(graph)

ID                   TokyoJP     JakartaID       DelhiIN      MumbaiIN  \
ID                                                                       
TokyoJP             0.000000   5787.903765   5835.248502   6736.368099   
JakartaID           0.000000      0.000000   5011.267565   4662.287622   
DelhiIN             0.000000      0.000000      0.000000   1166.997459   
MumbaiIN            0.000000      0.000000      0.000000      0.000000   
ManilaPH            0.000000   2790.717691   4755.337989   5135.012933   
...                      ...           ...           ...           ...   
GreytownNI      13121.910379  18749.404488  15165.430715  15817.752584   
SayḩūtYE            0.000000      0.000000      0.000000      0.000000   
Río CuartoAR    17802.441648  15548.072633  16340.953957  15498.485119   
La EsmeraldaVE  14963.272212  19112.746377  14699.229652  14845.181740   
HödrögöMN           0.000000      0.000000   2805.959689   3965.565663   

ID                  ManilaPH    Shang

In [26]:
## work on an independent copy of graph
## (the former replace(0, 0) only swapped zeros for zeros while scanning the whole matrix)
dis = graph.copy()
dis

ID,TokyoJP,JakartaID,DelhiIN,MumbaiIN,ManilaPH,ShanghaiCN,São PauloBR,SeoulKR,Mexico CityMX,GuangzhouCN,...,LinxiCN,RodeoAR,GastreAR,TinogastaAR,TelsenAR,GreytownNI,SayḩūtYE,Río CuartoAR,La EsmeraldaVE,HödrögöMN
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
TokyoJP,0.000000,5787.903765,5835.248502,6736.368099,2997.194340,1762.022389,0.0,1151.307887,0.000000,2903.800481,...,2042.104954,0.000000,0.000000,0.000000,0.000000,0.000000,8894.127127,0.0,0.000000,3763.889900
JakartaID,0.000000,0.000000,5011.267565,4662.287622,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,6573.440879,0.0,0.000000,6215.368372
DelhiIN,0.000000,0.000000,0.000000,1166.997459,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3059.249501,0.0,0.000000,0.000000
MumbaiIN,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,2331.235749,0.0,0.000000,0.000000
ManilaPH,0.000000,2790.717691,4755.337989,5135.012933,0.000000,0.000000,0.0,0.000000,0.000000,1248.624550,...,3229.094756,0.000000,0.000000,0.000000,0.000000,0.000000,7460.176624,0.0,0.000000,4413.827801
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GreytownNI,13121.910379,18749.404488,15165.430715,15817.752584,16090.269529,14624.981545,0.0,13749.352769,1906.564347,15808.268798,...,13578.384232,0.000000,0.000000,0.000000,0.000000,0.000000,14269.835575,0.0,0.000000,13360.413795
SayḩūtYE,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000
Río CuartoAR,17802.441648,15548.072633,16340.953957,15498.485119,17891.554956,19432.233134,0.0,18879.695225,6915.216994,18885.313644,...,18846.857659,557.012514,1103.133020,641.220957,1054.985097,5310.978147,13292.341403,0.0,4039.257048,17665.782528
La EsmeraldaVE,14963.272212,19112.746377,14699.229652,14845.181740,17918.131316,16130.051721,0.0,15306.485745,4071.136107,17092.868130,...,14815.586234,3733.495868,5067.878714,3481.586365,5069.283104,2179.599062,12771.412015,0.0,0.000000,13975.815791


In [29]:
from pandas import DataFrame
from tqdm import tqdm

In [None]:
## register tqdm's progress-bar helpers (e.g. progress_apply) on pandas objects
tqdm.pandas()

In [30]:
## populate by criteria (nearest +2, second nearest +4, third nearest +8, the rest 'no go')
## start from an independent copy of graph
## (the former replace(0, 0) only swapped zeros for zeros while scanning the whole matrix)
dis = graph.copy()

In [None]:
## start from an independent copy of the transposed distance matrix
## (the former replace(0, 0) only swapped zeros for zeros while scanning the whole matrix)
## NOTE(review): this rebinds dis to the plain distances, without the direction mask - confirm intended
dis = distance.T.copy()

In [None]:
## tier the neighbours column by column: the nearest city gets weight 2, the second
## nearest 4, the third nearest 8 and everything else is a 'no go' (0).
## The positive distances are ranked once instead of running three successive
## "replace the current column minimum" passes: after the first pass the minimum of
## every column was the freshly written 2, so the later passes only re-labelled the
## nearest city (2 -> 4 -> 8), and the closing "<= 8" filter also let raw distances
## of 8 or less slip through as if they were weights.
tier_rank = dis.where(dis > 0).rank(axis=0, method='first').to_numpy()  # 1 = nearest; NaN for non-positive entries
dis = pd.DataFrame(
    np.select([tier_rank == 1, tier_rank == 2, tier_rank == 3], [2.0, 4.0, 8.0], default=0.0),
    index=dis.index,
    columns=dis.columns,
)
del tier_rank  # free the temporary full-size rank array
dis

In [None]:
## inspect the dis matrix
print(dis)

In [None]:
## edge weight = dis value + same-country weight + population weight (pop is added by row position).
## Entries where dis is 0 are left as NaN, which is what the former `* dis[i] / dis[i]`
## trick produced through 0/0; one vectorised expression replaces the per-column loop.
graph = dis.add(same_state).add(pop, axis=0).where(dis != 0)

graph

In [None]:
## inspect the weighted graph matrix
print(graph)

In [None]:
# clean-up step, added only because the last line of the previous cell does not behave as wanted:
# keep the weights greater than 1 and set every other entry (NaN included) to 0,
# then transpose the dataframe (the transpose decides whether the weights sit on rows or on columns)
graph = graph.where(graph.gt(1), other = 0).transpose()

In [None]:
## an attempt with networkX (it picks the lowest weight, though)

import networkx as nx

In [None]:
## build a directed networkX graph from graph, which is used as a non-symmetric adjacency matrix
D = nx.from_pandas_adjacency(graph, create_using = nx.DiGraph)

In [None]:
## show the nodes
D.nodes()

In [None]:
## link to the algorithm used
## https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.weighted.single_source_dijkstra.html#networkx.algorithms.shortest_paths.weighted.single_source_dijkstra
## NOTE(review): source and target are the same node, so this presumably returns length 0 and the
## single-node path ['LondonGB'] rather than a round trip - confirm; the separate London_st start
## node that would distinguish start from arrival is commented out earlier in the notebook
length, path = nx.single_source_dijkstra(D, source = 'LondonGB', target = 'LondonGB')
print(length)
print(path)

In [None]:
## draw the directed graph D
nx.draw(D)

In [None]:
## se vogliamo altri formati
## gr_array = np.array(graph) ## array


In [None]:
## list
## gr_ls = gr_array.tolist()
## print(gr_ls)

In [None]:
## type(gr_ls)

In [None]:
# GR = nx.from_numpy_array(gr_array)
# GR.edges(data=True)

In [None]:
## GR.nodes()

In [None]:
## nx.shortest_path(GR, source = 0, target = 271)