In [None]:
## import data manipulation packages for data cleaning and distance calculation
import pandas as pd
import numpy as np
from sklearn.neighbors import DistanceMetric
from math import radians

In [None]:
## DATA CLEANING AND PREPARATION
## read the world-cities workbook into `cities`, then keep in `ct` only the rows
## that have no missing value in any column
cities = pd.read_excel('worldcities.xlsx')
ct = cities.dropna(how = 'any', axis = 'index')

In [None]:
## add the London starting point as 'London_st', shifted 0.2 degrees of longitude to the east
## of London (to facilitate the assignment resolution)
## .copy() makes an independent frame, so the two assignments below modify only the new row
## and do not raise SettingWithCopyWarning
London_st = ct.loc[(ct['city'] == 'London') & (ct['iso3'] == 'GBR')].copy()
London_st['city'] = 'London_st'
London_st['lng'] = London_st['lng'] + 0.2
## DataFrame.append was removed in pandas 2.0; pd.concat gives the same result
## (rows stacked, original index labels kept)
ct = pd.concat([ct, London_st])

In [None]:
## rebuild a 0..n-1 row index after the append; the previous labels are kept
## as a new 'index' column
ct = ct.reset_index(drop = False)

In [None]:
## build the 'ID' column as <city name><iso2 code>, intended as a unique key per city
city_name = ct['city'].map(str)
country_code = ct['iso2'].map(str)
ct['ID'] = city_name + country_code

In [None]:
## drop the columns that are not used any more, including the 'index' column
## left behind by reset_index()
unused_columns = ['city_ascii', 'country', 'iso2', 'admin_name', 'capital', 'id', 'index']
ct = ct.drop(columns = unused_columns)

In [None]:
## row(s) of 'London_st', used later as the 'source' of the path search
is_start = ct['city'].eq('London_st')
source = ct.loc[is_start]

In [None]:
## row(s) of London (GBR), used later as the 'target' of the path search
is_london = ct['city'].eq('London')
is_gbr = ct['iso3'].eq('GBR')
target = ct.loc[is_london & is_gbr]

In [None]:
## GETTING WEIGHTS - part I
## population weight: 0 where population < 200000, otherwise 2
## `pop` is a 1-D numpy array with one entry per row of `ct`
below_threshold = ct['population'].lt(200000)
pop = np.where(below_threshold, 0, 2)

In [None]:
## state weight: pairwise matrix holding 2 where the two cities have a different 'iso3'
## and 0 where they share the same one
i = ct['iso3'].to_numpy()
st = np.where(i.reshape(-1, 1) != i, 2, 0)

In [None]:
## GETTING DIRECTION - pairwise longitude comparison
## longitudes are first rescaled from -180/+180 to 0/360 (negative values get +360),
## so that every value is positive and London sits next to the 0/360 seam
## dr[a, b] is 1 where rescaled longitude of city a < that of city b, otherwise 0
lng_deg = ct['lng']
dr_x = np.where(lng_deg >= 0, lng_deg, (lng_deg + 180) + 180)
x = dr_x
dr = np.less(x[:, np.newaxis], x) * 1

In [None]:
## range mask used to turn big longitude gaps (>= 60 degrees) into '0' (no go area),
## which keeps the final matrix lighter to handle
## rang[a, b] is 1 where x[a] < x[b] + 60, otherwise 0
rang = np.less(x[:, np.newaxis], x + 60) * 1

In [None]:
## This is not needed, right?
## dir_test = pd.DataFrame(dr*rang.T, columns = ct['ID'], index = ct['ID'])
## dir_test

In [None]:
## wrap the direction, state and population weights in dataframes labelled by city 'ID'
## direction combines `dr` with the transposed range mask, so an entry is 1 only when
## both conditions hold
city_ids = ct['ID']
direction = pd.DataFrame(dr * rang.T, index = city_ids, columns = city_ids)
same_state = pd.DataFrame(st, index = city_ids, columns = city_ids)
population = pd.DataFrame(pop, index = city_ids)

In [None]:
## DISTANCE COMPUTATION - 'Haversine'
## the earth is a sphere, so the great-circle ('haversine') distance is needed between places;
## the metric works on radians, so 'lat' and 'lng' are converted here
## NOTE: this overwrites the degree values in `ct`, so running the cell twice converts twice
for angle_col in ('lat', 'lng'):
    ct[angle_col] = np.radians(ct[angle_col])

In [None]:
## retrieve the 'haversine' metric object through DistanceMetric
## (imported from sklearn.neighbors at the top of the notebook)
dist = DistanceMetric.get_metric('haversine')

In [None]:
## pairwise haversine distance between all cities, multiplied by 6373 to get kms
coords_rad = ct[['lat', 'lng']].to_numpy()
pairwise_km = dist.pairwise(coords_rad) * 6373
## keep a distance only where the 'direction' dataframe holds 1 (eastward, in range);
## every other entry becomes 0
D = np.where(direction > 0, pairwise_km, 0)
## distance matrix (transposed) with the city IDs on both axes
distance = pd.DataFrame(D.T, index = ct['ID'], columns = ct['ID'])

In [None]:
## view matrix of distance

In [None]:
## This is not needed, right?
## distance.loc['London_stGB'].sum()

In [None]:
## I think this is already covered by `import pandas as pd`, isn't it?
## from pandas import DataFrame

In [None]:
## GETTING WEIGHTS - part II
## start from the matrix of distances 'distance' (which already contains the directions)
## and populate 'dis' with weights, column by column:
## 2 for the closest city, 4 for the second closest, 8 for the third closest,
## 0 ('no go') for everything else
dis = distance.T
## Rank the positive distances inside each column (1 = closest). Zeros are masked to NaN
## first so they never get a rank. Ranking replaces the earlier in-place value replacement,
## which mixed real kilometre values with the marker values 2/4/8: real distances of 8 km
## or less survived as weights, and a real distance at or below a marker made a rank be skipped.
## Ranking is also positional, so repeated ID labels cannot mix up columns.
## method='dense' gives tied distances the same rank, without gaps.
closeness_rank = dis.where(dis > 0).rank(axis = 0, method = 'dense')
dis = (
    closeness_rank.eq(1) * 2.0
    + closeness_rank.eq(2) * 4.0
    + closeness_rank.eq(3) * 8.0
)
dis

In [None]:
## SUMMING THE TOTAL WEIGHTS
## add up 'dis', 'same_state' and the population array `pop` (broadcast across the columns;
## `pop` is 1-D, so transposing it changes nothing)
combined = dis + same_state + pop
## multiplying and dividing by 'dis' turns every entry where dis == 0 into NaN (0 / 0),
## so links that are 'no go' in 'dis' do not survive just because of the other weights
graph = combined * dis / dis
## NaN and anything not greater than 1 becomes 0
graph = graph.where(graph.gt(1), 0)
graph

In [None]:
## turn the final dataframe into a plain numpy array (a copy) for 'NetworkX'
gr_array = graph.to_numpy(copy = True)
gr_array

In [None]:
## SHORTEST PATH ALGORITHM aka Dijkstra's algorithm
## import NetworkX
import networkx as nx

In [None]:
## convert the numpy array to a GRAPH data structure, which has nodes (cities) and
## edges (weights between nodes); zero entries produce no edge
## the array is directional (an entry [a, b] is a link from a to b only), so a directed
## graph is required: the default undirected Graph would let every link be travelled
## both ways and the direction built into the array would be lost
GR = nx.from_numpy_array(gr_array, create_using = nx.DiGraph)

In [None]:
## edges visualization (optional): shows every edge together with its attribute dict
## (the 'weight' taken from the array)
GR.edges(data=True)

In [None]:
## nodes visualization (optional): nodes are the integer row/column positions of the array
GR.nodes()

In [None]:
## show the rows of 'London_st' (source) and 'London' (target); their index labels
## are the node ids to use in the path search
print(source, target, sep = '\n')

In [None]:
## using networkx.single_source_dijkstra()
## the command computes shortest paths and lengths in a weighted graph G
## it returns a tuple containing the 'length' of the shortest path, and the 'path' itself
## the node ids are read from `source` / `target` instead of being typed in by hand:
## graph nodes are array positions, which match the 0..n-1 index of `ct`
## int() keeps the ids plain Python integers, like the graph's own node keys
source_node = int(source.index[0])
target_node = int(target.index[0])
length, path = nx.single_source_dijkstra(GR, source_node, target_node)
print(length, path)

In [None]:
## look up the city name of every node on 'path' in 'ct'
ct['city'].loc[path]

In [None]:
## how many days does it take to go around the world?
## the previous factor 0.041667 is 1/24 rounded, so `length` is presumably in hours
## (TODO confirm); dividing by 24 does the same conversion without the rounding error
HOURS_PER_DAY = 24
days_to_london = length / HOURS_PER_DAY
days_to_london

In [None]:
## draw the whole graph with networkx's default layout (drop if too long to compute)
nx.draw(GR)

In [None]:
## build the dataframe with the data of the route
## the node ids in `path` are row positions of `ct` (after dropna / append / reset_index),
## so they must be looked up in `ct`: `cities` still carries its original labels and has no
## 'London_st' row, so using the ids there would pick the wrong rows or raise KeyError
lista = ct.loc[path].copy()
## 'lat' and 'lng' in `ct` were overwritten with radians for the distance computation;
## convert them back to degrees for plotting
lista[['lat', 'lng']] = np.degrees(lista[['lat', 'lng']])

In [None]:
# possibly not necessary: renumbers the rows 0..n-1 in route order and keeps the
# previous labels in a new 'index' column (the order of the rows is not changed)
lista = lista.reset_index(drop = False)

In [None]:
# import the plotting libraries
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [None]:
## create the first chart: the legs between consecutive cities, drawn as a blue line
## on the world map
route_line = go.Scattergeo(
    lat = lista['lat'],
    lon = lista['lng'],
    mode = 'lines',
    line = dict(width = 2, color = 'blue'),
)
fig = go.Figure(data = route_line)

In [None]:
## update the chart with one marker per visited city; the city name is shown
## when the marker is hovered with the mouse
marker_outline = dict(width = 3, color = 'rgba(68, 68, 68, 0)')
marker_style = dict(size = 6, color = 'rgb(102,102,102)', line = marker_outline)
city_markers = go.Scattergeo(
    locationmode = 'country names',
    lon = lista['lng'],
    lat = lista['lat'],
    hoverinfo = 'text',
    text = lista['city'],
    name = "Cities",
    mode = 'markers',
    marker = marker_style,
)
fig.add_trace(city_markers)
fig.show()