In [3]:
import geopy.distance
import pandas as pd
import json

In [8]:
def is_in_center(center, radius, point):
    """
    Tell whether a point lies within the central area of a city.

    :param center: the coordinates of the city center, handed unchanged to geopy's geodesic
    :param radius: the radius of the city center; compared against the distance in kilometres
    :param point: the point to test; must expose 'lat' and 'lon' entries
    :return: 1 when the geodesic distance to the center is strictly below the radius, else 0
    """
    point_coords = [point['lat'], point['lon']]
    distance_km = geopy.distance.geodesic(center, point_coords).km
    inside = distance_km < radius
    return int(inside)

In [9]:
# Read the two per-city lookup tables (radius and center coordinates) from JSON
with open('radii.json', 'r') as radii_file:
    radii = json.load(radii_file)

with open('centers.json', 'r') as centers_file:
    centers = json.load(centers_file)

In [11]:
for city, center_coords in centers.items():
    # The radius table and the data folders are addressed by the lower-cased city name
    city = city.lower()
    radius = radii[city]

    # Read the stops of this city (semicolon-separated file)
    nodes_df = pd.read_csv(f'data/{city}/network_nodes.csv', sep=';')

    # Flag every row: 1 if it falls inside the city center, 0 otherwise
    nodes_df['city_center'] = nodes_df.apply(
        lambda row: is_in_center(center_coords, radius, row),
        axis=1,
    )

    # Report the label counts and the share of nodes inside the center
    center_flags = nodes_df['city_center']
    print(f'City : {city}')
    print(center_flags.value_counts())
    print(center_flags.mean())
    print('================')

    # Persist the labeled table alongside the input file
    nodes_df.to_csv(f'data/{city}/network_nodes_labeled.csv', index=False)

City : adelaide
0    5490
1    2058
Name: city_center, dtype: int64
0.2726550079491256
City : belfast
0    1565
1     352
Name: city_center, dtype: int64
0.18362023995826812
City : berlin
0    3841
1     760
Name: city_center, dtype: int64
0.16518148228645946
City : bordeaux
0    2900
1     535
Name: city_center, dtype: int64
0.15574963609898107
City : brisbane
0    7992
1    1653
Name: city_center, dtype: int64
0.1713841368584759
City : canberra
0    2307
1     457
Name: city_center, dtype: int64
0.16534008683068016
City : detroit
0    4984
1     699
Name: city_center, dtype: int64
0.12299841632940349
City : dublin
0    3994
1     577
Name: city_center, dtype: int64
0.12623058411726099
City : grenoble
0    1296
1     251
Name: city_center, dtype: int64
0.16224951519069167
City : helsinki
0    6071
1     915
Name: city_center, dtype: int64
0.13097623819066706
City : kuopio
0    470
1     79
Name: city_center, dtype: int64
0.14389799635701275
City : lisbon
0    6159
1     914
Name: city

## Graph properties generation

In [1]:
from utils import *
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import networkx as nx
import os
from functools import partial
%load_ext autoreload
%autoreload 2

In [2]:
# Fetch the graphs, node tables and names for everything under "data"
# (helper presumably comes from the `utils` star import — confirm its return shapes there)
graphs, nodes, city_names = get_all_city_graph("data")

In [3]:
final = []
# (column name, metric function) pairs; each function is handed to add_attribute_to_name
# together with the graph it should be evaluated on.
# NOTE(review): betweenness is approximated from k=500 sampled nodes and no seed is passed,
# so its values can differ between runs — confirm whether that is acceptable.
fcts = [
    ("in_degree_distribution", nx.DiGraph.in_degree),
    ("out_degree_distribution", nx.DiGraph.out_degree),
    ("clustering", nx.clustering),
    ("betweeness_centrality", partial(nx.betweenness_centrality, k=500)),
    ("eigenvector_centrality", partial(nx.eigenvector_centrality, max_iter=500)),
    ("katz_centrality", nx.katz_centrality),
    ("closeness_centrality", nx.closeness_centrality),
]
for g, node in zip(graphs, nodes):
    # Simple directed copy of g holding one weight-1 edge per (u, v) pair; used as a
    # fallback when a metric cannot be computed on g itself.
    di_graph_de_backup = nx.DiGraph()
    di_graph_de_backup.add_edges_from(g.edges(), weight=1)

    for attr, fct in fcts:
        try:
            node = add_attribute_to_name(node, attr, fct, g)
        except Exception:
            # Catch Exception instead of using a bare except so KeyboardInterrupt and
            # SystemExit still stop the cell; any other failure retries on the backup graph.
            node = add_attribute_to_name(node, attr, fct, di_graph_de_backup)

    # Every metric has been attached: keep the enriched table for this city
    final.append(node)

In [None]:
# Stack the per-city tables into one frame and write it to data/all_metrics.csv
all_metrics_path = os.path.join("data", "all_metrics.csv")
final_all = pd.concat(final)
final_all.to_csv(all_metrics_path)