# ETL file to convert the Heojson file into network files

In [1]:
try:
    import json 
    import csv
    import os
    from math import sin, cos, sqrt, atan2, radians

    import networkx as nx

    from folium import *

    import pandas as pd

    
except:
    %pip install folium
    %pip install pandas
    %pip install networkx

In [2]:
class ParisTransportation:
    @staticmethod
    def clear():
        if os.name == 'posix':
            os.system('clear')
        else:
            os.system('cls')

    @staticmethod
    def clean_data():
        nodefile = "vertex/vertex.csv"
        edgefile = "edge/edge.csv"
        jsonfile = "geojson/multigraph.geojson"
        graphmlfile = "graphml/multigraph.graphml"
        gmlfile = "gml/multigraph.gml"

        if not os.path.exists("edge"):
            os.makedirs("edge")
        if not os.path.exists("vertex"):
            os.makedirs("vertex")
        if not os.path.exists("gml"):
            os.makedirs("gml")
        
        with open(jsonfile, 'r') as jsfile:
            with open(nodefile, 'w+') as nodefiled:
                with open(edgefile, 'w+') as edgefiled:
                    node = csv.writer(nodefiled)
                    edge = csv.writer(edgefiled)
                    # Header
                    node.writerow(["# NodeID", "Lat", "Lon", "Layer"])
                    edge.writerow(["# EdgeID","Source NodeID","Target NodeID","Direction","Length","Layer","Name","degreeDual","Lat","Lon"])
                    for line in jsfile:
                        jsentry = json.loads(line)
                        #print(jsentry)
                        if jsentry['properties']['type'] == "node":
                            node.writerow([
                                    jsentry['_id']['$oid'], 
                                    jsentry['geometry']['coordinates'][0], 
                                    jsentry['geometry']['coordinates'][1], 
                                    jsentry['properties']['layer']])
                                    
                        elif jsentry['properties']['type'] == "edge":
                            if jsentry['properties']['name'].startswith("54"):
                                jsentry['properties']['name'] = "None"

                            if jsentry['properties'].get('length') is not None:
                                length = jsentry['properties']['length']
                            else:
                                length = "None"

                            if 'direction' in jsentry['properties']:
                                if jsentry['properties']['direction'] == "Double sens":
                                    direction = "TwoWay"
                                elif (jsentry['properties']['direction'] == "Sens inverse" 
                                    or jsentry['properties']['direction'] == "Sens unique"):
                                    direction = "OneWay"
                                    
                                edge.writerow([
                                    jsentry['properties']['mongo_org_id'],
                                    jsentry['properties']['mongo_dest_id'],
                                    jsentry['_id']['$oid'], 
                                    direction,
                                    length,
                                    jsentry['properties']['layer'],
                                    jsentry['properties']['name'],
                                    jsentry['properties']['degreeDual'],
                                    jsentry['geometry']['coordinates'][1][0],
                                    jsentry['geometry']['coordinates'][1][1]])
                            else:
                                edge.writerow([
                                    jsentry['properties']['mongo_org_id'],
                                    jsentry['properties']['mongo_dest_id'],
                                    jsentry['_id']['$oid'],
                                    "TwoWay",
                                    length, 
                                    jsentry['properties']['layer'], 
                                    jsentry['properties']['name'],
                                    jsentry['properties']['degreeDual'],
                                    jsentry['geometry']['coordinates'][1][0],
                                    jsentry['geometry']['coordinates'][1][1]])

        G = nx.DiGraph()
        H = nx.DiGraph()
        Q = nx.Graph()
        I = nx.Graph()
        O = nx.Graph()

        with open(nodefile, 'r') as node:
            reader = csv.reader(node)
            next(reader)
            for row in reader:
                if len(row) != 0:
                    lat = row[1]
                    lon = row[2]
                    ntype = row[3]
                    G.add_node(row[0], lat=lat, lon=lon, layer=ntype)
                    if ntype == "metro":
                        Q.add_node(row[0], lat=lat, lon=lon, layer=ntype)
                    elif ntype == "road":
                        H.add_node(row[0], lat=lat, lon=lon, layer=ntype)
                    elif ntype == "train":
                        I.add_node(row[0], lat=lat, lon=lon, layer=ntype)
                    elif ntype == "tram":
                        O.add_node(row[0], lat=lat, lon=lon, layer=ntype)

        # edge.writerow(["# EdgeID","Source NodeID","Target NodeID","Direction","Length","Layer","Name","degreeDual","Lat","Lon"])

        with open(edgefile, 'r') as node:
            reader = csv.reader(node)
            next(reader)
            for row in reader:
                if len(row) != 0:
                    if not G.has_node(row[2]):
                        G.add_node(row[2], lat=row[8], lon=row[9], layer=row[5])
                    G.add_edge(row[1], row[2], layer=row[5], length=row[4], name=row[6], degreeDual=row[7])

                    if row[3] == 'TwoWay':
                        G.add_edge(row[2], row[1], layer=row[5], length=row[4], name=row[6], degreeDual=row[7])

                        if row[5]=="metro":
                            Q.add_edge(row[2], row[1], layer=row[5], length=row[4], name=row[6], degreeDual=row[7])
                        elif row[5]=="road":
                            H.add_edge(row[2], row[1], layer=row[5], length=row[4], name=row[6], degreeDual=row[7])
                        elif row[5]=="train":
                            I.add_edge(row[2], row[1], layer=row[5], length=row[4], name=row[6], degreeDual=row[7])
                        elif row[5]=="tram":
                            O.add_edge(row[2], row[1], layer=row[5], length=row[4], name=row[6], degreeDual=row[7])


        nx.write_graphml(G, graphmlfile)
        nx.write_graphml(G, gmlfile)
        
        return(G, H, I, O, Q)

    def geoJSON(self):
        df = pd.read_json('geojson/multigraph.geojson', lines=True)
        id = pd.json_normalize(df["_id"])
        id = id.rename(columns={"$oid": "ID"})

        geom = pd.json_normalize(df["geometry"])
        geom = geom.rename(columns={"type": "LP"})

        prop = pd.json_normalize(df["properties"])

        new_df = pd.concat([id, geom, prop], axis=1)
        return(new_df)
    
    class MapMaker:
        def __init__(self, df):
            self.df = df
            self.only_point = df.loc[df['LP'] == "Point"]
            self.points = self.only_point.coordinates.tolist()
            self.center = (48.85654066902656, 2.349154275836)
            self.canvas = True,
            self.bounds = True,
            self.filename = "france.html"
            self.layerColor = {
                "road": "green",
                "train": "blue",
                "metro": "gray",
                "tram": "purple"
            }

        @staticmethod
        def calculate_distance(lon1, lat1, lon2, lat2, R=6357):
            lat1 = radians(lat1)
            lat2 = radians(lat2)
            lon1 = radians(lon1)
            lon2 = radians(lon2)

            dlon = lon2 - lon1
            dlat = lat2 - lat1

            a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
            c = 2 * atan2(sqrt(a), sqrt(1 - a))

            distance = R * c
            return distance
        
        def create_map(self):
            if not os.path.exists(self.filename):
                m = Map(location=[self.center[0], self.center[1]], 
                    tiles="CartoDB positron", 
                    min_zoom=7, 
                    zoom_start=9,
                    zoom_control=True, 
                    min_lat=42, 
                    max_lat=54, 
                    min_lon=-10, 
                    max_lon=14, 
                    max_bounds=self.bounds,
                    prefer_canvas=self.canvas)

                minimap = plugins.MiniMap()
                m.add_child(minimap)

                layers = self.only_point.layer.unique().tolist()

                fg = FeatureGroup(control=False, show=False)
                m.add_child(fg)

                f1 = plugins.FeatureGroupSubGroup(fg, layers[0].capitalize())
                m.add_child(f1)

                f2 = plugins.FeatureGroupSubGroup(fg, layers[1].capitalize())
                m.add_child(f2)

                f3 = plugins.FeatureGroupSubGroup(fg, layers[2].capitalize())
                m.add_child(f3)

                f4 = plugins.FeatureGroupSubGroup(fg, layers[3].capitalize())
                m.add_child(f4)

                i = 0
                layerTypes = self.only_point.layer.tolist()

                for lon, lat in self.points:

                    distance = self.calculate_distance(lon, lat, self.center[1], self.center[0])
                    radiusDistance = distance

                    if int(distance) > 0:
                        distance = int(distance)
                        distance = f"{distance} km"
                    else:
                        distance = distance * 1000
                        distance = int(distance)
                        distance = f"{distance} mt"

                    circle = Circle(
                        location=(lat, lon),
                        tooltip=f"<strong>Type:</strong> {layerTypes[i]}<br><strong>Center distance:</strong> {distance}",
                        radius=sqrt(radiusDistance * 1000),
                        popup=f"<strong>Type:</strong> {layerTypes[i]}<br><strong>Lat:</strong> {lat}<br><strong>Long:</strong> {lon}<br><strong>Center distance:</strong> {distance}",
                        color=self.layerColor.get(layerTypes[i]),
                        fill=False,
                        fill_color=self.layerColor.get(layerTypes[i])
                    )
                    
                    if layerTypes[i] == layers[0]:
                        f1.add_child(circle)
                    elif layerTypes[i] == layers[1]:
                        f2.add_child(circle)
                    elif layerTypes[i] == layers[2]:
                        f3.add_child(circle)
                    elif layerTypes[i] == layers[3]:
                        f4.add_child(circle)

                    i +=1

                paris = Circle(
                    location=(self.center[0], self.center[1]),
                    tooltip="Center",
                    popup="",
                    radius=80,
                    color="crimson",
                    fill=True,
                    fill_color="crimson"
                )
                paris.add_to(fg)

                plugins.Fullscreen(
                        position="topright",
                        title="Fullscreen",
                        title_cancel="Exit fullscreen",
                        force_separate_button=True,
                    ).add_to(m)

                LayerControl().add_to(m)
                m.save(self.filename)

In [3]:
G, ROAD, TRAIN, TRAM, METRO = ParisTransportation().clean_data()
df = ParisTransportation().geoJSON()
ParisTransportation().MapMaker(df).create_map()

In [4]:
summary_table = pd.DataFrame({
    "Nodes" : [
        len(METRO.nodes), 
        len(TRAIN.nodes),
        len(TRAM.nodes),
        len(ROAD.nodes)],
    "Edges" : [
        len(METRO.edges), 
        len(TRAIN.edges),
        len(TRAM.edges),
        len(ROAD.edges)],
    "Degree" : [
        sum(dict(METRO.degree()).values())/float(len(METRO)),
        sum(dict(TRAIN.degree()).values())/float(len(TRAIN)),
        sum(dict(TRAM.degree()).values())/float(len(TRAM)),
        sum(dict(ROAD.degree()).values())/float(len(ROAD))
    ],
    "Reference" : ["OSM", "OSM", "OSM", "IGN"]
}, index=["Metro", "Train", "Tram", "Road"])
summary_table

Unnamed: 0,Nodes,Edges,Degree,Reference
Metro,670,367,1.095522,OSM
Train,487,246,1.010267,OSM
Tram,286,140,0.979021,OSM
Road,34733,19926,1.147381,IGN


In [None]:
#temp = df.loc[df['LP'] == "Point"]
df.groupby(['ID', 'layer']).size().reset_index().groupby('layer')[[0]].max()
#df.loc[(df['layer'] == "train") & (df['LP'] == "Point")]
df.duplicated(subset="ID").value_counts()

In [None]:
import matplotlib.pyplot as plt
hist = nx.degree_histogram(G)
plt.plot(range(0, len(hist)), hist, ".")
plt.loglog()
plt.show()

In [None]:
degree_sequence = [G.degree(n) for n in G.nodes]
counts, bins, patches = plt.hist(degree_sequence, bins=100)

In [None]:
B = nx.betweenness_centrality(G, normalized=True)
betweenness_sequence = list(B.values())
counts, bins, patches = plt.hist(betweenness_sequence, bins=10)

In [None]:
from empiricaldist import Cdf
degrees = [G.degree(u) for u in G]
cdf = Cdf.from_seq(degrees, name="distribution")
(1-cdf).plot()

In [None]:
def degree_histogram_directed(G, in_degree=False, out_degree=False):
    """Return a list of the frequency of each degree value.

    Parameters
    ----------
    G : Networkx graph
       A graph
    in_degree : bool
    out_degree : bool

    Returns
    -------
    hist : list
       A list of frequencies of degrees.
       The degree values are the index in the list.

    Notes
    -----
    Note: the bins are width one, hence len(list) can be large
    (Order(number_of_edges))
    """
    nodes = G.nodes()
    if in_degree:
        in_degree = dict(G.in_degree())
        degseq=[in_degree.get(k,0) for k in nodes]
    elif out_degree:
        out_degree = dict(G.out_degree())
        degseq=[out_degree.get(k,0) for k in nodes]
    else:
        degseq=[v for k, v in G.degree()]
    dmax=max(degseq)+1
    freq= [ 0 for d in range(dmax) ]
    for d in degseq:
        freq[d] += 1
    return freq

In [None]:
in_degree_freq = degree_histogram_directed(G, in_degree=True)
out_degree_freq = degree_histogram_directed(G, out_degree=True)
degrees = range(len(in_degree_freq))
plt.figure(figsize=(12, 8)) 
plt.loglog(range(len(in_degree_freq)), in_degree_freq, 'go-', label='in-degree') 
plt.loglog(range(len(out_degree_freq)), out_degree_freq, 'bo-', label='out-degree')
plt.xlabel('Degree')
plt.ylabel('Frequency')

In [None]:
print(len(G.nodes()))
print(len(H.nodes()))
print(len(I.nodes()))
print(len(O.nodes()))
print(len(Q.nodes()))

In [None]:
print(len(G.nodes))
print(len(H.nodes))
print(len(G.nodes))
print(len(G.nodes))
print(len(G.nodes))
#temp = df[df.LP == "Point"]
#temp

In [None]:
df.layer.unique().tolist()

In [4]:
#def exportEdges(G, layers=['road','tram','train','metro']):
#    nx.write_edgelist

nx.to_dict_of_dicts(G)

{'5453b63355474a3362317270': {'54b7bef755474a2bb2745109': {'layer': 'road',
   'length': '0.41',
   'name': 'D137',
   'degreeDual': '57'}},
 '5453b63355474a3362317271': {'54b7bef855474a2bb27479db': {'layer': 'road',
   'length': '3.61',
   'name': 'None',
   'degreeDual': '4'},
  '54b7bef855474a2bb27485d1': {'layer': 'road',
   'length': '0.35',
   'name': 'None',
   'degreeDual': '1'}},
 '5453b63355474a3362317272': {'54b7bef755474a2bb2745729': {'layer': 'road',
   'length': '1.85',
   'name': 'D125',
   'degreeDual': '56'},
  '54b7bef855474a2bb274a0f8': {'layer': 'road',
   'length': '2.6',
   'name': 'D124',
   'degreeDual': '33'}},
 '5453b63355474a3362317273': {'54b7bef855474a2bb2748d5e': {'layer': 'road',
   'length': '1.13',
   'name': 'D915Z',
   'degreeDual': '12'}},
 '5453b63355474a3362317274': {'54b7bef755474a2bb2745896': {'layer': 'road',
   'length': '1.47',
   'name': 'D922',
   'degreeDual': '142'}},
 '5453b63355474a3362317275': {},
 '5453b63355474a3362317276': {'54b7bef8

In [None]:
from metro import multiplex as mx

In [None]:
m = mx.multiplex(layers=['metro','train','tram','road'], G=G)
m.summary()

In [None]:
m.as_graph()

In [None]:
for n in G.node:
    if G.node[n].get('layer') is None:
        G.node[n]['lat'] = None
        G.node[n]['lon'] = None
        G.node[n]['layer'] = ''
print(i)

In [4]:
# '5453b63355474a3362317270': {'lat': '3.5372355545105947', 'lon': '48.231939820695146', 'layer': 'road'}
# ...
# '5453b63355474a3362317270': {}
allNodes = dict(G.nodes())
for k, v in allNodes.items():
    print(k, v)

5453b63355474a3362317270 {'lat': '3.5372355545105947', 'lon': '48.231939820695146', 'layer': 'road'}
5453b63355474a3362317271 {'lat': '2.3955539609513337', 'lon': '49.049663371625094', 'layer': 'road'}
5453b63355474a3362317272 {'lat': '1.6563044786850607', 'lon': '49.05187906283865', 'layer': 'road'}
5453b63355474a3362317273 {'lat': '2.091532960687826', 'lon': '49.05588679019185', 'layer': 'road'}
5453b63355474a3362317274 {'lat': '2.146181135828161', 'lon': '49.05648991168742', 'layer': 'road'}
5453b63355474a3362317275 {'lat': '3.710933182434043', 'lon': '49.048990443259925', 'layer': 'road'}
5453b63355474a3362317276 {'lat': '1.9323217624796667', 'lon': '48.23040769636736', 'layer': 'road'}
5453b63355474a3362317277 {'lat': '3.622408227418039', 'lon': '48.23356000535538', 'layer': 'road'}
5453b63355474a3362317278 {'lat': '2.025541741445847', 'lon': '49.055324696730594', 'layer': 'road'}
5453b63355474a3362317279 {'lat': '2.1062584347872835', 'lon': '48.231819605304494', 'layer': 'road'}


In [7]:
.layer.value_counts()

road          37164
crosslayer     4071
metro           670
train           487
tram            286
Name: layer, dtype: int64

In [9]:
sources = []
dests = []
values = []

for source, dest in dict(G.edges()):
    sources.append(source)
    dests.append(dest)

for value in dict(G.edges()).items():
    values.append(value[1])

finalDF = pd.DataFrame({
    'source' : sources,
    'destination' : dests,
    'valori' : values
})
pd.concat([finalDF.drop(['valori'], axis=1), finalDF['valori'].apply(pd.Series)], axis=1).layer.value_counts()

road          42283
crosslayer     8142
metro           734
train           492
tram            280
Name: layer, dtype: int64