# EMT LINES AND STOPS NETWORK GRAPH

In [101]:
import networkx as nx

import pandas as pd

import numpy as np
from numpy import pi, sin, cos

import json

import datetime
from datetime import timedelta

import statistics
from statistics import mean
import math

from pandarallel import pandarallel
from joblib import Parallel, delayed
import multiprocessing
num_cores = multiprocessing.cpu_count()
pandarallel.initialize()

import plotly.graph_objects as go
import plotly.io as pio
import plotly.express as px

import branca.colormap as cm
from colour import Color

pio.templates.default = 'plotly_white'

INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [3]:
#Available colors
#Each entry pairs a hex code with a descriptive label; only the hex codes are kept
colors = [hex_code for hex_code, _label in (
    ('#1f77b4', 'muted blue'),
    ('#ff7f0e', 'safety orange'),
    ('#2ca02c', 'cooked asparagus green'),
    ('#d62728', 'brick red'),
    ('#9467bd', 'muted purple'),
    ('#8c564b', 'chestnut brown'),
    ('#e377c2', 'raspberry yogurt pink'),
    ('#7f7f7f', 'middle gray'),
    ('#bcbd22', 'curry yellow-green'),
    ('#17becf', 'blue-teal'),
)]

In [4]:
#Token and styles for the mapbox api
#NOTE(review): the access token is hardcoded in the notebook; consider loading it from an
#environment variable or an untracked config file so it is not committed with the code.
mapbox_access_token = 'pk.eyJ1IjoiYWxlanAxOTk4IiwiYSI6ImNrNnFwMmM0dDE2OHYzZXFwazZiZTdmbGcifQ.k5qPtvMgar7i9cbQx1fP0w'
#Mapbox style URL (presumably the day-time map style; neither name is used in the cells visible here)
style_day = 'mapbox://styles/alejp1998/ck6z9mohb25ni1iod4sqvqa0d'

In [188]:
def gen_graph(G):
    """Render a stops network graph as a Plotly figure.

    Every edge is drawn as its own line trace: the width grows with the edge
    ``weight`` relative to the largest weight in the graph, and the colour is
    taken from a light-blue to dark-blue scale indexed by the edge ``time``
    (edges whose time is 0 are drawn in purple). Nodes are drawn as markers
    sized by their weighted out-degree, and one arrow annotation per edge
    shows its direction. The node colour comes from the module-level
    ``colors`` list.

    Parameters
    ----------
    G : directed graph (``out_degree`` and ``in_degree`` are used)
        Nodes must carry ``coords`` (x, y), ``name`` and ``lines`` attributes.
        Edges must carry ``weight`` and ``lines``; ``time`` is optional and is
        treated as 0 when missing. ``time`` is used as a list index, so it is
        expected to be a non-negative integer.

    Returns
    -------
    plotly.graph_objects.Figure
        The figure with one trace per edge plus one trace for the nodes. An
        empty graph yields a figure without edges or annotations.
    """
    #Number of segments each edge is split into
    #(presumably so the hover text is reachable along the whole edge - confirm)
    n_segments = 10

    #Node coordinates
    Xv=[G.nodes[k]['coords'][0] for k in G.nodes()]
    Yv=[G.nodes[k]['coords'][1] for k in G.nodes()]

    #Edge endpoints and attributes, all lists share the same edge order
    edge_nodes,Xed,Yed,Wed,Led,Ted=[],[],[],[],[],[]
    for edge in G.edges:
        orig,dest = edge[0],edge[1]
        attrs = G.edges[edge]
        edge_nodes.append((orig,dest))
        Xed.append([G.nodes[orig]['coords'][0],G.nodes[dest]['coords'][0]])
        Yed.append([G.nodes[orig]['coords'][1],G.nodes[dest]['coords'][1]])
        Wed.append(attrs['weight'])
        Led.append(attrs['lines'])
        #Graphs built without times have no 'time' attribute on their edges
        Ted.append(attrs.get('time', 0))

    #One colour per possible time value. Color.rgb holds floats in [0,1], while
    #Plotly reads 'rgb(r, g, b)' strings on a 0-255 scale, so rescale the channels
    palette = [
        'rgb({}, {}, {})'.format(*(int(round(channel*255)) for channel in shade.rgb))
        for shade in Color('lightblue').range_to(Color('darkblue'), max(Ted, default=0)+1)
    ]

    #Weight of every edge relative to the heaviest one (0 when all weights are 0)
    max_weight = max(Wed, default=0)
    rel_weights = [w/max_weight if max_weight else 0 for w in Wed]

    line_traces = []
    for i in range(len(Wed)) :
        (x0,x1),(y0,y1) = Xed[i],Yed[i]
        #Evenly spaced points from the origin stop to the destination stop
        xs = [x0+(x1-x0)*k/n_segments for k in range(n_segments+1)]
        ys = [y0+(y1-y0)*k/n_segments for k in range(n_segments+1)]

        line_trace=go.Scatter(
            x=xs,
            y=ys,
            mode='lines',
            line=dict(
                width=1+rel_weights[i]*5,
                color=palette[Ted[i]] if Ted[i]!= 0 else 'purple'
            ),
            opacity=0.95,
            hoverinfo='text',
            text='Orig Stop: '+str(edge_nodes[i][0])+'<br>'+ \
                'Dest Stop: '+str(edge_nodes[i][1])+'<br>'+ \
                'Lines: '+str(Led[i])+'<br>'+ \
                'Time: '+str(Ted[i])+'s<br>'+ \
                'Weight: '+str(Wed[i])+'<br>'
        )
        line_traces.append(line_trace)

    #Nodes trace
    trace4=go.Scatter(
        x=Xv,
        y=Yv,
        mode='markers',
        name='net',
        marker=dict(
            symbol='circle-dot',
            size=[G.out_degree(k,weight='weight')*2+3 for k in G.nodes()],
            color=colors[2],
            line=dict(
                color=['black'],
                width=1
            ),
            opacity=0.95
        ),
        text=['<b>[' + str(node) + '] ' + str(G.nodes[node]['name']) +  '</b>'\
              '<br>Out Degree: ' + str(G.out_degree(node,weight='weight')) + \
              '<br>In Degree: ' + str(G.in_degree(node,weight='weight')) + \
              '<br>Lines: ' + str(G.nodes[node]['lines']) \
              for node in G.nodes()],
        hoverinfo='text'
    )
    layout = go.Layout(
        title="<b>STOPS NETWORK GRAPH",
        showlegend=False,
        margin=dict(r=0, l=0, t=30, b=0),
        xaxis = {
            'showgrid':False,
            'visible':False
        },
        yaxis = {
            'showgrid':False,
            'showline':False,
            'zeroline':False,
            'visible':False
        },
        #One arrow per edge, drawn from the edge midpoint back to its first quarter.
        #NOTE(review): int() truncates the relative weight before the *2, so every
        #arrow gets size 1 except those on the heaviest edges (size 3) - confirm intent
        annotations=[
            dict(
                ax=(Xed[i][0] + Xed[i][1]) / 2,
                ay=(Yed[i][0] + Yed[i][1]) / 2, axref='x', ayref='y',
                x=(Xed[i][0] * 3 + Xed[i][1]) / 4,
                y=(Yed[i][0] * 3 + Yed[i][1]) / 4, xref='x', yref='y',
                showarrow=True,
                arrowhead=1+int(rel_weights[i])*2,
                arrowsize=1+int(rel_weights[i])*2,
                arrowwidth=1+int(rel_weights[i])*2,
                opacity=1
            ) for i in range(len(Xed))]
    )

    data=line_traces + [trace4]
    graph=go.Figure(data=data, layout=layout)

    return graph

def get_subnet_nodes (subnet_lines) :
    """Return the distinct stop ids (as ints) served by the given lines.

    The stops of every direction of each line in ``subnet_lines`` are read from
    the module-level ``line_stops_dict`` and de-duplicated.
    """
    unique_stops = {
        int(stop_id)
        for line_id in subnet_lines
        for direction_stops in line_stops_dict[line_id].values()
        for stop_id in direction_stops
    }
    return list(unique_stops)

def intersect(lst1, lst2): 
    """Return a list with the elements present in both ``lst1`` and ``lst2`` (duplicates removed)."""
    first, second = set(lst1), set(lst2)
    shared = first & second
    return list(shared)

def issubset(lst1,lst2):
    """Tell whether ``lst1`` appears as a contiguous run inside ``lst2``.

    ``lst2`` is treated as closed: its first element is appended after its last
    one, so a run that goes from the end of ``lst2`` back to its start (for
    example ``[last, first]``) also matches. ``lst2`` itself is not modified.

    Parameters
    ----------
    lst1 : list
        Run of elements to look for.
    lst2 : list
        Sequence to search in.

    Returns
    -------
    bool
        True if the run is found. An empty ``lst1`` always matches; a non-empty
        ``lst1`` never matches an empty ``lst2``.
    """
    if not lst2 :
        return not lst1
    closed = lst2 + [lst2[0]]
    width = len(lst1)
    #The +1 includes the last window, which is the one holding the wrap-around element
    for i in range(len(closed)-width+1) :
        if lst1 == closed[i:i+width] :
            return True
    return False

In [137]:
#Line collected dict, parsed from its JSON file
with open('../Data/Static/lines_collected_dict.json', mode='r') as f:
    lines_collected_dict = json.loads(f.read())

In [138]:
#Line stops dict, parsed from its JSON file
with open('../Data/Static/line_stops_dict.json', mode='r') as f:
    line_stops_dict = json.loads(f.read())

In [61]:
#Stops: keep only the columns used in this notebook
stops_columns = ['id','stop_name','lat','lon']
stops = pd.read_csv('../Data/Static/stops.csv')[stops_columns]
stops.head()

Unnamed: 0,id,stop_name,lat,lon
0,1,Avenida Valdemarín - Blanca De Castilla,40.4701,-3.78288
1,2,Avenida Valdemarín - La Salle,40.46862,-3.78596
2,3,Blanca De Castilla - Camino De La Zarzuela,40.46491,-3.78385
3,4,Pléyades - Ana Teresa,40.46139,-3.7841
4,5,Pléyades - Osa Mayor,40.45939,-3.78429


## GRAPH BY NEIGHBOUR STOPS

In [62]:
def build_net_graph(lineIds):
    """Build a directed graph of stops linked by the lines that join them.

    Reads the module-level ``line_stops_dict`` (line id -> direction -> list of
    stop ids) and the ``stops`` DataFrame (``id``, ``stop_name``, ``lat``,
    ``lon``), and relies on the ``intersect`` and ``issubset`` helpers.

    Lines whose direction keys are not exactly ``['1', '2']`` are ignored, and
    so are the stops that do not appear in ``stops``.

    Parameters
    ----------
    lineIds : iterable of str
        Ids of the lines to include; each must be a key of ``line_stops_dict``.

    Returns
    -------
    networkx.DiGraph
        Nodes are integer stop ids carrying ``name``, ``coords`` (lon, lat) and
        ``lines`` (int ids of the lines stopping there). An edge joins every
        stop to the following one along a line's route (direction 1 followed by
        direction 2, with the last stop linked back to the first) and carries
        ``lines`` (the lines accepted by ``issubset`` for that pair of stops)
        and ``weight`` (how many such lines there are).
    """
    #Only lines with exactly the directions '1' and '2' can be processed
    valid_lines = [line for line in lineIds
                   if list(line_stops_dict[line].keys()) == ['1','2']]

    #Initialize network graph
    G=nx.DiGraph()

    #Build nodes
    ld_stops_dict = {}
    for line in valid_lines :
        #Full route of the line, without the stops that arent in the df
        ld_stops = [stop_id
                    for stop_id in line_stops_dict[line]['1'] + line_stops_dict[line]['2']
                    if (stops.id==int(stop_id)).any()]

        #Add to dict
        ld_stops_dict[line] = ld_stops

        for stop_id in ld_stops :
            stop = int(stop_id)
            #Add node to graph if not in
            if stop not in G :
                G.add_node(stop)

            #Every remaining stop is in the df, so the first matching row exists
            stop_data = stops[stops.id==stop].iloc[0]
            node_attrs = G.nodes[stop]
            node_attrs['name'] = stop_data.stop_name
            #Add coordinates of node as position
            node_attrs['coords'] = (stop_data.lon,stop_data.lat)
            #Register the line on the stop, without duplicates
            node_attrs['lines'] = list(set(node_attrs.get('lines', []) + [int(line)]))

    #Build links
    for line in valid_lines :
        ld_stops = ld_stops_dict[line]

        for i in range(len(ld_stops)) :
            stop = int(ld_stops[i])
            #For i == 0 the index -1 wraps around to the last stop of the route
            stop_bef = int(ld_stops[i-1])

            #No self links
            if stop_bef == stop :
                continue

            #Back link: lines serving both stops that also have them as consecutive stops
            shared_lines = intersect(G.nodes[stop_bef]['lines'], G.nodes[stop]['lines'])
            link_lines_good = [
                shared_line for shared_line in shared_lines
                if issubset([str(stop_bef),str(stop)],ld_stops_dict[str(shared_line)])
            ]

            G.add_edge(stop_bef, stop, weight=len(link_lines_good), lines=link_lines_good)

    return G

In [63]:
#Night lines are the ones numbered 500 to 599; every other line is a day line
night_lines = list(map(str, range(500,600)))
lineIds_day, lineIds_night = [], []
for line in line_stops_dict.keys():
    (lineIds_night if line in night_lines else lineIds_day).append(line)

In [64]:
#Build the stops graph from every line in line_stops_dict
lineIds = line_stops_dict.keys()
G1 = build_net_graph(lineIds)

In [65]:
#nx.write_gpickle(G1, "../Data/Static/StopsNetworks/stops_net_graph")

In [66]:
#G1 = nx.read_gpickle("../Data/Static/StopsNetworks/stops_net_graph")

In [67]:
#Edges of G1 listed for stop 4924
list(G1.edges(4924))

[(4924, 1307), (4924, 1310)]

In [68]:
#Attributes stored on node 4924
G1.nodes[4924]

{'name': 'Avenida De Logroño - Bahía De Cádiz',
 'coords': (-3.59304, 40.46094),
 'lines': [166, 105, 112, 115, 151, 504]}

In [69]:
#Attributes stored on the edge from stop 4924 to stop 1310
G1.edges[(4924, 1310)]

{'weight': 4, 'lines': [112, 105, 115, 151]}

In [74]:
#net_graph = gen_graph(G1)
#net_graph.show()
#net_graph.write_html("../Data/Static/StopsNetworks/stops_night_net_graph.html")

In [None]:
#subnet_nodes = get_subnet_nodes(['1','82','44','91','92','99','132','133','502','506'])

#subG1 = G1.subgraph(subnet_nodes)
#subnet_graph = gen_graph(subG1)
#subnet_graph.show()

In [None]:
#Centrality measures of every stop in G1
pagerank = pd.Series(nx.pagerank(G1, alpha=0.9, weight='weight'))
deg_centrality = pd.Series(nx.degree_centrality(G1))
in_centrality = pd.Series(nx.in_degree_centrality(G1))
out_centrality = pd.Series(nx.out_degree_centrality(G1))

#One column per measure, each scaled by its own maximum
stops_pr = stops.set_index('id')[['stop_name']]
for column, scores in (
    ('pagerank', pagerank),
    ('deg_centrality', deg_centrality),
    ('in_centrality', in_centrality),
    ('out_centrality', out_centrality),
):
    stops_pr[column] = scores/scores.max()

#Top 15 stops by pagerank
stops_pr.sort_values(by='pagerank',ascending=False).head(15)

## Graph with times between stops

In [None]:
#Column types of the times between stops data
times_bt_dtypes = {
    'line': 'str',
    'direction': 'uint16',
    'st_hour': 'uint16',
    'end_hour': 'uint16',
    'stopA': 'uint16',
    'stopB': 'uint16',
    'bus': 'uint16',
    'trip_time':'float32',
    'api_trip_time':'float32'
}
#Load times between stops data
times_bt_stops = pd.read_csv('../Data/Processed/times_bt_stops.csv', dtype=times_bt_dtypes)
#Parse the dates
times_bt_stops['date'] = pd.to_datetime(times_bt_stops['date'], format='%Y-%m-%d')

In [None]:
#Day types, as lists of weekday numbers (0 = Monday, 1 = Tuesday ...)
day_type_dict = {
    'LA' : list(range(0, 5)), #Working days (Monday to Friday)
    'LJ' : list(range(0, 4)), #Monday to Thursday
    'VV' : [4], #Fridays
    'SA' : [5], #Saturdays
    'FE' : [6], #Sundays or public holidays
}

In [151]:
def build_net_graph(lineIds, now=None):
    """Build a directed graph of stops with the mean travel time on every link.

    Reads the module-level ``line_stops_dict`` (line id -> direction -> list of
    stop ids), the ``stops`` DataFrame (``id``, ``stop_name``, ``lat``,
    ``lon``), the ``times_bt_stops`` DataFrame (``st_hour``, ``stopA``,
    ``stopB``, ``date``, ``trip_time``) and ``day_type_dict``, and relies on
    the ``intersect`` and ``issubset`` helpers.

    Lines whose direction keys are not exactly ``['1', '2']`` are ignored, and
    so are the stops that do not appear in ``stops``.

    Parameters
    ----------
    lineIds : iterable of str
        Ids of the lines to include; each must be a key of ``line_stops_dict``.
    now : datetime.datetime, optional
        Moment whose hour and day type select the travel times to average.
        Defaults to the current time, which makes the result depend on when
        the function is run; pass a fixed value for reproducible graphs.

    Returns
    -------
    networkx.DiGraph
        Nodes are integer stop ids carrying ``name``, ``coords`` (lon, lat) and
        ``lines`` (int ids of the lines stopping there). An edge joins every
        stop to the following one along a line's route (direction 1 followed by
        direction 2, with the last stop linked back to the first) and carries
        ``lines`` (the lines accepted by ``issubset`` for that pair of stops),
        ``weight`` (how many such lines there are) and ``time`` (mean
        ``trip_time`` truncated to an int, or 0 when there is no data).
    """
    #Only lines with exactly the directions '1' and '2' can be processed
    valid_lines = [line for line in lineIds
                   if list(line_stops_dict[line].keys()) == ['1','2']]

    #Initialize network graph
    G=nx.DiGraph()

    #Build nodes
    ld_stops_dict = {}
    for line in valid_lines :
        #Full route of the line, without the stops that arent in the df
        ld_stops = [stop_id
                    for stop_id in line_stops_dict[line]['1'] + line_stops_dict[line]['2']
                    if (stops.id==int(stop_id)).any()]

        #Add to dict
        ld_stops_dict[line] = ld_stops

        for stop_id in ld_stops :
            stop = int(stop_id)
            #Add node to graph if not in
            if stop not in G :
                G.add_node(stop)

            #Every remaining stop is in the df, so the first matching row exists
            stop_data = stops[stops.id==stop].iloc[0]
            node_attrs = G.nodes[stop]
            node_attrs['name'] = stop_data.stop_name
            #Add coordinates of node as position
            node_attrs['coords'] = (stop_data.lon,stop_data.lat)
            #Register the line on the stop, without duplicates
            node_attrs['lines'] = list(set(node_attrs.get('lines', []) + [int(line)]))

    #Build links
    if now is None :
        now = datetime.datetime.now()
    #Day type
    if now.weekday() <= 4 :
        day_type = 'LA'
    elif now.weekday() == 5 :
        day_type = 'SA'
    else :
        day_type = 'FE'
    #Times bt stops in range: same starting hour and same day type as now.
    #Both masks are the same for every pair of stops, so they are applied once here
    in_hour = (times_bt_stops.st_hour >= now.hour) & (times_bt_stops.st_hour < now.hour + 1)
    in_day_type = times_bt_stops.date.dt.weekday.isin(day_type_dict[day_type])
    times_bt_range = times_bt_stops[in_hour & in_day_type]

    for line in valid_lines :
        ld_stops = ld_stops_dict[line]

        for i in range(len(ld_stops)) :
            stop = int(ld_stops[i])
            #For i == 0 the index -1 wraps around to the last stop of the route
            stop_bef = int(ld_stops[i-1])

            #No self links
            if stop_bef == stop :
                continue

            #Process mean time between stops
            pair_times = times_bt_range[(times_bt_range.stopA == stop_bef) & \
                                        (times_bt_range.stopB == stop)]
            time_bt = 0
            if pair_times.shape[0] > 0 :
                mean_time = pair_times.trip_time.mean()
                #The mean is NaN when every trip_time of the pair is missing
                if pd.notna(mean_time) :
                    time_bt = int(mean_time)

            #Back link: lines serving both stops that also have them as consecutive stops
            shared_lines = intersect(G.nodes[stop_bef]['lines'], G.nodes[stop]['lines'])
            link_lines_good = [
                shared_line for shared_line in shared_lines
                if issubset([str(stop_bef),str(stop)],ld_stops_dict[str(shared_line)])
            ]

            G.add_edge(stop_bef, stop, weight=len(link_lines_good), lines=link_lines_good, time=time_bt)

    return G

In [190]:
#Build the stops graph of the night lines
#NOTE(review): lineIds is assigned here but the graph below is built from lineIds_night only
lineIds = line_stops_dict.keys()
G1 = build_net_graph(lineIds_night)

In [191]:
#Turn G1 into a figure with gen_graph and display it
net_graph = gen_graph(G1)
net_graph.show()

In [195]:
#Summary statistics of the times between stops data
#NOTE(review): describe() statistics should not depend on the row order, so the
#sort looks unnecessary - confirm before removing it
times_bt_stops.sort_values('trip_time',ascending=False).describe()

Unnamed: 0.1,Unnamed: 0,direction,st_hour,end_hour,stopA,stopB,bus,trip_time,api_trip_time
count,1255639.0,1255639.0,1255639.0,1255639.0,1255639.0,1255639.0,1255639.0,1255639.0,1255639.0
mean,627819.0,1.475635,14.0874,15.0874,2068.164,2073.067,4502.321,87.0604,81.04919
std,362471.9,0.4994062,4.518119,4.518119,1319.58,1321.385,1940.129,125.1348,117.1143
min,0.0,1.0,0.0,1.0,9.0,9.0,102.0,0.0,-2570.0
25%,313909.5,1.0,10.0,11.0,1361.0,1364.0,4694.0,37.547,39.0
50%,627819.0,1.0,14.0,15.0,1685.0,1685.0,4721.0,59.724,58.0
75%,941728.5,2.0,18.0,19.0,3273.0,3273.0,4834.0,94.382,86.0
max,1255638.0,2.0,22.0,23.0,5912.0,5912.0,9256.0,1799.14,2354.0
