# Run:
Open question: is this the only notebook that needs to be run?

In [16]:
%%configure
{"pyFiles": ["/user/gottraux/dijkstra_algorithms.py"],
 "conf": {
    "spark.app.name": "dslab-group_final"
}}

A session has already been started. If you intend to recreate the session with new configurations, please include the -f argument.


### Imports and helper functions:

In [161]:
import pickle
import json
import networkx as nx
import pandas as pd
from pyspark.sql.functions import col

"""
To load (or reload) into hdfs:
hdfs dfs -rm /user/${JUPYTERHUB_USER}/dijkstra_algorithms.py 2>/dev/null
hdfs dfs -copyFromLocal notebooks/dijkstra_algorithms.py /user/${JUPYTERHUB_USER}/
"""
from dijkstra_algorithms import *

# Size, in hours, of the time window placed before the requested arrival time.
# It is used as `arrival_time - MAX_TRIP_DURATION*60` (minutes) both when
# filtering the timetable edges and when seeding the search.
MAX_TRIP_DURATION = 2

# Maps a day id to the name used by `day_trips` to filter `calendar`.
# Only Monday to Friday are listed: day ids 5 and 6 raise KeyError in `day_trips`.
days_dict = {0: 'monday', 1: 'tuesday', 2: 'wednesday', 3: 'thursday', 4: 'friday'}
def day_trips(*day_ids):
    """
    day_trips: select the trips whose service satisfies the condition of
    every requested day.
    input: any number of day ids (keys of days_dict)
    output: spark dataframe with a single trip_id column

    Relies on the module-level `calendar` and `trips` dataframes.
    """
    # e.g. day ids (0, 4) give the condition "monday and friday"
    condition = " and ".join([days_dict[day_id] for day_id in day_ids])

    services_on_days = calendar.where(condition).select('service_id')
    trips_on_days = services_on_days.join(trips, on='service_id')
    return trips_on_days.select('trip_id')

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [162]:
%%local
import os
import pandas as pd
username = os.environ['JUPYTERHUB_USER']
#username = 'gottraux'

In [163]:
%%send_to_spark -i username -t str -n username

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

Successfully passed 'username' as 'username' to Spark kernel

## Load graph data

In [164]:
# Timetable tables used by `day_trips` to find the trips running on a given day.
trips = spark.read.format('orc').load('/data/sbb/timetables/orc/trips/000000_0')
calendar = spark.read.format('orc').load('/data/sbb/timetables/orc/calendar/000000_0')

# Per-user graph data; `username` was sent to the Spark kernel by the cell above.
# nodes_df provides stop_name / stop_lat / stop_lon, edges_df provides the
# transport edges together with their mean and std columns.
nodes_df = spark.read.orc("/user/{}/nodes.orc".format(username))
edges_df = spark.read.orc("/user/{}/edges_with_mean_and_std_sec.orc".format(username))

#durations_dicts = json.loads(sc.textFile('/user/{}/durations_for_confidence_.json'.format(username)).collect()[0])

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [165]:
# Collect the nodes on the driver as (node_id, attributes) tuples, the format
# later passed to graph.add_nodes_from.
# NOTE(review): r[0] is the first column of nodes_df, presumably the stop id - confirm.
nodes = nodes_df.rdd.map(lambda r: (r[0], {'name': r['stop_name'],
                                              'lat': r['stop_lat'],
                                              'lon': r['stop_lon']})).collect()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [166]:
%%local
# Runs in the local kernel (%%local): the relative path is resolved against the
# local working directory.
# NOTE: pd.read_pickle can execute arbitrary code, only load a trusted file here.
walking_times = pd.read_pickle('walking_edges.pickle')

In [167]:
%send_to_spark -i walking_times -t df -m 20000

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

Successfully passed 'walking_times' as 'walking_times' to Spark kernel

In [168]:
# Build the walking edges in the (source, target, attributes) form that is later
# concatenated with the transport edges in create_edges_for_trip.
# Disabled variant that swaps source and target (reversed edges):
#edges_walking = (walking_times.withColumnRenamed('source', 'temp')
#                 .withColumnRenamed('target', 'source')
#                 .withColumnRenamed('temp', 'target').toPandas())
edges_walking = walking_times.toPandas()
# 'time': -1 is the marker dijkstra_with_time uses to recognise a walking edge.
edges_walking['attrs'] = edges_walking.apply(lambda x: {'time': -1, 'duration': x['walk_duration']}, axis=1)
# NOTE: edges_walking changes type here, from a DataFrame to a list of 3-element rows.
edges_walking = list(edges_walking[['source', 'target', 'attrs']].to_numpy())

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

The duration dictionaries are removed from `dijkstra_with_time` for the moment, and the function now also returns the mean and std of each edge because we need them for validation.

In [260]:
def dijkstra_with_time(G, first_source, arrival_time, last_target, confidence=None,
                       confidence_step=0.01, durations_dicts=None, paths=None):
    """
    Time-dependent Dijkstra search on a timetable multigraph.

    The search leaves `first_source` at `arrival_time - MAX_TRIP_DURATION*60`
    and stops as soon as `last_target` is settled. A transport edge can only be
    taken if it departs after the traveller reached its source node; when
    changing from one trip to another, it must in addition depart at least
    2 minutes plus the delay returned by `compute_delay_uncertainty` after
    that moment. Walking edges (edge attribute 'time' == -1) leave as soon as
    the traveller is there, shifted by that delay when they follow a transport.

    When `confidence` is given, the path found is passed to `validate_path`;
    if it is rejected, `confidence` is increased by `confidence_step` and the
    search is run again.

    Parameters
    ----------
    G : directed networkx multigraph. Every edge carries 'time' and
        'duration'; transport edges also carry 'trip_id', 'mean' and 'std'.
        Every node carries 'name'. The function works on a copy of G.
    first_source, last_target : node ids of the start and the destination.
    arrival_time : requested arrival time, in minutes.
    confidence : value forwarded to `compute_delay_uncertainty` and
        `validate_path`. With None the path is not validated.
        NOTE(review): `compute_delay_uncertainty` is still called with
        confidence=None in that case - confirm that it supports it.
    confidence_step : increment applied to `confidence` after a rejected path.
    durations_dicts : currently unused (the code that used it is disabled).
    paths : accepted for backward compatibility and ignored; the path is
        always built and returned.

    Returns
    -------
    pandas.DataFrame with one row per edge of the path and the columns
    from, from_id, to, to_id, duration, total_duration, departure_time, walk,
    no_change, mean_std_null, mean, std. The frame is empty when
    `last_target` cannot be reached, or when source and target are the same.

    Raises
    ------
    ValueError : if G is not directed, if an edge has no duration, or if a
        settled node is reached again with a smaller time.
    """
    columns = ['from', 'from_id', 'to', 'to_id', 'duration', 'total_duration',
               'departure_time', 'walk', 'no_change', 'mean_std_null', 'mean', 'std']
    # Minimum time, in minutes, needed to change from one trip to another.
    min_transfer = 2

    G = G.copy()
    if not G.is_directed():
        raise ValueError('Input graph is not directed while it should be.')
    G_succ = G.succ

    departure_time = arrival_time - MAX_TRIP_DURATION*60

    while True:
        # Disabled for the moment: rewriting the edge durations according to
        # `confidence` with the help of `durations_dicts`. Kept for reference.
        # if confidence != None:
        #     if durations_dicts == None:
        #         raise ValueError('You must pass durations_dicts for the confidence.')
        #     # Load dict with modifications
        #     if confidence not in durations_dicts:
        #         edge_and_data_tuple = zip(G.edges(keys=True),
        #                       map(lambda x: x[2], G.edges(data=True)))
        #         edge_and_data_tuple = filter(lambda x: 'mean' in x[1] and 'std' in x[1], edge_and_data_tuple)
        #         durations_dicts[confidence] = {e: {'duration': data['mean'] + compute_delay_uncertainty(data['mean'],
        #                                                                                                 data['std'],
        #                                                                                                 confidence)
        #                                            if data['mean'] != None and data['std'] != None
        #                                            else data['duration']
        #                                           } for e, data in edge_and_data_tuple}
        #     # Update graph
        #     nx.set_edge_attributes(G, durations_dicts[confidence])

        # edges of the best known path to every node reached so far
        e_paths = {first_source: []}
        # final arrival time of the settled nodes
        dist = {}
        # best arrival time found so far for the nodes pushed on the heap
        seen = {first_source: departure_time}

        # heap of (arrival time, tie-breaker, node) tuples
        c = count()
        fringe = []
        heappush(fringe, (departure_time, next(c), first_source))

        while fringe:
            (d, _, source) = heappop(fringe)

            # a node popped a second time is already settled
            if source in dist:
                continue
            dist[source] = d

            if source == last_target:
                break

            # look at every edge of the multigraph leaving the source node
            for target, edges in G_succ[source].items():
                for edge_id in edges:
                    # fetched once; this is the attribute dict stored in the copy of G
                    edge_data = G.get_edge_data(source, target, edge_id)

                    dep_time_edge = edge_data['time']
                    # walking edges are marked with a departure time of -1
                    walking_edge = dep_time_edge == -1
                    if walking_edge:
                        current_trip_id = None
                        # a walk can start as soon as we are at the node
                        dep_time_edge = d
                    else:
                        current_trip_id = edge_data['trip_id']

                    # the edge leaves before we get to the node
                    if dep_time_edge < d:
                        continue

                    # Only when the edge used to reach `source` is a transport
                    # (the original source has no such edge): account for its delay.
                    if e_paths[source] and not e_paths[source][-1][2]['walk']:
                        last_edge_info = e_paths[source][-1][2]
                        last_delay = compute_delay_uncertainty(last_edge_info['mean'],
                                                               last_edge_info['std'],
                                                               confidence)
                        if walking_edge:
                            # transport -> walk: the walk starts later
                            dep_time_edge += last_delay
                        elif (current_trip_id != last_edge_info['trip_id']
                              and dep_time_edge < d + min_transfer + last_delay):
                            # transport -> other transport: not enough time to change
                            continue

                    duration_cost = edge_data['duration']
                    if duration_cost is None:
                        raise ValueError('Edge without a duration.')

                    current_dist = dep_time_edge + duration_cost

                    if target in dist:
                        # dist only holds final values, a smaller one is a contradiction
                        if current_dist < dist[target]:
                            raise ValueError('Contradictory paths found:',
                                             'negative weights?')
                    elif target not in seen or current_dist < seen[target]:
                        seen[target] = current_dist
                        heappush(fringe, (current_dist, next(c), target))

                        # Annotate the edge (in the copy of G) with how it is used;
                        # these two keys are read back when extending and printing the path.
                        edge_data['walk'] = walking_edge
                        edge_data['departure_time'] = dep_time_edge
                        e_paths[target] = e_paths[source] + [(source, target, edge_data)]

        # There is no path to last_target
        if last_target not in e_paths:
            print('Error: No path to the target')
            return pd.DataFrame(columns=columns)

        if confidence is None or validate_path(e_paths[last_target], confidence, G):
            break
        # path rejected: be more conservative and search again
        confidence += confidence_step

    # Path validated
    nodes_data = G.nodes(data=True)
    best_path = e_paths[last_target]
    print('Going from {} ({}) to {} ({}) in {:.2f} minutes, departure at {}'.format(nodes_data[first_source]['name'],
                                                                                  first_source,
                                                                                  nodes_data[last_target]['name'],
                                                                                  last_target,
                                                                                  dist[last_target] - departure_time,
                                                                                  minute_to_string(departure_time)))

    # Build all the rows first and create the DataFrame once.
    records = []
    previous_info = None
    for source, target, edge_info in best_path:
        # same trip as the previous edge: we stay in the vehicle
        no_change = ('trip_id' in edge_info
                     and previous_info is not None
                     and 'trip_id' in previous_info
                     and previous_info['trip_id'] == edge_info['trip_id'])
        # NOTE(review): precedence made explicit, result unchanged. The flag is
        # also True for every edge without a 'std' key (walking edges) - confirm
        # that this is the intended meaning.
        mean_std_null = (('trip_id' in edge_info and 'mean' not in edge_info)
                         or 'std' not in edge_info)

        # Missing or null statistics: fall back to the scheduled duration, no spread.
        mean = edge_info.get('mean')
        std = edge_info.get('std')
        if mean is None or std is None:
            mean = edge_info['duration']
            std = 0

        records.append({'from': nodes_data[source]['name'],
                        'from_id': source,
                        'to': nodes_data[target]['name'],
                        'to_id': target,
                        'duration': edge_info['duration'],
                        'total_duration': dist[target] - departure_time,
                        'departure_time': minute_to_string(edge_info['departure_time']),
                        'walk': edge_info['walk'],
                        'no_change': no_change,
                        'mean_std_null': mean_std_null,
                        'mean': mean,
                        'std': std})
        previous_info = edge_info

    best_path_df = pd.DataFrame(records, columns=columns)

    with pd.option_context('display.max_rows', None,
                           'display.max_columns', None,
                           'display.max_colwidth', 15,
                           'display.expand_frame_repr', False):
        print(best_path_df)
    return best_path_df

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

#### Validation:

##### Feasible paths:
Create a function that looks through a path to see if it is valid. 
So it looks for:
- missed connections
- transfer time of less than 2 minutes between two transports

In [357]:
def is_path_valid(path):
    """
    Returns True if there is time to take all edges, and if when changing
    from one connection to another you have at least 2 minutes.

    path: path table as returned by dijkstra_with_time, read as
    path[column][i] for the columns from, to, from_id, departure_time,
    duration, walk and no_change. An empty path is valid.

    A message is printed for the first edge that makes the path invalid.
    """
    n_edges = len(path['from_id'])
    if n_edges == 0:
        return True

    # time at which we reach the end of the last edge handled
    time = convertToMinute(path['departure_time'][0]) + path['duration'][0]

    for i in range(1, n_edges):
        departure = convertToMinute(path['departure_time'][i])

        # these checks only apply to transport edges, not to walks
        if not path['walk'][i]:
            # the edge actually left before we got there
            if departure < time:
                print('You miss this connection. Time is {} while this edge leaves at {} from {} to {}'\
                      .format(minute_to_string(time), path['departure_time'][i], path['from'][i], path['to'][i]))
                return False

            # a transport -> transport change needs a 2 minutes transfer
            if not path['no_change'][i] and not path['walk'][i-1] and departure < time + 2:
                print('You do not have time to change to this connection between {} to {} leaving at {}. You arrive at {} and need at least 2 min transfer'\
                      .format(path['from'][i],path['to'][i], path['departure_time'][i], minute_to_string(time)))
                return False

        # Every edge moves the clock forward, including the first edge of a new
        # trip; otherwise the next check would compare against a stale arrival.
        time = departure + path['duration'][i]
    return True

# test of is path valid:
#assert(is_path_valid(best_path1))
#assert(is_path_valid(best_path2))

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

##### Validate a path:
Then, for a given path, we sample delays for the transfers where we go from a transport -> walk or from a transport -> transport. 

For transport 1 -> transport 2: the delay of transport 1 will be added to its trip duration
For transport -> walk: the delay of transport will be added to the departure time of walk 

After modifying these values, we check whether the path is still feasible. We repeat this operation a certain number of times and report the proportion of feasible paths. 

For the moment, delays are sampled from an absolute normal distribution (**?good?**). 

In [360]:
def validate_path_(path, confidence, num_tries=10):
    """
    Estimate how often a path stays feasible when its transports are delayed.

    For each of the `num_tries` simulations, a delay is drawn for the
    transport preceding every transfer (rows where 'no_change' is False):
    - transport -> transport: the delay is added to the duration of the first
      transport;
    - transport -> walk: the delay is added to the duration of the transport
      and the walk is rescheduled to start when the transport arrives.
    The delay is drawn from a normal distribution with the 'mean' and 'std'
    of the delayed edge, and is 0 when 'std' is 0. The modified path is then
    checked with is_path_valid.

    path: DataFrame as returned by dijkstra_with_time (it is not modified).
    confidence: currently unused.
    num_tries: number of simulations, must be positive.

    Returns the fraction of simulations in which the path is valid.
    Raises ValueError if num_tries is not positive.
    """
    # Local import: numpy is not imported by the notebook's own import cell.
    import numpy as np

    if num_tries <= 0:
        raise ValueError('num_tries must be positive.')

    n_edges = len(path['from_id'])
    num_valids = 0

    for _ in range(num_tries):
        path_copy = path.copy()
        # start at 1 so that the transfer between the first two edges is covered
        for i in range(1, n_edges):
            # only for transfers, either to another transport or to a walk
            if path['no_change'][i]:
                continue

            mean = path['mean'][i-1]
            std = path['std'][i-1]

            # sample a delay for the previous connection
            #delay = np.absolute(np.random.normal(mean, std))
            if std != 0:
                delay = np.random.normal(mean, std)
            else:
                delay = 0

            # only a transport can be delayed
            if path['walk'][i-1]:
                continue

            if not path['walk'][i]:
                # between two transports the delay is just added to the trip duration
                print('Delay of {} for {} to {}'.format(delay, path_copy['from'][i-1], path_copy['to'][i-1]))
                # .loc writes into path_copy itself (chained indexing may write to a temporary)
                path_copy.loc[i-1, 'duration'] += delay
            else:
                # transport -> walk: if the transport is delayed the walk starts later
                if delay !=0:
                    print('Delay of {} for {} to {}, need to start walking later from {}'\
                              .format(delay, path_copy['from'][i-1], path_copy['to'][i-1], path_copy['to'][i-1]))

                path_copy.loc[i-1, 'duration'] += delay

                # the walk starts when the delayed transport arrives
                arrival_of_edge_before = path_copy['duration'][i-1]+convertToMinute(path_copy['departure_time'][i-1])
                path_copy.loc[i, 'departure_time'] = minute_to_string(arrival_of_edge_before)

        if is_path_valid(path_copy):
            num_valids += 1
    return num_valids/float(num_tries)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [361]:
# Test to see whether a connection can be missed.
# NOTE: this cell needs `graph`, `arrival_hour` and `arrival_minute`, which are
# defined in cells further down the notebook.
# `durations_dicts` is no longer passed: it is not defined by any executable cell
# and dijkstra_with_time does not use it.
test = dijkstra_with_time(graph, '8503000', arrival_hour*60+arrival_minute, last_target='8591122', confidence=0.98)
# Override the delay statistics of edge 10 so that the transfer after it can fail.
# .loc writes into `test` itself (chained indexing may write to a temporary);
# the path must have at least 11 edges, otherwise .loc would add a new row.
test.loc[10, 'mean'] = 6
test.loc[10, 'std'] = 4
validate_path_(test, 0.95)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

Going from Zurich HB (8503000) to Zurich, ETH Honggerberg (8591122) in 32.00 minutes, departure at 10:30
              from  from_id              to    to_id  duration  total_duration departure_time   walk no_change mean_std_null      mean       std
0        Zurich HB  8503000  Zurich, Bah...  8587349  4.704125        4.704125          10:30   True     False          True  4.704125         0
1   Zurich, Bah...  8587349  Zurich, Sta...  8591379  1.000000        7.000000          10:36  False     False         False  1.918103    1.3509
2   Zurich, Sta...  8591379  Zurich, Nor...  8591291  3.000000       10.000000          10:37  False      True         False  1.183184   1.15276
3   Zurich, Nor...  8591291  Zurich, Let...  8591251  1.000000       11.000000          10:40  False      True         False  1.297521   1.28782
4   Zurich, Let...  8591251  Zurich Wipk...  8591066  1.000000       12.000000          10:41  False      True         False  1.255934   1.03589
5   Zurich Wipk...  85910

## Create duration dictionaries if needed

The code below is commented out: we do not have permission to **change** an existing file, but writing works if another path is given.

durations_dicts = {}
edge_and_data_tuple = zip(graph.edges(keys=True),
              map(lambda x: x[2], graph.edges(data=True)))
edge_and_data_tuple = filter(lambda x: 'mean' in x[1] and 'std' in x[1], edge_and_data_tuple)
for c in [0.25, 0.5, 0.75, 0.9, 0.95, 0.98, 0.99]:
    durations_dicts[c] = {e: {'duration': data['mean'] + compute_delay_uncertainty(data['mean'], 
                                                                                            data['std'], 
                                                                                            c)
                                       if data['mean'] != None and data['std'] != None
                                       else data['duration']
                                      } for e, data in edge_and_data_tuple}
    
""" Conversion to json """
durations_dicts_for_json = {}
for c in durations_dicts.keys():
    durations_dicts_for_json[c] = {str(k): v for k, v in durations_dicts[c].items()}

print('Length of json:', len(json.dumps(durations_dicts_for_json))) -> 30106955

""" Save to hdfs """
sc.parallelize([json.dumps(durations_dicts_for_json)]).coalesce(1).saveAsTextFile('/user/{}/durations_for_confidence_.json'.format(username))

## Choose time of arrival

In [26]:
# Requested arrival: day id (key of days_dict, 4 -> 'friday') and clock time 12:30.
day_id, arrival_hour, arrival_minute = 4, 12, 30

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

## Create graph

In [27]:
def create_edges_for_trip(edges_df, day_id, arrival_time):
    """
    create_edges_for_trip: builds the graph edges usable to arrive by a given
    time, i.e. the transport edges inside the MAX_TRIP_DURATION window before
    that time, plus all the walking edges.
    @input:
    - edges_df: spark df from which we construct the transport edges
    - day_id: id of week-day (e.g. wednesday is day id 2, see days_dict)
    - arrival_time: time at which we want to arrive, in minutes (e.g. 11*60+30)
    @output: list of (source, target, attributes) edges, the collected
    transport edges followed by the global edges_walking
    """
    window_start = arrival_time - 60*MAX_TRIP_DURATION

    def to_edge(r):
        # one timetable row -> (source, target, attributes)
        attrs = {'duration': r['trip_duration'],
                 'time': float(r['departure_time']),
                 'trip_id': r['trip_id'],
                 'mean': r['mean'],
                 'std': r['std']}
        return (r['stop_id'], r['next_stop'], attrs)

    # NOTE(review): the lower bound is strict, so an edge leaving exactly at the
    # start of the window is dropped - confirm that this is intended.
    in_window = ((col('departure_time') > window_start) &
                 (col('arrival_time') <= arrival_time))

    # keep the trips running on that day, then the edges inside the window
    day_edges = edges_df.join(day_trips(day_id), on='trip_id')
    transport_edges = day_edges.filter(in_window).rdd.map(to_edge).collect()

    return transport_edges + edges_walking

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [28]:
# Transport edges of the chosen day inside the time window, plus the walking edges.
# The arrival time is passed in minutes.
edges = create_edges_for_trip(edges_df, day_id, arrival_hour*60+arrival_minute)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [29]:
# Directed multigraph: several edges (one per departure) can link the same two stops.
graph = nx.MultiDiGraph()
graph.add_nodes_from(nodes)
graph.add_edges_from(edges)

old_number_of_nodes = graph.number_of_nodes()
# Remove unreachable nodes
# Reachability is computed from '8503000' (Zurich HB) only.
# NOTE(review): normal_dijkstra is not defined in this notebook, presumably it
# comes from dijkstra_algorithms - confirm.
dists, paths = normal_dijkstra(graph, '8503000')
not_reachable = set(graph.nodes) - set(dists.keys())
_ = graph.remove_nodes_from(list(not_reachable))
print('{} nodes removed'.format(old_number_of_nodes - graph.number_of_nodes()))

# Temp for problem of name's encoding
# Strips the accents: NFKD decomposition, then the non-ASCII characters are dropped.
# NOTE(review): .encode returns a byte string; on Python 3 the names would
# become bytes objects - confirm the target Python version.
import unicodedata
nodes_data = graph.nodes(data=True)
for n in graph.nodes:
    nodes_data[n]['name'] = unicodedata.normalize('NFKD', nodes_data[n]['name']).encode('ascii','ignore')

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

18 nodes removed

## Run algorithm

In [46]:
# Tao's example (except for the departure time)
# From Zurich HB (8503000) to Zurich, Auzelg (8591049).
# NOTE(review): the first call is labelled "Without minimum confidence" but it
# passes confidence = 0.98, exactly like the second one - confirm which is intended.
# NOTE(review): durations_dicts is not defined by any executable cell of this notebook.
print('Without minimum confidence ->')
best_path1 = dijkstra_with_time(graph, '8503000', arrival_hour*60+arrival_minute, last_target='8591049',confidence = 0.98, durations_dicts=durations_dicts)
print('\nWith minimum confidence ->')
best_path2 = dijkstra_with_time(graph, '8503000', arrival_hour*60+arrival_minute, last_target='8591049', confidence=0.98, durations_dicts=durations_dicts)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

Without minimum confidence ->
Going from Zurich HB (8503000) to Zurich, Auzelg (8591049) in 29.00 minutes, departure at 10:30
             from         from_id              to           to_id  duration  total_duration departure_time   walk no_change mean_std_null
0       Zurich HB         8503000       Zurich HB  8503000:0:4...  2.135259        2.135259          10:30   True     False          True
1       Zurich HB  8503000:0:4...  Zurich Hard...     8503020:0:3  2.000000        9.000000          10:37  False     False         False
2  Zurich Hard...     8503020:0:3  Zurich Oerl...     8503006:0:8  5.000000       14.000000          10:39  False      True         False
3  Zurich Oerl...     8503006:0:8      Glattbrugg     8503310:0:3  2.000000       17.000000          10:45  False      True         False
4      Glattbrugg     8503310:0:3  Glattbrugg,...         8590620  3.063448       20.063448          10:47   True     False          True
5  Glattbrugg,...         8590620  Glattbrugg,

In [38]:
# From Triemli to Altstetten
# First without a confidence (the path is not validated), then with a
# confidence of 0.95.
# NOTE(review): durations_dicts is not defined by any executable cell of this notebook.
print('Without minimum confidence ->')
best_path1 = dijkstra_with_time(graph, '8503610', arrival_hour*60+arrival_minute, last_target='8591057')
print('\nWith minimum confidence ->')
best_path2 = dijkstra_with_time(graph, '8503610', arrival_hour*60+arrival_minute, last_target='8591057', confidence=0.95, durations_dicts=durations_dicts)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

Without minimum confidence ->
Going from Zurich, Triemli (8503610) to Zurich Altstetten, Bahnhof N (8591057) in 17.14 minutes, departure at 10:30
             from  from_id              to    to_id  duration  total_duration departure_time   walk no_change mean_std_null
0  Zurich, Tri...  8503610  Zurich, In ...  8591214  1.000000        2.000000          10:31  False     False         False
1  Zurich, In ...  8591214  Zurich, Gol...  8591163  1.000000        3.000000          10:32  False      True         False
2  Zurich, Gol...  8591163  Zurich, Alb...  8591036  2.000000        5.000000          10:33  False      True         False
3  Zurich, Alb...  8591036  Zurich, Alb...  8591037  0.000000        5.000000          10:35  False      True         False
4  Zurich, Alb...  8591037  Zurich, Unt...  8591408  2.000000        7.000000          10:35  False      True         False
5  Zurich, Unt...  8591408  Zurich, Rau...  8591311  1.000000        8.000000          10:37  False      True 

## Cells to keep

In [22]:
# Weird attributes?
# Prints the attribute dict of the edge with key 0 between the two given stops.
print(graph.get_edge_data('8503000:0:41/42', '8503020:0:3', 0))

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

{'duration': 2.0, 'std': 1.05697167217, 'time': 682.0, 'trip_id': u'234.TA.26-15-j19-1.41.H', 'mean': 0.27319172912666667}

In [23]:
# Proportion of null mean or std in non-walking edges
# (the non-walking edges are the ones carrying both a 'mean' and a 'std' key).
# The edges are collected once, and no len(filter(...)) is used, so the cell
# also runs on Python 3.
transport_edge_attrs = [attrs for _, _, attrs in graph.edges(data=True)
                        if 'mean' in attrs and 'std' in attrs]
null_stat_count = sum(1 for attrs in transport_edge_attrs
                      if attrs['mean'] is None or attrs['std'] is None)
null_stat_count / float(len(transport_edge_attrs))

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

0.046632828786368166