# Import libs


In [1]:
import osmnx as ox
import networkx as nx
import folium
from tqdm import tqdm

#From paper
from collections import namedtuple, Counter
import numpy as np

import torch
import torch.nn.functional as F
 

import dgl
from dgl.data import (
    load_data, 
    TUDataset, 
    CoraGraphDataset, 
    CiteseerGraphDataset, 
    PubmedGraphDataset
)
#from ogb.nodeproppred import DglNodePropPredDataset not needed
from dgl.data.ppi import PPIDataset
from dgl.dataloading import GraphDataLoader

from sklearn.preprocessing import StandardScaler
from typing import Tuple, List, Dict

Using backend: pytorch


In [146]:
from networkx.classes.multidigraph import MultiDiGraph
from dgl.heterograph import DGLHeteroGraph
import random

from sklearn.preprocessing import StandardScaler

In [2]:
import copy

# Load data

In [112]:
# Load the 2020 Wroclaw graph from its GraphML file.
# The path is relative, so it resolves against the notebook's working directory.
wroclaw_2020_path = "../osmnx-extractor/graphml_files/europe_poland_Wroclaw_2020_v3.xml"
wroclaw_2020 = ox.io.load_graphml(wroclaw_2020_path)

# Process data


In [113]:
def _encode_edge_value(raw_value, key, default_values, onehot_key):
    """Turn one raw edge attribute value into its encoded (numeric) form."""
    fallback = default_values.get(key, 0)
    value = raw_value

    # Multi-valued attributes arrive as lists; only the first entry is kept.
    if isinstance(value, list):
        value = value[0] if value else fallback

    # Categorical attributes: look the category up, unknown categories get the fallback.
    if key in onehot_key:
        value = onehot_key[key].get(value, fallback)

    # Anything still textual must be numeric text; otherwise it becomes 0.0.
    if isinstance(value, str):
        try:
            value = float(value)
        except ValueError:
            value = 0.0
    return value


def encode_data(graph_nx: MultiDiGraph, selected_keys: List = [], default_values: Dict = {}, onehot_key : Dict = {}) -> MultiDiGraph:
    """Return a copy of ``graph_nx`` whose selected edge attributes are encoded.

    For every edge and every key in ``selected_keys``:

    * a missing attribute is created with ``default_values[key]`` (or 0);
    * a list value is reduced to its first element;
    * if ``key`` is in ``onehot_key`` the value is replaced by
      ``onehot_key[key][value]``, or by the default (or 0) when the category
      is unknown;
    * a remaining string is converted with ``float``; non-numeric text becomes 0.0.

    Each edge is encoded exactly once. The previous implementation walked
    ``graph.edges()`` (one entry per parallel edge) and then every key between
    the two nodes, so parallel edges were re-encoded from their already-encoded
    values and their categorical codes were overwritten with the default.

    Args:
        graph_nx: Input multigraph; it is not modified.
        selected_keys: Edge attribute names to encode.
        default_values: Per-key fallback value.
        onehot_key: Per-key mapping from category to integer code.

    Returns:
        A copy of the graph with encoded edge attributes. Attribute names not
        listed in ``selected_keys`` are left as they are.
    """
    # The mutable defaults in the signature are only read, never mutated.
    graph_nx_copy = graph_nx.copy()
    for _, _, edge_attrs in graph_nx_copy.edges(data=True):
        for key in selected_keys:
            if key in edge_attrs:
                edge_attrs[key] = _encode_edge_value(edge_attrs[key], key, default_values, onehot_key)
            else:
                edge_attrs[key] = default_values.get(key, 0)
    return graph_nx_copy

In [114]:
# Edge attributes that get encoded by encode_data and later become features.
selected_keys = ['oneway', 'lanes', 'highway', 'maxspeed',
                 'length', 'access', 'bridge', 'junction',
                 'width', 'service', 'tunnel'] # not used 'cycleway', 'bycycle']
# Fallback per attribute, used when the attribute is missing on an edge or
# its category is not listed in the coding below.
# For 'highway' the fallback 11 is the code of 'path'; for 'access' 6 is 'no'.
default_values = {'oneway': False, 'lanes': 2, 'highway': 11, 'maxspeed': 50,
                 'length':0, 'access':6, 'bridge': 0, 'junction': 0,
                 'width':2, 'service':0, 'tunnel':0}
# Category -> integer code for every categorical attribute (despite the name,
# this is not limited to 'highway'). generate_cycle_label treats highway
# code 14 ('cycleway') as the positive class.
highway_coding = {'highway': {'primary':0, 'unclassified':1, 'tertiary_link':2, 'secondary':3,
                      'residential':4, 'track':5, 'service':6, 'trunk':7, 'tertiary':8,
                      'primary_link':9, 'pedestrian':10, 'path':11, 'living_street':12,
                      'trunk_link':13, 'cycleway':14, 'bridleway':15, 'secondary_link':16},
                  'access':{'customers':0, 'delivery':1, 'designated':2, 'destination':3,
                      'emergency':4, 'military':5, 'no':6, 'permissive':7,'permit':8, 'yes':9},
                  'bridge':{'1':1, 'viaduct':1, 'yes':1},
                  'junction':{ 'yes':1, 'roundabout':2, 'y_junction':3,},
                  'tunnel': {'yes':1, 'building_passage': 2, 'passage':3 },
                  'service': {'alley':1, 'bus':2, 'drive-through':3, 'driveway':4,
                      'emergency_access':5, 'ground':6, 'parking_aisle':7, 'spur':8}}

In [115]:
# Encode the selected edge attributes of the 2020 graph; the loaded graph itself is left untouched.
wroclaw_2020_encoded = encode_data(wroclaw_2020, selected_keys, default_values, highway_coding)

In [116]:
def get_all_key_and_unique_values(graph_nx: MultiDiGraph, selected_keys: Dict = {}) -> Dict:
    """Collect the distinct values of selected edge attributes.

    Args:
        graph_nx: Multigraph whose edge attributes are inspected.
        selected_keys: Attribute names to report. Despite the ``Dict``
            annotation, any container supporting ``in`` works (a list is what
            the built-in default uses). When empty, a built-in default list of
            road attributes is used.

    Returns:
        Dict mapping each selected attribute that occurs on at least one edge
        to the set of values seen for it. List values contribute each of their
        elements individually.
    """
    if not selected_keys:
        # NOTE(review): 'bycycle' looks like a typo for 'bicycle' - confirm before changing.
        selected_keys = ['oneway', 'lanes', 'highway', 'maxspeed',
                         'length', 'access', 'bridge', 'junction',
                         'width', 'service', 'tunnel', 'cycleway', 'bycycle']

    seen_values = {}
    # edges(data=True) yields every parallel edge exactly once.
    for _, _, edge_attrs in graph_nx.edges(data=True):
        for key, val in edge_attrs.items():
            if key not in selected_keys:
                continue
            values_for_key = seen_values.setdefault(key, set())
            # Lists are always flattened, including the first value seen for a
            # key (previously that case stored the list itself and the final
            # set() conversion failed with "unhashable type: 'list'").
            if isinstance(val, list):
                values_for_key.update(val)
            else:
                values_for_key.add(val)
    return seen_values

In [117]:
# Sanity check: after encoding, 'highway' should only contain integer codes.
seen_values = get_all_key_and_unique_values(wroclaw_2020_encoded)
seen_values['highway']

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}

In [118]:
def generate_cycle_label(graph_nx: MultiDiGraph, highway_coding: Dict = {}) -> MultiDiGraph:
    """Return a copy of ``graph_nx`` with a binary ``'label'`` on every edge.

    An edge gets ``label = 1`` when its encoded ``'highway'`` value equals the
    cycleway code, otherwise ``label = 0``. Edges without a ``'highway'``
    attribute are labelled 0.

    Args:
        graph_nx: Multigraph with encoded ``'highway'`` edge attributes; it is
            not modified.
        highway_coding: Mapping from highway category name to integer code.
            The code of ``'cycleway'`` is taken from it; when the mapping has
            no such entry the historical value 14 is used.

    Returns:
        A labelled copy of the graph.
    """
    cycleway_code = highway_coding.get('cycleway', 14)
    graph_nx_copy = graph_nx.copy()
    # One assignment per edge; the label does not depend on the other attributes.
    for _, _, edge_attrs in graph_nx_copy.edges(data=True):
        edge_attrs['label'] = 1 if edge_attrs.get('highway') == cycleway_code else 0
    return graph_nx_copy


In [119]:
# Add the binary 'label' edge attribute to the encoded 2020 graph
# (positive class: highway code 14, i.e. 'cycleway' in highway_coding).
wroclaw_2020_labels = generate_cycle_label(wroclaw_2020_encoded, highway_coding['highway'])

In [120]:
# Spot-check the edge data between two hard-coded node ids (encoded attributes plus 'label').
wroclaw_2020_labels[95584835][6152142174]

AtlasView({0: {'osmid': 28458110, 'oneway': True, 'lanes': 2.0, 'ref': '94', 'name': 'Aleja Armii Krajowej', 'highway': 0, 'maxspeed': 50.0, 'length': 12.949, 'access': 6, 'bridge': 0, 'junction': 0, 'width': 2, 'service': 0, 'tunnel': 0, 'label': 0}})

In [128]:
def convert_nx_to_dgl_as_linegraph(graph_nx: MultiDiGraph, selected_keys : List = []) -> DGLHeteroGraph:
    """Convert a labelled networkx graph into a DGL line graph with node features.

    The networkx graph is converted to DGL carrying ``selected_keys`` and
    ``'label'`` as edge data, then turned into its line graph. The edge data
    of the original graph becomes node data of the line graph:

    * ``ndata['feat']``: float matrix with one row per original edge and one
      column per entry of ``selected_keys`` (in that order);
    * ``ndata['label']``: the original edge labels.

    Args:
        graph_nx: Multigraph whose edges carry every key in ``selected_keys``
            plus ``'label'``.
        selected_keys: Edge attribute names to use as features.

    Returns:
        The DGL line graph with ``'feat'`` and ``'label'`` node data.
    """
    graph_dgl = dgl.from_networkx(graph_nx, edge_attrs=(list(selected_keys) + ['label']))
    graph_dgl_line_graph = dgl.line_graph(graph_dgl)

    if selected_keys:
        # Assumes every attribute is one scalar per edge, i.e. edata[key] has
        # shape (num_edges,) - TODO confirm for attributes not produced by encode_data.
        # Cast to float so attributes of different dtypes can be combined.
        per_key_columns = [graph_dgl.edata[key].float() for key in selected_keys]
        # Stack as columns: row i holds all features of edge i. Concatenating
        # the columns end-to-end and reshaping (the previous approach) filled
        # the rows with consecutive values of a single attribute instead.
        node_features = torch.stack(per_key_columns, dim=1)
    else:
        node_features = torch.zeros((graph_dgl.num_edges(), 0))

    graph_dgl_line_graph.ndata['feat'] = node_features
    graph_dgl_line_graph.ndata['label'] = graph_dgl.edata['label']
    return graph_dgl_line_graph
    

In [136]:
# Build the DGL line graph for 2020 and display its summary.
wroclaw_2020_dgl = convert_nx_to_dgl_as_linegraph(wroclaw_2020_labels, selected_keys)
wroclaw_2020_dgl

Graph(num_nodes=113802, num_edges=319881,
      ndata_schemes={'feat': Scheme(shape=(13,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={})

# Test on 2014

In [135]:
# Run the same pipeline (load -> encode -> label -> line graph) on the 2014 graph,
# reusing the selected keys, defaults and coding defined for 2020.
wroclaw_2014_path = "../osmnx-extractor/graphml_files/europe_poland_Wroclaw_2014_v3.xml"
wroclaw_2014 = ox.io.load_graphml(wroclaw_2014_path)
wroclaw_2014_encoded = encode_data(wroclaw_2014, selected_keys, default_values, highway_coding)
wroclaw_2014_labels = generate_cycle_label(wroclaw_2014_encoded, highway_coding['highway'])
wroclaw_2014_dgl = convert_nx_to_dgl_as_linegraph(wroclaw_2014_labels, selected_keys)
wroclaw_2014_dgl

Graph(num_nodes=42745, num_edges=121144,
      ndata_schemes={'feat': Scheme(shape=(13,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={})

# Add train, val and test masks
The mask size coefficients are taken from the Cora dataset.

In [139]:
def get_random_split(number_of_nodes, train_size_coef = 0.05, val_size_coef = 0.18, test_size_coef = 0.37, seed = None):
    """Draw disjoint random train/valid/test node index sets.

    Each split size is ``int(number_of_nodes * coef)``. All indices are drawn
    in a single sample without replacement and then partitioned, so no node
    appears in more than one split (independent sampling per split, as done
    previously, let the splits overlap).

    Args:
        number_of_nodes: Indices are drawn from ``range(number_of_nodes)``.
        train_size_coef: Fraction of nodes in the train split.
        val_size_coef: Fraction of nodes in the validation split.
        test_size_coef: Fraction of nodes in the test split.
        seed: Optional seed for a reproducible split. When ``None`` the global
            ``random`` module state is used.

    Returns:
        Dict with keys ``'train'``, ``'valid'`` and ``'test'``, each a sorted
        list of node indices.

    Raises:
        ValueError: If the three splits together need more nodes than exist,
            which would make disjoint splits impossible.
    """
    train_size = int(number_of_nodes * train_size_coef)
    val_size = int(number_of_nodes * val_size_coef)
    test_size = int(number_of_nodes * test_size_coef)

    total_size = train_size + val_size + test_size
    if total_size > number_of_nodes:
        raise ValueError(
            "train/val/test splits need %d nodes but only %d are available"
            % (total_size, number_of_nodes)
        )

    rng = random if seed is None else random.Random(seed)
    # random.sample returns the picks in random order, so slicing the result
    # yields three random, mutually exclusive subsets.
    picked = rng.sample(range(number_of_nodes), total_size)
    val_end = train_size + val_size

    split_idx = {
        'train': sorted(picked[:train_size]),
        'valid': sorted(picked[train_size:val_end]),
        'test': sorted(picked[val_end:]),
    }
    return split_idx

In [140]:
def scale_feats(x):
    """Standardize a feature tensor with a freshly fitted ``StandardScaler``.

    The scaler is fitted on the very data it then transforms.

    Args:
        x: Tensor that can be viewed as a numpy array via ``x.numpy()``.

    Returns:
        Tuple ``(scaled, scaler)``: the transformed features as a float
        tensor and the fitted scaler.
    """
    fitted_scaler = StandardScaler()
    raw_values = x.numpy()
    standardized = fitted_scaler.fit(raw_values).transform(raw_values)
    return torch.from_numpy(standardized).float(), fitted_scaler

In [141]:
def preprocess(graph):
    """Normalize self-loops and materialize the graph's sparse formats.

    Existing self-loops are removed before self-loops are added. The result
    of that chain is the graph that is returned; ``create_formats_()`` is
    then called on it.

    Args:
        graph: DGL graph to preprocess.

    Returns:
        The graph produced by ``remove_self_loop().add_self_loop()``.
    """
    # The former read/write-back of ndata["feat"] was a no-op left over from
    # a disabled dgl.to_bidirected step and has been dropped.
    graph = graph.remove_self_loop().add_self_loop()
    graph.create_formats_()
    return graph

In [148]:
def create_mask(graph: DGLHeteroGraph) -> Tuple[DGLHeteroGraph, Tuple[int, int], StandardScaler]:
    """Preprocess a graph, scale its features and attach train/val/test masks.

    Args:
        graph: DGL graph with a 2-D ``ndata["feat"]`` feature matrix.

    Returns:
        Tuple ``(graph, (num_features, num_classes), scaler)`` where ``graph``
        is the preprocessed graph carrying scaled ``"feat"`` plus boolean
        ``"train_mask"``, ``"val_mask"`` and ``"test_mask"`` node data,
        ``num_classes`` is fixed to 2, and ``scaler`` is the fitted scaler
        returned by ``scale_feats``.
    """
    num_nodes = graph.num_nodes()

    split_idx = get_random_split(num_nodes)
    graph = preprocess(graph)

    # NOTE(review): the scaler is fitted on all nodes, not only the train split.
    feat, scaler = scale_feats(graph.ndata["feat"])
    graph.ndata["feat"] = feat

    mask_to_split = (("train_mask", "train"), ("val_mask", "valid"), ("test_mask", "test"))
    for mask_name, split_name in mask_to_split:
        # An explicit integer dtype keeps an empty split (tiny graphs) usable
        # as an index; without it an empty list becomes a float tensor.
        node_idx = torch.as_tensor(split_idx[split_name], dtype=torch.long)
        mask = torch.zeros(num_nodes, dtype=torch.bool)
        mask[node_idx] = True
        graph.ndata[mask_name] = mask

    num_features = graph.ndata["feat"].shape[1]
    num_classes = 2
    return graph, (num_features, num_classes), scaler

In [149]:
# Attach masks and scaled features to the 2020 graph; elem_20 is (num_features, num_classes).
wro_2020_with_masks, elem_20, scaler_20 = create_mask(wroclaw_2020_dgl)

In [150]:
# Same for the 2014 graph; it gets its own random split and its own fitted scaler.
wro_2014_with_masks, elem_14, scaler_14 = create_mask(wroclaw_2014_dgl)

In [153]:
# Persist both graphs in one file; list order is [2014, 2020].
from dgl.data.utils import save_graphs
save_graphs("./wro_14_20_masks.graph", [wro_2014_with_masks, wro_2020_with_masks])

In [154]:
import pickle

# Persist the fitted scalers in the same order as the saved graphs: [2014, 2020].
# NOTE: pickle files must only be loaded back from a trusted source.
with open('standardScaler_14_20.pkl', 'wb+') as handle:
    pickle.dump([scaler_14, scaler_20], handle, protocol=pickle.HIGHEST_PROTOCOL)

In [151]:
# Count False/True entries of each mask on the 2020 graph.
print("Train:", wro_2020_with_masks.ndata["train_mask"].unique(return_counts=True))
print("Val:", wro_2020_with_masks.ndata["val_mask"].unique(return_counts=True))
print("Test:", wro_2020_with_masks.ndata["test_mask"].unique(return_counts=True))

Train: (tensor([False,  True]), tensor([108112,   5690]))
Val: (tensor([False,  True]), tensor([93318, 20484]))
Test: (tensor([False,  True]), tensor([71696, 42106]))


In [152]:
# Count False/True entries of each mask on the 2014 graph.
print("Train:", wro_2014_with_masks.ndata["train_mask"].unique(return_counts=True))
print("Val:", wro_2014_with_masks.ndata["val_mask"].unique(return_counts=True))
print("Test:", wro_2014_with_masks.ndata["test_mask"].unique(return_counts=True))

Train: (tensor([False,  True]), tensor([40608,  2137]))
Val: (tensor([False,  True]), tensor([35051,  7694]))
Test: (tensor([False,  True]), tensor([26930, 15815]))
