# Import libs


In [1]:
import osmnx as ox
import networkx as nx
import folium
from tqdm import tqdm

#From paper
from collections import namedtuple, Counter
import numpy as np

import torch
import torch.nn.functional as F
 

import dgl
from dgl.data import (
    load_data, 
    TUDataset, 
    CoraGraphDataset, 
    CiteseerGraphDataset, 
    PubmedGraphDataset
)
#from ogb.nodeproppred import DglNodePropPredDataset not needed
from dgl.data.ppi import PPIDataset
from dgl.dataloading import GraphDataLoader

from sklearn.preprocessing import StandardScaler
from typing import Tuple, List, Dict

Using backend: pytorch


In [2]:
from networkx.classes.multidigraph import MultiDiGraph
from dgl.heterograph import DGLHeteroGraph
import random

from sklearn.preprocessing import StandardScaler

In [3]:
import copy

# Load data

In [4]:
# GraphML file for the Wrocław graph (a 2020 snapshot, going by the variable name).
wroclaw_2020_path = "./data/final_results/wroclaw.xml"
# Read the GraphML file through OSMnx; the later cells treat the result as a MultiDiGraph.
wroclaw_2020 = ox.io.load_graphml(wroclaw_2020_path)

# Process data


In [None]:
def encode_data(graph_nx: MultiDiGraph, selected_keys: List = None, default_values: Dict = None, onehot_key: Dict = None) -> MultiDiGraph:
    """Return a copy of ``graph_nx`` whose selected edge attributes are numeric.

    For every edge and every key in ``selected_keys``:

    * a missing attribute is created with ``default_values[key]`` (or ``0``);
    * a list value is reduced to its first element;
    * if ``onehot_key`` has a table for the key, the value is replaced by its
      code, or by the default when the value is not in the table;
    * a remaining string is converted with ``float`` (``0.0`` on failure).

    Args:
        graph_nx: source graph; it is not modified.
        selected_keys: edge attribute names to encode.
        default_values: per-key fallback value.
        onehot_key: per-key mapping ``raw value -> integer code``.

    Returns:
        The encoded copy of the graph.
    """
    selected_keys = [] if selected_keys is None else selected_keys
    default_values = {} if default_values is None else default_values
    onehot_key = {} if onehot_key is None else onehot_key

    graph_nx_copy = graph_nx.copy()
    # edges(data=True) yields each parallel edge exactly once.  Walking
    # edges() and then every connection between the two nodes encoded
    # parallel edges twice, and the second pass replaced the already encoded
    # integer code with the default because it is not a key of the table.
    for _, _, graph_edge in graph_nx_copy.edges(data=True):
        for key in selected_keys:
            fallback = default_values.get(key, 0)
            if key not in graph_edge:
                graph_edge[key] = fallback
                continue

            value = graph_edge[key]
            # if value of edge key is a list take first element
            if isinstance(value, list):
                value = value[0]

            if key in onehot_key:
                value = onehot_key[key].get(value, fallback)

            if isinstance(value, str):
                try:
                    value = float(value)
                except ValueError:
                    value = 0.0

            graph_edge[key] = value
    return graph_nx_copy

In [None]:
# Edge attributes that are turned into model features by encode_data.
selected_keys = ['oneway', 'lanes', 'highway', 'maxspeed',
                 'length', 'access', 'bridge', 'junction',
                 'width', 'service', 'tunnel'] # the 'cycleway' and 'bicycle' tags are not used
# Fallback per attribute; encode_data uses it when the attribute is missing
# or when its value is absent from the coding table below.
# 'highway': 11 is the code of 'path' and 'access': 6 is the code of 'no'.
default_values = {'oneway': False, 'lanes': 2, 'highway': 11, 'maxspeed': 50,
                 'length':0, 'access':6, 'bridge': 0, 'junction': 0,
                 'width':2, 'service':0, 'tunnel':0}
# Integer codes for categorical attributes (despite the name this covers more
# than 'highway').
# NOTE(review): the value listing recorded further down in this notebook shows
# 'motorway', 'motorway_link', 'footway' and 'road' for highway, 'private' for
# access and 'covered' for bridge.  None of them has a code here, so
# encode_data maps them to the defaults above - confirm this is intended.
highway_coding = {'highway': {'primary':0, 'unclassified':1, 'tertiary_link':2, 'secondary':3,
                      'residential':4, 'track':5, 'service':6, 'trunk':7, 'tertiary':8,
                      'primary_link':9, 'pedestrian':10, 'path':11, 'living_street':12,
                      'trunk_link':13, 'cycleway':14, 'bridleway':15, 'secondary_link':16},
                  'access':{'customers':0, 'delivery':1, 'designated':2, 'destination':3,
                      'emergency':4, 'military':5, 'no':6, 'permissive':7,'permit':8, 'yes':9},
                  'bridge':{'1':1, 'viaduct':1, 'yes':1},
                  'junction':{ 'yes':1, 'roundabout':2, 'y_junction':3,},
                  'tunnel': {'yes':1, 'building_passage': 2, 'passage':3 },
                  'service': {'alley':1, 'bus':2, 'drive-through':3, 'driveway':4,
                      'emergency_access':5, 'ground':6, 'parking_aisle':7, 'spur':8}}

In [None]:
wroclaw_2020_encoded = encode_data(wroclaw_2020, selected_keys, default_values, highway_coding)

In [74]:
def get_all_key_and_unique_values(graph_nx: MultiDiGraph, selected_keys: Dict = None) -> Dict:
    """Collect the distinct values of selected edge attributes.

    Args:
        graph_nx: graph whose edge attributes are inspected.
        selected_keys: container of attribute names to collect (anything
            supporting ``in``); when empty or omitted a built-in list of road
            attributes is used.

    Returns:
        Dict mapping each attribute name that occurs on at least one edge to
        the set of its values.  List-valued attributes contribute each of
        their elements.
    """
    if not selected_keys:
        selected_keys = ['oneway', 'lanes', 'highway', 'maxspeed',
                         'length', 'access', 'bridge', 'junction',
                         'width', 'service', 'tunnel', 'surface']

    seen_values = {}
    for _, _, attributes in graph_nx.edges(data=True):
        for key, val in attributes.items():
            if key not in selected_keys:
                continue
            bucket = seen_values.setdefault(key, set())
            # Flatten lists on every occurrence.  Previously the first value
            # of a key was stored as-is, so a list-valued first occurrence
            # made the final set() conversion fail on an unhashable list.
            if isinstance(val, list):
                bucket.update(val)
            else:
                bucket.add(val)
    return seen_values

In [11]:
# Gather the distinct attribute values of the 2020 graph.
seen_values = get_all_key_and_unique_values(wroclaw_2020)

# Print and count the values of every attribute except 'length' and 'width'.
unique_vals = 0
for key in seen_values:
    if key in ('length', 'width'):
        continue
    print(key, seen_values[key])
    unique_vals += len(seen_values[key])

oneway {False, True}
lanes {'7', '4', '6', '2', '1', '3', '5'}
highway {'tertiary_link', 'service', 'tertiary', 'track', 'secondary', 'residential', 'motorway', 'primary', 'secondary_link', 'motorway_link', 'living_street', 'path', 'trunk_link', 'footway', 'unclassified', 'road', 'cycleway', 'trunk', 'primary_link'}
maxspeed {'110', '70', '80', '100', '60', '50', '40', '90', '30', '5', '20', '120', '10', '15'}
access {'no', 'permissive', 'destination', 'private', 'yes'}
bridge {'covered', 'viaduct', 'yes'}
junction {'roundabout'}
tunnel {'building_passage', 'yes'}
service {'parking_aisle', 'emergency_access', 'driveway'}


In [12]:
unique_vals

56

In [None]:
def generate_cycle_label(graph_nx: MultiDiGraph, highway_coding: Dict = None) -> MultiDiGraph:
    """Return a copy of ``graph_nx`` with ``'label'`` and ``'id'`` on every edge.

    ``'label'`` is 1 when the edge's encoded ``'highway'`` value equals the
    cycleway code and 0 otherwise.  ``'id'`` is the position of the edge in
    ``edges()`` iteration order, so it is unique per edge.

    Args:
        graph_nx: graph whose ``'highway'`` attribute is already encoded as an
            integer code; it is not modified.
        highway_coding: mapping ``highway name -> code``.  The code of
            ``'cycleway'`` is read from it; 14 is used when the mapping is
            omitted or has no such entry.

    Returns:
        The labelled copy of the graph.
    """
    cycleway_code = 14
    if highway_coding:
        cycleway_code = highway_coding.get('cycleway', cycleway_code)

    graph_nx_copy = graph_nx.copy()
    # Enumerate each parallel edge once.  The previous counter advanced once
    # per (u, v) pass and was written to every connection between the two
    # nodes, so parallel edges ended up sharing a single id.
    for edge_id, (_, _, graph_edge) in enumerate(graph_nx_copy.edges(data=True)):
        graph_edge['label'] = 1 if graph_edge.get('highway') == cycleway_code else 0
        graph_edge['id'] = edge_id

    return graph_nx_copy


In [None]:
wroclaw_2020_labels = generate_cycle_label(wroclaw_2020_encoded, highway_coding['highway'])

In [None]:
wroclaw_2020_labels[95584835][6152142174]

In [None]:
def convert_nx_to_dgl_as_linegraph(graph_nx: MultiDiGraph, selected_keys: List[str]) -> DGLHeteroGraph:
    """Convert a labelled road graph into a DGL line graph.

    Every edge of ``graph_nx`` becomes a node of the returned graph.  Its
    ``selected_keys`` attributes form one row of ``ndata['feat']`` and its
    ``'label'`` / ``'id'`` attributes are copied to ``ndata['label']`` /
    ``ndata['id']``.

    Args:
        graph_nx: graph whose edges carry ``selected_keys``, ``'label'`` and
            ``'id'`` as numeric attributes.
        selected_keys: attribute names, in feature-column order.

    Returns:
        The line graph with node data ``'feat'``, ``'label'`` and ``'id'``.
    """
    graph_dgl = dgl.from_networkx(graph_nx, edge_attrs=(selected_keys + ['label'] + ['id']))
    graph_dgl_line_graph = dgl.line_graph(graph_dgl)

    # One column per attribute, one row per edge.  Concatenating the
    # per-attribute vectors end to end and reshaping to (-1, K) interleaved
    # them, so a row contained values of one attribute taken from K
    # different edges instead of the K attributes of one edge.
    feature_columns = [graph_dgl.edata[key].reshape(-1, 1) for key in selected_keys]
    graph_dgl_line_graph.ndata['feat'] = torch.cat(feature_columns, dim=1)
    graph_dgl_line_graph.ndata['label'] = graph_dgl.edata['label']
    graph_dgl_line_graph.ndata['id'] = graph_dgl.edata['id']

    return graph_dgl_line_graph
    

In [None]:
wroclaw_2020_dgl = convert_nx_to_dgl_as_linegraph(wroclaw_2020_labels, selected_keys)
wroclaw_2020_dgl

# Test on 2014

In [None]:
# Run the same pipeline as for the 2020 graph on the 2014 file:
# load -> encode attributes -> add 'label'/'id' -> convert to a DGL line graph.
wroclaw_2014_path = "../osmnx-extractor/graphml_files/europe_poland_Wroclaw_2014_v3.xml"
wroclaw_2014 = ox.io.load_graphml(wroclaw_2014_path)
wroclaw_2014_encoded = encode_data(wroclaw_2014, selected_keys, default_values, highway_coding)
wroclaw_2014_labels = generate_cycle_label(wroclaw_2014_encoded, highway_coding['highway'])
wroclaw_2014_dgl = convert_nx_to_dgl_as_linegraph(wroclaw_2014_labels, selected_keys)
# Bare expression: displays the graph summary in the notebook.
wroclaw_2014_dgl

# Add train, validation and test masks
The mask size coefficients are taken from the Cora dataset.

In [129]:
def get_random_split(number_of_nodes, train_size_coef=0.05, val_size_coef=0.18, test_size_coef=0.37):
    """Draw disjoint random train/validation/test index sets.

    Args:
        number_of_nodes: indices are drawn from ``range(number_of_nodes)``.
        train_size_coef: fraction of the nodes used for training.
        val_size_coef: fraction of the nodes used for validation.
        test_size_coef: fraction of the nodes used for testing.

    Returns:
        Dict with keys ``'train'``, ``'valid'`` and ``'test'``, each a sorted
        list of node indices.  No index appears in more than one list.

    Raises:
        ValueError: if the three parts together need more nodes than exist.
    """
    train_size = int(number_of_nodes * train_size_coef)
    val_size = int(number_of_nodes * val_size_coef)
    test_size = int(number_of_nodes * test_size_coef)
    total_size = train_size + val_size + test_size
    if total_size > number_of_nodes:
        raise ValueError(
            f"split needs {total_size} nodes but only {number_of_nodes} are available"
        )

    # Draw once and cut the draw into three parts.  Three independent draws
    # could put the same node into several splits and leak evaluation nodes
    # into the training set.
    drawn = random.sample(range(number_of_nodes), total_size)
    return {
        'train': sorted(drawn[:train_size]),
        'valid': sorted(drawn[train_size:train_size + val_size]),
        'test': sorted(drawn[train_size + val_size:]),
    }

In [130]:
def randome_sample_with_exceptions(max_range, size, exceptions):
    """Sample ``size`` distinct integers from ``range(max_range)``, skipping ``exceptions``.

    Args:
        max_range: exclusive upper bound of the candidate indices.
        size: number of indices to draw.
        exceptions: iterable of indices that must not be drawn.  Duplicates
            and values outside the range are ignored.

    Returns:
        List of ``size`` indices in random order.

    Raises:
        ValueError: if fewer than ``size`` candidates remain (raised by
            ``random.sample``).
    """
    # A set lookup per candidate replaces one list.remove() per exception,
    # which rescanned the whole candidate list every time.
    excluded = set(exceptions)
    candidates = [index for index in range(max_range) if index not in excluded]
    return random.sample(candidates, size)
        
    

In [131]:
def get_stratified_split(labels, train_bicycle_coef = 0.3, val_bicycle_coef = 0.4, test_bicycle_coef = 0.5):
    """Build class-balanced train/validation/test index lists.

    Each split holds a random share of the positive nodes (``labels`` equal
    to True/1) plus the same number of randomly drawn non-positive nodes.
    The three splits are drawn independently of each other.

    Args:
        labels: 1-D tensor of node labels.
        train_bicycle_coef: share of the positive nodes put into ``'train'``.
        val_bicycle_coef: share of the positive nodes put into ``'valid'``.
        test_bicycle_coef: share of the positive nodes put into ``'test'``.

    Returns:
        Dict with keys ``'train'``, ``'valid'`` and ``'test'``, each a sorted
        list of node indices.
    """
    total_nodes = labels.shape[0]
    positive_ids = (labels == True).nonzero(as_tuple=True)[0].tolist()
    positive_count = len(positive_ids)

    wanted_sizes = {
        'train': int(positive_count * train_bicycle_coef),
        'valid': int(positive_count * val_bicycle_coef),
        'test': int(positive_count * test_bicycle_coef),
    }
    # Every split must ask for strictly fewer positives than exist.
    for wanted in wanted_sizes.values():
        assert positive_count > wanted

    split_idx = {}
    for split_name, wanted in wanted_sizes.items():
        positives = random.sample(positive_ids, wanted)
        negatives = randome_sample_with_exceptions(total_nodes, wanted, positive_ids)
        split_idx[split_name] = sorted(positives + negatives)

    return split_idx

In [132]:
def scale_feats(x):
    """Standardise a feature tensor with a freshly fitted ``StandardScaler``.

    Args:
        x: feature tensor; it is converted with ``x.numpy()``.

    Returns:
        Tuple ``(scaled, scaler)``: the scaled features as a float tensor and
        the fitted scaler.
    """
    raw = x.numpy()
    scaler = StandardScaler().fit(raw)
    scaled = scaler.transform(raw)
    return torch.from_numpy(scaled).float(), scaler

In [133]:
def preprocess(graph):
    """Give every node exactly one self-loop and materialise the graph formats.

    Args:
        graph: graph with a ``'feat'`` entry in ``ndata``.

    Returns:
        The graph produced by removing and then re-adding self-loops.
    """
    # The features are read and written back unchanged; this is left over
    # from a disabled dgl.to_bidirected step.
    node_features = graph.ndata["feat"]
    graph.ndata["feat"] = node_features

    without_loops = graph.remove_self_loop()
    graph = without_loops.add_self_loop()
    graph.create_formats_()
    return graph

In [134]:
def create_mask(graph: DGLHeteroGraph) -> Tuple[DGLHeteroGraph, list, StandardScaler]: 
    """Attach train/validation/test masks and standardised features to ``graph``.

    The split comes from ``get_stratified_split`` on ``ndata['label']``, the
    graph goes through ``preprocess``, and ``ndata['feat']`` is replaced by
    its ``scale_feats`` result.

    Args:
        graph: graph with ``'feat'`` and ``'label'`` node data.

    Returns:
        Tuple ``(graph, (num_features, num_classes), scaler)``.  The middle
        element is a tuple although the annotation says ``list``, and
        ``num_classes`` is always 2.
    """
    num_nodes = graph.num_nodes()

    split_idx = get_stratified_split(graph.ndata['label'])
    train_idx, val_idx, test_idx = (split_idx[name] for name in ("train", "valid", "test"))

    graph = preprocess(graph)

    # Only the train indices are inspected; all three are converted together.
    if not torch.is_tensor(train_idx):
        train_idx, val_idx, test_idx = (
            torch.as_tensor(indices) for indices in (train_idx, val_idx, test_idx)
        )

    scaled_feat, scaler = scale_feats(graph.ndata["feat"])
    graph.ndata["feat"] = scaled_feat

    def as_mask(indices):
        # Boolean vector that is True exactly at the given node indices.
        return torch.full((num_nodes,), False).index_fill_(0, indices, True)

    graph.ndata["train_mask"] = as_mask(train_idx)
    graph.ndata["val_mask"] = as_mask(val_idx)
    graph.ndata["test_mask"] = as_mask(test_idx)

    num_features = graph.ndata["feat"].shape[1]
    num_classes = 2
    return graph, (num_features, num_classes), scaler

In [None]:
wro_2020_with_masks, elem_20, scaler_20 = create_mask(wroclaw_2020_dgl)

In [None]:
wro_2014_with_masks, elem_14, scaler_14 = create_mask(wroclaw_2014_dgl)

In [None]:
from dgl.data.utils import save_graphs
save_graphs("./wro_14_stratify.graph", [wro_2014_with_masks])

In [None]:
import pickle

with open('standardScaler_14_20.pkl', 'wb+') as handle:
    pickle.dump([scaler_14, scaler_20], handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
print("Train:", wro_2020_with_masks.ndata["train_mask"].unique(return_counts=True))
print("Val:", wro_2020_with_masks.ndata["val_mask"].unique(return_counts=True))
print("Test:", wro_2020_with_masks.ndata["test_mask"].unique(return_counts=True))

In [None]:
print("Train:", wro_2014_with_masks.ndata["train_mask"].unique(return_counts=True))
print("Val:", wro_2014_with_masks.ndata["val_mask"].unique(return_counts=True))
print("Test:", wro_2014_with_masks.ndata["test_mask"].unique(return_counts=True))


# Visualize


In [None]:
from dgl.data.utils import load_graphs

glist, label_dict = load_graphs("./data/wro_2020_with_masks.graph")

In [None]:
wro_2020_masks = glist[0]

In [None]:
# Alias of the loaded graph; not referenced again in the visible cells.
first_graph = wro_2020_masks
# Convert the DGL graph back to networkx, carrying one node attribute over.
# NOTE(review): the graphs built in this notebook store 'feat', 'label', 'id'
# and the three masks as node data, not 'node_labels' - confirm the saved
# file really has that field.
wro_2020_nx_g = dgl.to_networkx(wro_2020_masks, node_attrs=['node_labels'])


In [None]:
wro_2020_nx_g.graph = {'created_date': '2022-06-15 04:16:09',
 'created_with': 'OSMnx 1.1.2',
 'crs': 'epsg:4326',
 'name': 'europe_poland_Wroclaw_2020'}

In [None]:
mask_temp = wro_2014_with_masks.ndata['train_mask'] 

In [None]:
def visualise_masked_roads(grapf_networkx, mask):
    """Draw the road graph on a folium map, coloured by mask membership.

    Edges whose ``'id'`` is selected by ``mask`` are drawn green when their
    ``'label'`` is 1 and red otherwise; all other edges are drawn blue.  Only
    the first parallel edge (key 0) of every node pair is drawn.

    Args:
        grapf_networkx: labelled graph whose edges carry ``'id'``, ``'label'``
            and ``'osmid'``.  The fourth ``_``-separated part of its ``name``
            is used as the key of the popup data.
        mask: boolean tensor with one entry per edge of the graph.

    Returns:
        Tuple ``(map, [unmasked_graph, masked_cycle_graph, masked_road_graph])``.
    """
    assert grapf_networkx.number_of_edges() == mask.shape[0]

    # A set keeps the per-edge membership test constant-time.
    mask_ids = set(((mask == True).nonzero(as_tuple=True)[0]).tolist())

    year = str(grapf_networkx.name.split("_")[3])
    dif_masked_cycle = nx.create_empty_copy(grapf_networkx)
    dif_masked_road = dif_masked_cycle.copy()
    diff_unmasked = dif_masked_cycle.copy()

    node_pairs = set(grapf_networkx.edges())
    for u, v in tqdm(node_pairs, total=len(node_pairs)):
        edge = grapf_networkx[u][v][0]
        is_masked = edge['id'] in mask_ids

        dif_attributes = edge.copy()
        dif_attributes['vis_data'] = dict(
            href=f"https://www.openstreetmap.org/way/{edge['osmid']}",
            years=['cycle', 'masked'],
            data={year: [dif_attributes['label'], is_masked]},
        )

        if not is_masked:
            target = diff_unmasked
        elif dif_attributes['label'] == 1:  # if cycle
            target = dif_masked_cycle
        else:
            target = dif_masked_road
        target.add_edges_from([(u, v, dif_attributes)])

    m = ox.plot_graph_folium(diff_unmasked, popup_attribute='vis_data', color="blue")
    m = ox.plot_graph_folium(dif_masked_cycle, popup_attribute='vis_data', graph_map=m, color="green")
    m = ox.plot_graph_folium(dif_masked_road, popup_attribute='vis_data', graph_map=m, color="red")

    return m, [diff_unmasked, dif_masked_cycle, dif_masked_road]

In [None]:
m, parts = visualise_masked_roads(wroclaw_2014_labels ,mask_temp)


In [None]:
m.save(f"data/wro_2014_with_masks.html")

In [None]:
ox.plot_graph_folium(parts[1], color="green")

In [None]:
ox.plot_graph_folium(parts[0], color="blue")

In [None]:
def show_preds(grapf_networkx, mask, preds):
    """Draw the road graph on a folium map, coloured by model prediction.

    Edges whose ``'id'`` is selected by ``preds`` are drawn green when their
    ``'label'`` is 1 and orange (``#d95f02``) otherwise; all other edges are
    drawn blue.  Only the first parallel edge (key 0) of every node pair is
    drawn.

    Args:
        grapf_networkx: labelled graph whose edges carry ``'id'``, ``'label'``
            and ``'osmid'``.  The fourth ``_``-separated part of its ``name``
            is used as the key of the popup data.
        mask: tensor with one entry per edge; only its length is checked.
        preds: tensor whose True/1 entries mark the predicted edges.

    Returns:
        Tuple ``(map, [unpredicted_graph, predicted_cycle_graph,
        predicted_road_graph])``.
    """
    assert grapf_networkx.number_of_edges() == mask.shape[0]

    # A set keeps the per-edge membership test constant-time.
    pred_ids = set(((preds == True).nonzero(as_tuple=True)[0]).tolist())

    year = str(grapf_networkx.name.split("_")[3])
    dif_masked_cycle = nx.create_empty_copy(grapf_networkx)
    dif_masked_road = dif_masked_cycle.copy()
    diff_unmasked = dif_masked_cycle.copy()

    node_pairs = set(grapf_networkx.edges())
    for u, v in tqdm(node_pairs, total=len(node_pairs)):
        edge = grapf_networkx[u][v][0]
        is_predicted = edge['id'] in pred_ids

        dif_attributes = edge.copy()
        dif_attributes['vis_data'] = dict(
            href=f"https://www.openstreetmap.org/way/{edge['osmid']}",
            years=['cycle', 'masked'],
            data={year: [dif_attributes['label'], is_predicted]},
        )

        if not is_predicted:
            target = diff_unmasked
        elif dif_attributes['label'] == 1:  # predicted and really a cycleway
            target = dif_masked_cycle
        else:  # predicted but not a cycleway
            target = dif_masked_road
        target.add_edges_from([(u, v, dif_attributes)])

    m = ox.plot_graph_folium(diff_unmasked, popup_attribute='vis_data', color="blue")
    m = ox.plot_graph_folium(dif_masked_road, popup_attribute='vis_data', graph_map=m, color="#d95f02")
    m = ox.plot_graph_folium(dif_masked_cycle, popup_attribute='vis_data', graph_map=m, color="green")

    return m, [diff_unmasked, dif_masked_cycle, dif_masked_road]

In [None]:
import pickle

# Load the stored model output for the 2014 graph.
# NOTE: unpickling executes code embedded in the file - only load pickle
# files that were produced locally or come from a trusted source.
with open('data/best_pred_2014.pickle', 'rb') as handle:
    best_pred = pickle.load(handle)

In [None]:
# Model output loaded from the pickle file.
y_pred = best_pred
# Ground-truth labels of the 2014 line-graph nodes, flattened and cast to long.
y_true = wro_2014_with_masks.ndata['label']
y_true = y_true.squeeze().long()
# max(1)[1] is the index of the largest value along dim 1, i.e. the predicted class per row.
preds = y_pred.max(1)[1].type_as(y_true)

In [None]:
preds.unique(return_counts=True)

In [None]:
# Reuse mask_temp for the predictions, cast to the dtype of the old mask.
mask_temp = preds.type_as(mask_temp)
# The label tensor is passed in the `mask` position; show_preds colours the
# edges by its third argument (the predictions).
m, parts = show_preds(wroclaw_2014_labels,wro_2014_with_masks.ndata['label'] ,mask_temp)


In [None]:
mask_temp.unique(return_counts=True)

In [None]:


# Geocode every place one by one.
# NOTE(review): places_to_download is defined in a cell further down, so this
# cell only runs after that one; gdf is overwritten on every iteration and
# keeps only the result for the last place.
for place in places_to_download:
    gdf = ox.geocoder.geocode_to_gdf(place)

In [64]:
# Place queries (written in Polish) passed to the OSMnx geocoder.
places_to_download = ["Wrocław, województwo dolnośląskie, Polska",
            "Gdańsk, województwo pomorskie, Polska",
            "Poznań, województwo wielkopolskie, Polska",
            "Warszawa, województwo mazowieckie, Polska",
            "Kraków, województwo małopolskie, Polska",
            "Berlin, Niemcy",
            "Mediolan, Lombardia, Włochy",
            "Amsterdam, Holandia Północna, Niderlandy, Holandia",
            "Londyn, Greater London, Anglia, Wielka Brytania",
            "Budapeszt, Środkowe Węgry, Węgry",
            "Sztokholm, Solna kommun, Stockholm County, Szwecja",
            "Oslo, Norwegia",
            "Wilno, Samorząd miasta Wilna, Okręg wileński, Litwa",
            "Bruksela, Brussels-Capital, Belgia",
            "Rzym, Roma Capitale, Lacjum, Włochy",
            "Florencja, Metropolitan City of Florence, Toskania, Włochy",
            "Bolonia, Emilia-Romania, Włochy",
            "Lizbona, Lisbon, Portugalia",
            "Madryt, Área metropolitana de Madrid y Corredor del Henares, Wspólnota Madrytu, Hiszpania",
            "Sewilla, Sevilla, Andaluzja, Hiszpania",
            "Walencja, Comarca de València, Walencja, Wspólnota Walencka, Hiszpania",
            "Barcelona, Barcelonès, Barcelona, Katalonia, 08001, Hiszpania",
            "Bilbao, Biscay, Kraj Basków, Hiszpania",
            "Saragossa, Zaragoza, Saragossa, Aragonia, Hiszpania",
            "Marsylia, Marseille, Bouches-du-Rhône, Prowansja-Alpy-Lazurowe Wybrzeże, Francja metropolitalna, 13000, Francja",
            "Lyon, Métropole de Lyon, Departemental constituency of Rhône, Owernia-Rodan-Alpy, Francja metropolitalna, Francja",
            "Bordeaux, Żyronda, Nowa Akwitania, Francja metropolitalna, Francja",
            "Paryż, Ile-de-France, Francja metropolitalna, Francja",
            "Rennes, Ille-et-Vilaine, Brittany, Francja metropolitalna, Francja",
            # NOTE(review): the next entry ends with a trailing space.
            "Lille, Nord, Hauts-de-France, Francja metropolitalna, Francja ",
            "Amiens, Somme, Hauts-de-France, Francja metropolitalna, Francja",
            "Dublin, Dublin 1, Leinster, Irlandia",
            "Rotterdam, Holandia Południowa, Niderlandy, Holandia",
            "Haga, Holandia Południowa, Niderlandy, Holandia",
            "Dordrecht, Holandia Południowa, Niderlandy, Holandia",
            "Antwerpia, Flanders, Belgia",
            "Essen, Nadrenia Północna-Westfalia, Niemcy",
            "Hanower, Region Hannover, Dolna Saksonia, Niemcy",
            "Monachium, Bawaria, Niemcy",
            "Berno, Bern-Mittelland administrative district, Bernese Mittelland administrative region, Berno, Szwajcaria",
            "Zurych, District Zurich, Zurych, Szwajcaria",
            "Bazylea, Basel-City, Szwajcaria",
            "Salzburg, 5020, Austria",
            "Wiedeń, Austria",
            "Praga, Czechy",
            "Malmo, Malmö kommun, Skåne County, Szwecja",
            "Central Region, Malta",
            "Ljubljana, Upravna Enota Ljubljana, Słowenia",
            "Zagrzeb, City of Zagreb, Chorwacja",
            # NOTE(review): duplicate - the same Budapest query already appears earlier in this list.
            "Budapeszt, Środkowe Węgry, Węgry",
            "Bukareszt, Rumunia",
            "Helsinki, Helsinki sub-region, Uusimaa, Southern Finland, Mainland Finland, Finlandia",
            "Wenecja, Venezia, Wenecja Euganejska, Włochy",
            "Arnhem, Geldria, Niderlandy, Holandia",
            "Bratysława, Kraj bratysławski, Słowacja",
            "Tallinn, Prowincja Harju, Estonia",
            "Ryga, Liwonia, Łotwa",
            "Neapol, Napoli, Kampania, Włochy",
            "Bari, Apulia, Włochy",
            "Cardiff, Walia, CF, Wielka Brytania",
            "Birmingham, Attwood Green, West Midlands Combined Authority, Anglia, Wielka Brytania",
            "Lwów, Lviv Urban Hromada, Rejon lwowski, Obwód lwowski, Ukraina"]

In [13]:
wro = "Wrocław, województwo dolnośląskie, Polska"

In [65]:
gdf = ox.geocoder.geocode_to_gdf(places_to_download)

In [18]:
gdf.columns

Index(['geometry', 'bbox_north', 'bbox_south', 'bbox_east', 'bbox_west',
       'place_id', 'osm_type', 'osm_id', 'lat', 'lon', 'display_name', 'class',
       'type', 'importance'],
      dtype='object')

In [30]:
gdf

Unnamed: 0,geometry,bbox_north,bbox_south,bbox_east,bbox_west,place_id,osm_type,osm_id,lat,lon,display_name,class,type,importance
0,"POLYGON ((16.80734 51.13895, 16.80859 51.13887...",51.21006,51.042669,17.176219,16.807339,285394989,relation,2805691,51.126311,16.978196,"Wrocław, Lower Silesian Voivodeship, Poland",boundary,administrative,0.77475
1,"POLYGON ((18.42950 54.38505, 18.42963 54.38503...",54.447218,54.274919,18.950244,18.429496,285393939,relation,2723259,54.361193,18.628609,"Gdańsk, Gdansk, Pomeranian Voivodeship, Poland",boundary,administrative,0.768692
2,"MULTIPOLYGON (((13.08835 52.41963, 13.09021 52...",52.675509,52.338245,13.761161,13.088345,285001329,relation,62422,52.517037,13.38886,"Berlin, Germany",boundary,administrative,0.897539
3,"POLYGON ((9.04089 45.44745, 9.04093 45.44727, ...",45.535848,45.386738,9.27811,9.040887,283684439,relation,44915,45.464194,9.189635,"Milan, Lombardy, Italy",boundary,administrative,0.739065
4,"MULTIPOLYGON (((4.72876 52.40071, 4.73371 52.4...",52.431064,52.278174,5.079162,4.728756,284964520,relation,271110,52.37276,4.893604,"Amsterdam, North Holland, Netherlands",boundary,administrative,0.836813
5,"POLYGON ((16.73159 52.46375, 16.73162 52.46365...",52.509328,52.291924,17.071707,16.731588,285284713,relation,2989158,52.400663,16.919733,"Poznań, Greater Poland Voivodeship, Poland",boundary,administrative,0.774541


In [66]:
import folium
from folium.plugins import MarkerCluster
#empty map
# Empty base map.
world_map = folium.Map(tiles="cartodbpositron")
# Draw one small black circle marker per geocoded place.  The coordinate
# columns are iterated directly instead of indexing gdf row by row.
radius = 2
for lat, lon in zip(gdf['lat'], gdf['lon']):
    folium.CircleMarker(location=[lat, lon], color="Black", radius=radius, fill=True).add_to(world_map)
# Show the map.
world_map

In [67]:
from pathlib import Path

In [78]:
def get_all_key_and_unique_values(graph_nx: MultiDiGraph, selected_keys: Dict = None) -> Dict:
    """Collect the distinct values of selected edge attributes.

    Args:
        graph_nx: graph whose edge attributes are inspected.
        selected_keys: container of attribute names to collect (anything
            supporting ``in``); when empty or omitted a built-in list of
            categorical road attributes is used ('width' and 'length' are
            left out of it).

    Returns:
        Dict mapping each attribute name that occurs on at least one edge to
        a list of its distinct values, in no particular order.  List-valued
        attributes contribute each of their elements.
    """
    if not selected_keys:
        selected_keys = ['oneway', 'lanes', 'highway',
                         'maxspeed', 'access', 'bridge',
                         'junction', 'service', 'tunnel', 'surface']

    collected = {}
    for _, _, attributes in graph_nx.edges(data=True):
        for key, val in attributes.items():
            if key not in selected_keys:
                continue
            bucket = collected.setdefault(key, set())
            # Flatten lists on every occurrence.  Previously the first value
            # of a key was stored as-is, so a list-valued first occurrence
            # made the final set() conversion fail on an unhashable list.
            if isinstance(val, list):
                bucket.update(val)
            else:
                bucket.add(val)

    return {key: list(values) for key, values in collected.items()}

In [122]:
    import pickle


In [123]:
def get_unique_val_for_many_graphs(directory, target_dir):
    """Merge the distinct edge-attribute values of every ``*.xml`` graph in a directory.

    Each file is loaded with ``ox.io.load_graphml`` and scanned with
    ``get_all_key_and_unique_values``.  The merged result is also pickled to
    ``unique_kv.pkl`` inside ``target_dir``.

    Args:
        directory: directory searched (non-recursively) for ``*.xml`` files.
        target_dir: directory that receives ``unique_kv.pkl``.

    Returns:
        Dict mapping attribute name to a list of its distinct values across
        all files; empty when no file is found.
    """
    found_files = list(Path(directory).glob('*.xml'))
    merged = {}
    for path in tqdm(found_files):
        raw_graphml = ox.io.load_graphml(path)
        for key, values in get_all_key_and_unique_values(raw_graphml).items():
            bucket = merged.setdefault(key, set())
            if isinstance(values, (list, set, tuple)):
                bucket.update(values)
            else:
                bucket.add(values)

    # Deduplicate over every merged key.  The previous version looped over
    # the keys of the last file only, which left duplicates for keys missing
    # from that file and raised NameError when no file was found.
    all_seen = {key: list(values) for key, values in merged.items()}

    # Joining with Path works with or without a trailing slash on target_dir.
    with open(Path(target_dir) / "unique_kv.pkl", 'wb') as handle:
        pickle.dump(all_seen, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return all_seen
            
    

In [87]:
unique_val = get_unique_val_for_many_graphs("./data_raw/", "./data/")

100%|██████████| 2/2 [00:47<00:00, 23.88s/it]


In [97]:
!dir data-raw

 Volume in drive C is Acer
 Volume Serial Number is 98FE-6CA9

 Directory of C:\Users\fdrew\Documents\praca_magisterska\SpatialGraphMAE



File Not Found


In [113]:
graph_nx = ox.io.load_graphml("./data_raw/Wrocław_Polska_recent.xml")
graph_nx_copy = graph_nx.copy()

selected_keys = ['oneway', 'lanes', 'highway', 
                         'maxspeed', 'access', 'bridge',
                         'junction', 'service', 'tunnel', 'surface'] 

# One-hot encode the selected attributes of every edge into graph_nx_copy.
for edge in graph_nx.edges():
    for connection in graph_nx[edge[0]][edge[1]].keys():
        encoded_edge = graph_nx_copy[edge[0]][edge[1]][connection]
        # Mark the columns of the values this edge actually has.
        for key, val in graph_nx[edge[0]][edge[1]][connection].items():
            if key in selected_keys:
                new_key = key + "_" + str(val)
                encoded_edge[new_key] = 1
        # Fill the remaining vocabulary columns with 0.  The check has to look
        # at the copy: the original edge never contains the one-hot columns,
        # so testing it reset every 1 written above back to 0.
        for key, val in unique_val.items():
            for elem in val:
                new_key = key + "_" + str(elem)
                if new_key not in encoded_edge:
                    encoded_edge[new_key] = 0
#ox.save_graphml(graph_nx_copy, filepath="./data_raw/{}_onehot.xml".format('Wrocław_Polska_recent'))


In [149]:
def encode_onehot(graph_nx, unique_val, selected_keys):
    """Return a copy of ``graph_nx`` with one-hot encoded edge attributes.

    For every edge, each ``<key>_<value>`` column built from ``unique_val`` is
    set to 1 when the edge has that value for ``key`` (and ``key`` is in
    ``selected_keys``) and to 0 otherwise.  ``'length'`` and ``'width'`` are
    converted to float; they become ``0.0`` when missing or not parseable.

    Args:
        graph_nx: source graph; it is not modified.
        unique_val: mapping ``attribute name -> iterable of known values``.
        selected_keys: attribute names that are one-hot encoded.

    Returns:
        The encoded copy of the graph.
    """
    graph_nx_copy = graph_nx.copy()

    key_width_and_length = ('length', 'width')
    vocabulary = [key + "_" + str(elem) for key, values in unique_val.items() for elem in values]

    for u, v, connection, raw_attrs in graph_nx.edges(keys=True, data=True):
        encoded = graph_nx_copy[u][v][connection]

        for key, val in raw_attrs.items():
            if key in selected_keys:
                # A list-valued attribute switches on one column per element,
                # matching how the vocabulary flattens lists, instead of
                # creating a single "key_['a', 'b']" column.
                for item in (val if isinstance(val, list) else [val]):
                    encoded[key + "_" + str(item)] = 1
            if key in key_width_and_length:
                first = val[0] if isinstance(val, list) else val
                try:
                    encoded[key] = float(first)
                except ValueError:
                    encoded[key] = 0.0

        # Zero-fill only the columns still missing from the copy.  Checking
        # the original edge (which never has these columns) overwrote every
        # 1 set above with 0.
        for column in vocabulary:
            encoded.setdefault(column, 0)

        for key in key_width_and_length:
            if key not in raw_attrs:
                encoded[key] = 0.0

    return graph_nx_copy

In [150]:
# Categorical attributes that are one-hot encoded.
selected_keys = ['oneway', 'lanes', 'highway', 
                 'maxspeed', 'access', 'bridge',
                 'junction', 'service', 'tunnel', 'surface'] 
graph_nx = ox.io.load_graphml("./data_raw/Wrocław_Polska_recent.xml")

# Vocabulary of attribute values over all raw graphs (also written to ./data/unique_kv.pkl).
unique_val = get_unique_val_for_many_graphs("./data_raw/", "./data/")

# Feature column names: one "<key>_<value>" per vocabulary entry plus the two numeric columns.
all_keys = []
for key, val in unique_val.items():
    for elem in val:
        new_key = key + "_" + str(elem)
        all_keys.append(new_key)
        
all_keys.append('length')
all_keys.append('width')
print(len(all_keys))

        
graph_nx_encoded = encode_onehot(graph_nx, unique_val, selected_keys)
print('nesxr')
# NOTE(review): convert_nx_to_dgl_as_linegraph_v2 is defined in a later cell,
# and the recorded run of this cell ends with
# "ValueError: too many dimensions 'str'" - presumably an edge attribute that
# is still a string reaches the tensor conversion; confirm which one.
dgl_line_graph = convert_nx_to_dgl_as_linegraph_v2(graph_nx_encoded, all_keys)

dgl_line_graph_with_masks, elem_n, scaler_n = create_mask(dgl_line_graph)


100%|██████████| 2/2 [00:37<00:00, 18.68s/it]


81
nesxr


ValueError: too many dimensions 'str'

In [None]:
# NOTE(review): dgl_line_graph_with_masks is the DGL graph returned by
# create_mask, while other cells save DGL graphs with dgl's save_graphs -
# confirm that save_graphml is really meant to receive it.
ox.save_graphml(dgl_line_graph_with_masks, filepath="./data_raw/{}_onehot.xml".format('Wrocław_Polska_recent'))

In [None]:
graph_nx_encoded[95584835][6152142174]


In [140]:
def convert_nx_to_dgl_as_linegraph_v2(graph_nx: MultiDiGraph, selected_keys: List[str]) -> DGLHeteroGraph:
    """Convert a labelled road graph into a DGL line graph.

    Every edge of ``graph_nx`` becomes a node of the returned graph.  Its
    ``selected_keys`` attributes form one row of ``ndata['feat']`` and its
    ``'label'`` / ``'id'`` attributes are copied to ``ndata['label']`` /
    ``ndata['id']``.

    Args:
        graph_nx: graph whose edges carry ``selected_keys``, ``'label'`` and
            ``'id'`` as numeric attributes.
        selected_keys: attribute names, in feature-column order.

    Returns:
        The line graph with node data ``'feat'``, ``'label'`` and ``'id'``.
    """
    # Request 'id', the attribute that is read back below.  The previous
    # version requested 'idx' and then read 'id'.
    graph_dgl = dgl.from_networkx(graph_nx, edge_attrs=(selected_keys + ['label', 'id']))
    graph_dgl_line_graph = dgl.line_graph(graph_dgl)

    # One column per attribute, one row per edge.  Concatenating the
    # per-attribute vectors end to end and reshaping to (-1, K) interleaved
    # them, so a row contained values of one attribute taken from K
    # different edges instead of the K attributes of one edge.
    feature_columns = [graph_dgl.edata[key].reshape(-1, 1) for key in selected_keys]
    graph_dgl_line_graph.ndata['feat'] = torch.cat(feature_columns, dim=1)
    graph_dgl_line_graph.ndata['label'] = graph_dgl.edata['label']
    graph_dgl_line_graph.ndata['id'] = graph_dgl.edata['id']

    return graph_dgl_line_graph

In [119]:
dgl_graoh = convert_nx_to_dgl_as_linegraph_v2(graph_nx_copy, all_keys)

In [120]:
dgl_graoh

Graph(num_nodes=142983, num_edges=416463,
      ndata_schemes={'feat': Scheme(shape=(79,), dtype=torch.int64)}
      edata_schemes={})

In [None]:
from dgl.data.utils import save_graphs
save_graphs("./data_transformed/wro_onehoted.graph", [dgl_line_graph_with_masks])

In [None]:
def count_keys_in_edges(directory, target_dir, all_seen):
    """Merge the distinct edge-attribute values of every ``*.xml`` graph in a directory.

    Each file is loaded with ``ox.io.load_graphml`` and scanned with
    ``get_all_key_and_unique_values``.  The merged result is also pickled to
    ``unique_kv.pkl`` inside ``target_dir``.

    Args:
        directory: directory searched (non-recursively) for ``*.xml`` files.
        target_dir: directory that receives ``unique_kv.pkl``.
        all_seen: ignored; the result is always built from scratch.
            NOTE(review): despite its name the function does not count
            anything, and this argument was discarded from the start -
            confirm what it was meant to do.

    Returns:
        Dict mapping attribute name to a list of its distinct values across
        all files; empty when no file is found.
    """
    found_files = list(Path(directory).glob('*.xml'))
    merged = {}
    for path in tqdm(found_files):
        raw_graphml = ox.io.load_graphml(path)
        for key, values in get_all_key_and_unique_values(raw_graphml).items():
            bucket = merged.setdefault(key, set())
            if isinstance(values, (list, set, tuple)):
                bucket.update(values)
            else:
                bucket.add(values)

    # Deduplicate over every merged key.  The previous version looped over
    # the keys of the last file only, which left duplicates for keys missing
    # from that file and raised NameError when no file was found.
    all_seen = {key: list(values) for key, values in merged.items()}

    # Joining with Path works with or without a trailing slash on target_dir.
    with open(Path(target_dir) / "unique_kv.pkl", 'wb') as handle:
        pickle.dump(all_seen, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return all_seen