In [3]:
import os
import pickle
import random
import warnings

# Installed before the third-party imports so warnings raised while importing are hidden too.
warnings.simplefilter('ignore')

import numpy as np
import pandas as pd
import torch

from data.Graph_Generator_Dynamic import OSM_dynamic_graph


In [4]:
# DATASET GENERATION -- NODES AND EDGES: Generating real-time co-ordinates from Open Street Map
# Run only when required, otherwise load from saved graphs using code below

# Search query
#city = "Berlin"
#shop_types = ["supermarket", "convenience", "greengrocer"]
#
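## Generate coordinates and nodes dataframe
## NOTE: OSM_graph is not imported in this notebook (only OSM_dynamic_graph is); import it before uncommenting.
#graph_generator = OSM_graph()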
#node_coordinates = graph_generator.get_coordinates(address=city, shop_types=shop_types)
#nodes_df = graph_generator.generate_nodes(node_coordinates)
#
##Generate edge feature matrix
#edge_matrix = graph_generator.generate_edges(nodes_df, edge_type = "distance")
#
#
#
## Save for future use
#file_path = "./data/"+f"berlin_coordinates_n1800.pickle"
#graph_generator.save_data(nodes_df, file_path)
#
#file_path = "./data/"+f"berlin_edges_n1800.pickle"
#graph_generator.save_data(edge_matrix, file_path)

In [5]:
# DATASET GENERATION -- TRAIN AND TEST GRAPHS
# Restore the three pickled Berlin tables through the generator's own loader.

graph_generator = OSM_dynamic_graph()
data_root = "./data/"

# Node table
file_path = data_root + "berlin_coordinates_n1800.pickle"
nodes_df = graph_generator.load_data(file_path)

# Edge matrix
file_path = data_root + "berlin_edges_n1800.pickle"
edges_df = graph_generator.load_data(file_path)

# Delays table
file_path = data_root + "berlin_delays_index.pickle"
delays_df = graph_generator.load_data(file_path)

In [6]:
# Peek at the first rows of the loaded node table.
nodes_df.head()

Unnamed: 0,index,raw_coordinates,coordinates,demands
0,0,"[13.4073729, 52.5098189]","[0.46110191838080894, 0.4925389340695574]",0.407692
1,1,"[13.3124756, 52.5716761]","[0.29653919426827, 0.7394734699026344]",0.110256
2,2,"[13.3169308, 52.5664033]","[0.30426501837987274, 0.718424401717499]",0.120513
3,3,"[13.321144, 52.5704861]","[0.3115711869350477, 0.7347229784862463]",0.335897
4,4,"[13.3228827, 52.4913971]","[0.3145862907200261, 0.4189989317382299]",0.24359


In [7]:
# Peek at the first rows of the loaded edge matrix.
edges_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1790,1791,1792,1793,1794,1795,1796,1797,1798,1799
0,0.0,12582.2,11777.2,12071.6,7340.5,11394.2,12204.9,9434.7,11675.7,25570.7,...,650.7,730.4,6579.0,8528.0,8125.2,7235.4,4422.9,14482.1,6951.4,8550.8
1,10835.2,0.0,970.4,603.1,12433.4,19200.9,3301.4,3586.8,19482.4,60663.3,...,11071.3,11160.4,5454.6,12929.9,25397.8,23142.3,14455.9,22288.8,8434.5,16357.5
2,9928.4,885.8,0.0,768.7,12526.2,18294.1,4077.1,2680.0,18575.6,60291.7,...,10164.5,10253.6,4547.8,13022.7,25490.6,23235.1,13549.1,21382.0,8527.2,15450.7
3,10239.7,603.1,788.1,0.0,13036.5,18605.4,3794.4,2991.3,18886.9,60603.0,...,10475.8,10564.9,4859.1,13533.0,26000.9,23745.4,13860.4,21693.3,9037.5,15762.0
4,7370.4,12956.4,12925.5,13449.4,0.0,18145.7,16147.7,8644.8,18427.2,34493.9,...,8128.8,8515.3,9033.4,1187.4,15068.1,12812.6,11571.6,21233.6,2249.6,15302.3


In [8]:
# Peek at the first rows of the loaded delays table.
delays_df.head()

Unnamed: 0,Day,Time,Time_Tag,Delay
0,Sun,0:00,0,7%
1,Sun,1:00,1,5%
2,Sun,2:00,2,4%
3,Sun,3:00,3,3%
4,Sun,4:00,4,2%


In [None]:
# DATASET GENERATION: build the train and test graph instances from the loaded
# node, edge and delay tables.
graph_size = 10
n_tr_instances = 10000
n_te_instances = 10000

# Seed every RNG library this notebook imports so that Restart & Run All
# regenerates the same datasets instead of new ones on each run.
# NOTE(review): generate_graphs lives in data.Graph_Generator_Dynamic and its
# source of randomness is not visible here -- confirm it draws from one of these.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Seed only once, before both calls: re-seeding between them would replay the
# same random stream for the test set and could duplicate the training set.
train_graphs = graph_generator.generate_graphs(nodes_df, edges_df, delays_df, graph_size=graph_size, n_instances=n_tr_instances)
test_graphs = graph_generator.generate_graphs(nodes_df, edges_df, delays_df, graph_size=graph_size, n_instances=n_te_instances)

Generated 0 instances
Generated 1000 instances
Generated 2000 instances


In [None]:
# Sanity check: show the last generated test graph.
print(test_graphs[-1])

In [None]:
# Persist the generated train and test graphs as pickle files.
# The file names encode the number of instances and the graph size.

train_data_path = "./data/train/"+ f"train_graphs_dynamic_{n_tr_instances}x{graph_size}.pickle"
test_data_path = "./data/test/"+ f"test_graphs_dynamic_{n_te_instances}x{graph_size}.pickle"

for out_path, graphs in ((train_data_path, train_graphs), (test_data_path, test_graphs)):
    # open() does not create missing parent folders, so make sure they exist
    # first; otherwise a fresh checkout fails after the long generation step.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, 'wb') as handle:
        pickle.dump(graphs, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Loading saved graphs -- for subsampling if required

# NOTE: `data_dir` is not defined in this notebook; define it (the other cells use "./data") before uncommenting.
#train_data_path = data_dir + "/train/train_graphs_100000x10.pickle"
#test_data_path = data_dir + "/test/test_graphs_dynamic_50000x10.pickle"
#
#with open(train_data_path, 'rb') as train_handle:
#    load_train_graphs = pickle.load(train_handle)
#
#with open(test_data_path, 'rb') as test_handle:
#    load_test_graphs = pickle.load(test_handle)

In [None]:
# Creating and saving subsampled training datasets
# Only run when need to create more datasets, otherwise load from saved ones

#n = 10000

#sampled_train_graphs = random.sample(load_train_graphs, n)
#train_data_path = "./data/train/"+f"train_graphs_{n}x{graph_size}.pickle"
#
#with open(train_data_path, 'wb') as handle:
#    pickle.dump(sampled_train_graphs, handle, protocol=pickle.HIGHEST_PROTOCOL)
#print(f"# of sampled Training graphs: {len(sampled_train_graphs)}")

   
#sampled_test_graphs = random.sample(load_test_graphs, n)
#test_data_path = "./data/test/"+f"test_graphs_{n}x{graph_size}.pickle"

#with open(test_data_path, 'wb') as handle:
#    pickle.dump(sampled_test_graphs, handle, protocol=pickle.HIGHEST_PROTOCOL)
#print(f"# of sampled Test graphs: {len(sampled_test_graphs)}")
    

