In [45]:
import random
import numpy as np
import zstandard
import networkx as nx
import copy
import osmnx as ox
import pandas as pd

import random
from collections import defaultdict
import matplotlib.pyplot as plt
import os
import subprocess
import ast

In [46]:
%load_ext autoreload
%autoreload 2

from src import utils as ut


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [47]:
# Study area, and the edge-attribute name handed to the path helpers as weight.
place = "Buenos Aires, Argentina"
wgt = "length"

# Graph for `place` as built by the project helper (presumably a networkx
# graph derived from OSMnx data -- confirm in src/utils).
g = ut.get_nx_from_ox(place)

# Drivable street network for the same place, fetched directly through OSMnx.
G = ox.graph_from_place(
    place,
    network_type="drive",
)

In [48]:
st_pairs = 30  # number of (source, target) pairs to sample

In [49]:
# Draw `st_pairs` (source, target) pairs from `g` with a fixed seed.
# The result is indexed below as st[0] / st[1] (presumably sources / targets).
st = ut.sample_st(
    g,
    st_pairs,
    seed=117,
)

In [50]:
# Shortest-path statistics for the sampled pairs
# (presumably one entry per pair -- confirm in src/utils).
(sp_length, sp_nodes) = ut.list_shortest_path_nx(
    g, wgt, st[0], st[1]
)

# Longest-forward-path statistics for the same pairs.
(lfp_length, lfp_nodes, dag_nodes) = ut.list_longest_forward_path(
    g, wgt, st[0], st[1]
)


unreachable targets: 0


In [51]:
# Plan for the cells below:
#   1. create a pandas DataFrame with the result columns
#   2. save it to / load it from disk
#   3. add new results to it

In [52]:
# Column layout of the persisted results table.
columns = [
    "Dataset",
    "# nodes",
    "# edges",
    "SP length",
    "LFP length",
    "LFP/SP length",
    "LFP/SP length (std)",
    "SP different nodes",
    "FP different nodes",
    "FP/SP visited nodes",
    "FP/SP visited nodes (std)",
]

# Where the results table lives on disk; make sure its folder exists.
file_path = "results/dataset_fp.csv"
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# First run only: write a header-only CSV so the read below always succeeds.
if not os.path.exists(file_path):
    pd.DataFrame(columns=columns).to_csv(file_path, index=False)

# Load whatever has been accumulated so far and display it.
df = pd.read_csv(file_path)
df

Unnamed: 0,Dataset,# nodes,# edges,SP length,LFP length,LFP/SP length,LFP/SP length (std),SP different nodes,FP different nodes,FP/SP visited nodes,FP/SP visited nodes (std)
0,"Florence, Italy",6096,11737,6909.0,7287.0,1.05,0.05,75.1,121.27,1.58,0.65
1,"Kyoto, Japan",44828,118087,8501.0,9243.0,1.09,0.06,117.9,536.77,4.16,2.21
2,"Essaouira, Morocco",1277,3429,3650.0,3967.0,1.13,0.14,37.2,107.03,2.52,1.33
3,"Piedmont, California, USA",352,937,1860.06996,2060.58421,1.106162,0.113537,19.91,39.48,1.814726,0.662455


In [53]:
def add_dataset_to_dataframe(df, dataset, nodes, edges, sp_length, lfp_length, lfp_sp_length, lfp_sp_length_std, sp_diff_nodes, fp_diff_nodes, fp_sp_visited_nodes, fp_sp_visited_nodes_std, file_path='results/dataset_fp.csv'):
    """Append one dataset's summary row to the results table and save it as CSV.

    The input frame is never modified: a new frame is built with
    ``pd.concat`` and returned, so callers must rebind the result
    (``df = add_dataset_to_dataframe(df, ...)``) to see the new row.

    Parameters
    ----------
    df : pandas.DataFrame
        Current results table. A frame without a ``"Dataset"`` column is
        treated as containing no datasets.
    dataset : str
        Value stored in the ``"Dataset"`` column; also the uniqueness key.
    nodes, edges, sp_length, lfp_length, lfp_sp_length, lfp_sp_length_std,
    sp_diff_nodes, fp_diff_nodes, fp_sp_visited_nodes, fp_sp_visited_nodes_std
        Scalars stored in the corresponding result columns.
    file_path : str, optional
        CSV file the updated table is written to. Defaults to the path the
        function has always used, ``'results/dataset_fp.csv'``.

    Returns
    -------
    pandas.DataFrame
        The updated table, or ``df`` itself (unchanged, nothing written)
        when ``dataset`` is already present.
    """
    # Guard clause: never add the same dataset twice.
    if "Dataset" in df.columns and dataset in df["Dataset"].values:
        print(f"Dataset {dataset} already exists in the DataFrame.")
        return df

    # Keys are listed in the table's canonical column order; pd.concat aligns
    # on column names, so the order only matters when `df` has no columns yet.
    new_row = pd.DataFrame({
        "Dataset": [dataset],
        "# nodes": [nodes],
        "# edges": [edges],
        "SP length": [sp_length],
        "LFP length": [lfp_length],
        "LFP/SP length": [lfp_sp_length],
        "LFP/SP length (std)": [lfp_sp_length_std],
        "SP different nodes": [sp_diff_nodes],
        "FP different nodes": [fp_diff_nodes],
        "FP/SP visited nodes": [fp_sp_visited_nodes],
        "FP/SP visited nodes (std)": [fp_sp_visited_nodes_std],
    })

    updated = pd.concat([df, new_row], ignore_index=True)

    # Make sure the target folder exists before writing (a bare file name has
    # an empty dirname, which os.makedirs would reject).
    out_dir = os.path.dirname(file_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    updated.to_csv(file_path, index=False)

    return updated

In [54]:
# Places to evaluate; each one adds (at most) one row to the results table.
places = ['Buenos Aires, Argentina']

for place in places:
    g = ut.get_nx_from_ox(place)
    st = ut.sample_st(g, st_pairs, seed=118)
    sp_length, sp_nodes = ut.list_shortest_path_nx(g, wgt, st[0], st[1])
    lfp_length, lfp_nodes, dag_nodes = ut.list_longest_forward_path(g, wgt, st[0], st[1])

    # Element-wise ratios, computed once and reused for both mean and std.
    length_ratios = [lfp / sp for lfp, sp in zip(lfp_length, sp_length)]
    visited_ratios = [dag / sp for dag, sp in zip(dag_nodes, sp_nodes)]

    # Rebind `df`: add_dataset_to_dataframe returns a NEW frame and writes that
    # frame to the CSV. Without the rebind every iteration would start from the
    # same stale table and overwrite the rows saved by earlier iterations.
    df = add_dataset_to_dataframe(
        df, place, len(g.nodes()), len(g.edges()),
        np.mean(sp_length), np.mean(lfp_length),
        np.mean(length_ratios), np.std(length_ratios),
        np.mean(sp_nodes), np.mean(dag_nodes),
        np.mean(visited_ratios), np.std(visited_ratios),
    )

unreachable targets: 0


In [55]:
def round_dataframe_columns(df):
    """Round the results table for display, in place, and return it.

    * ``"SP length"`` and ``"LFP length"`` are rounded to the nearest integer
      and converted to ``int``.
    * Every column positioned after ``"LFP length"`` (the ratio and
      node-count statistics) is rounded to two decimals.

    The split point is located by column name rather than by a fixed
    position, so the function keeps working if columns are added or removed
    in front of ``"LFP length"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Results table containing at least ``"SP length"`` and
        ``"LFP length"``. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, after rounding.
    """
    # Path lengths: whole numbers only.
    for length_col in ("SP length", "LFP length"):
        df[length_col] = df[length_col].round(0).astype(int)

    # Everything to the right of "LFP length": two decimals.
    first_stat_pos = df.columns.get_loc("LFP length") + 1
    df.iloc[:, first_stat_pos:] = df.iloc[:, first_stat_pos:].round(2)

    return df

In [56]:
# Reload the persisted table, round it for display, and show it.
# round_dataframe_columns hands back the frame it was given, so `df` ends up
# bound to the rounded table.
df = round_dataframe_columns(pd.read_csv(file_path))
df

Unnamed: 0,Dataset,# nodes,# edges,SP length,LFP length,LFP/SP length,LFP/SP length (std),SP different nodes,FP different nodes,FP/SP visited nodes,FP/SP visited nodes (std)
0,"Florence, Italy",6096,11737,6909,7287,1.05,0.05,75.1,121.27,1.58,0.65
1,"Kyoto, Japan",44828,118087,8501,9243,1.09,0.06,117.9,536.77,4.16,2.21
2,"Essaouira, Morocco",1277,3429,3650,3967,1.13,0.14,37.2,107.03,2.52,1.33
3,"Piedmont, California, USA",352,937,1860,2061,1.11,0.11,19.91,39.48,1.81,0.66
4,"Buenos Aires, Argentina",17890,37474,9065,9642,1.06,0.04,89.1,387.5,4.0,2.12
