In [1]:
import xml.etree.ElementTree as ET
import pandas as pd
import janux as jx
import networkx as nx

In [2]:
def parse_trips(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()

    trips_data = []
    
    for trip in root.findall("trip"):
        trip_info = trip.attrib
        trips_data.append(trip_info)

    return pd.DataFrame(trips_data)

In [3]:
xml_file_path = "cologne8.rou.xml"
df = parse_trips(xml_file_path)

In [4]:
df.sample(10)

Unnamed: 0,id,type,depart,from,to
147,162709_422_0,pkw,25501.0,28675510#1,42925825#0
1192,199442_437_0,pkw,27073.0,-4936412,8716827#0
463,188336_433_0,pkw,25981.0,-186623965#18,-186623965#14
983,149128_417_0,pkw,26719.0,186623965#9,42925825#0
442,146521_416_0,pkw,25953.0,-186623965#18,-186623965#14
252,192710_435_0,pkw,25679.0,22959550#0,297047309#0
320,136072_411_0,pkw,25779.0,-186623965#18,-186623965#14
31,155353_420_0,pkw,25234.0,22917421#3,186623965#17
2031,252335_457_0,pkw,28763.0,8716807#1,-186623965#14
1028,188680_434_0,pkw,26819.0,-23283579#1,4936412


In [5]:
df["depart"] = df["depart"].astype(float)
df["depart"] = df["depart"].astype(int)
df["depart"] = df["depart"] - min(df["depart"])
df["id"] = df.index
df["kind"] = "Human"
df.rename(columns={"depart": "start_time"}, inplace=True)
df.rename(columns={"from": "origin"}, inplace=True)
df.rename(columns={"to": "destination"}, inplace=True)
df.drop(columns=["type"], inplace=True)


In [6]:
network = jx.build_digraph("cologne.con.xml", "cologne.edg.xml", "cologne.rou.xml")

In [7]:
origins, destinations = df["origin"].unique(), df["destination"].unique()
bad_origins, bad_destinations = ["-194017408#1", "194017408#0", "-25168493", "25168493", "28675510#7", "-28675510#11"], ["-194017408#1", "194017408#0", "-25168493", "25168493", "28675510#7", "-28675510#11"]

for o, d in zip(origins, destinations):
    if not nx.has_path(network, o, d):
        bad_origins.append(o)
        bad_destinations.append(d)

for idx, row in df.iterrows():
    if row["origin"] in bad_origins or row["destination"] in bad_destinations:
        df.drop(idx, inplace=True)

In [8]:
"""
origins, destinations = df["origin"].unique(), df["destination"].unique()
bad_ods = []

for o, d in zip(origins, destinations):
    if not len(list(nx.all_simple_paths(network, o, d))) >= 3:
        bad_ods.append((o, d))
        
for idx, row in df.iterrows():
    if (row["origin"], row["destination"]) in bad_ods:
        df.drop(idx, inplace=True)
"""
;

''

In [9]:
origin_indices = {origin_name : idx for idx, origin_name in enumerate(df["origin"].unique())}
destination_indices = {destination_name : idx for idx, destination_name in enumerate(df["destination"].unique())}

origin_names = {value: key for key, value in origin_indices.items()}
destination_names = {value: key for key, value in destination_indices.items()}

for idx, row in df.iterrows():
    df.at[idx, "origin"] = origin_indices[row["origin"]]
    df.at[idx, "destination"] = destination_indices[row["destination"]]

In [10]:
print(len(df))
print(max(df["start_time"]))
print(min(df["start_time"]))
print(max(df["start_time"]) - min(df["start_time"]))

1610
3589
0
3589


In [11]:
df["id"] = [i for i in range(len(df))]
# reset indices
df.reset_index(drop=True, inplace=True)

In [None]:
import signal

class TimeoutException(Exception):
    pass

def handler(signum, frame):
    raise TimeoutException("Function timed out")

def run_with_timeout(func, timeout, *args, **kwargs):
    signal.signal(signal.SIGALRM, handler)
    signal.alarm(timeout)  # Set the timeout alarm

    try:
        result = func(*args, **kwargs)  # Call the function with arguments
        signal.alarm(0)  # Cancel the alarm if execution completes in time
        return result
    except TimeoutException as e:
        print(e)
        return None

# Example function with arguments
def slow_function(x, y):
    import time
    time.sleep(5)  # Simulating a slow operation
    return x + y


for num_paths in range(6):
    path_gen_kwargs = {
                "number_of_paths": num_paths+1,
                "random_seed": 42,
                "num_samples": 20,
                "beta": -1.5,
                "weight": "time",
                "verbose": False
            }
    print(f"\n------Number of paths: {num_paths+1}--------")
    for idx, row in df.iterrows():
        print(f"\r{idx}/{len(df)}", end="")
        routes = run_with_timeout(jx.basic_generator, 10, network, [origin_names[row["origin"]]], [destination_names[row["destination"]]], as_df=True, calc_free_flow=True, **path_gen_kwargs)
        if routes is None:
            print(f"{row['id']} failed for {origin_names[row['origin']]} to {destination_names[row['destination']]}")
            for idx1, row1 in df.iterrows():
                if row1["origin"] == row['origin'] or row1["destination"] == row['destination']:
                    df.drop(idx1, inplace=True)
            
#routes = jx.basic_generator(network, origin_indices.keys(), destination_indices.keys(), as_df=True, calc_free_flow=True, **path_gen_kwargs)


------Number of paths: 1--------
1609/1610
------Number of paths: 2--------
4/1610Function timed out
4 failed for -186623965#18 to -22917421#4
7/1299Function timed out
7 failed for -23283579#1 to 23283579#1
9/1058Function timed out
9 failed for 186623965#9 to 42925825#0
10/857Function timed out
10 failed for 186623965#9 to 42925825#0
12/857Function timed out
12 failed for 22917421#3 to 186623965#17
14/704Function timed out
14 failed for 22917421#3 to 186623965#17
15/704Function timed out
15 failed for -42925825#2 to 42925825#0
16/447Function timed out
16 failed for 22917421#3 to 186623965#17
23/447Function timed out
23 failed for 22917421#3 to 186623965#17
26/447Function timed out
26 failed for 186623965#9 to 42925825#0
28/447Function timed out
28 failed for -186623965#18 to -22917421#4
30/447Function timed out
30 failed for 22917421#3 to 186623965#17
32/447Function timed out
32 failed for 186623965#9 to 42925825#0
39/447Function timed out
39 failed for 22917421#3 to 186623965#17
44/4