In [7]:
import pandas as pd
import torch
from torch_geometric.data import Data
# set working directory to the directory where this notebook is located
import os
os.chdir(os.path.dirname(os.path.abspath("__file__")))

In [8]:
def get_random_nodes_from_timeseries(ts, n=10, seed=None):
    """
    Get random nodes from a time series DataFrame.

    Parameters:
    ts (pd.DataFrame): Time series data with nodes as columns.
    n (int): Number of random nodes to select.
    seed (int, optional): Random seed for reproducibility.

    Returns:
    pd.DataFrame: DataFrame containing the selected random nodes.
    """
    
    nodes = pd.Series(ts["node_id"].unique())  # Get unique node IDs from the DataFrame
    if n > len(nodes):
        raise ValueError("n cannot be greater than the number of unique nodes in the DataFrame.")
    if n <= 0:
        raise ValueError("n must be a positive integer.")
    if not isinstance(ts, pd.DataFrame):
        raise TypeError("ts must be a pandas DataFrame.")
    
    selected_nodes = nodes.sample(n=n, replace=False, random_state=seed)  # Randomly select nodes
    return selected_nodes.tolist()


def set_smart_traffic_lights_for_nodes_in_timeseries(ts, nodes):
    
    """
    Set smart traffic lights for specified nodes in a time series DataFrame.

    Parameters:
    ts (pd.DataFrame): Time series data with nodes as columns.
    nodes (list): List of node IDs to set smart traffic lights for.

    Returns:
    pd.DataFrame: Updated DataFrame with smart traffic lights set for specified nodes.
    """
    if not isinstance(ts, pd.DataFrame):
        raise TypeError("ts must be a pandas DataFrame.")
    
    if not all(node in ts["node_id"].unique() for node in nodes):
        raise ValueError("All nodes must be present in the time series DataFrame.")
    
    # Set smart traffic light and wait time for specified nodes, 0 for smart traffic light otherwise
    ts["smart_traffic_light"] = 0
    ts.loc[ts["node_id"].isin(nodes), "smart_traffic_light"] = 1
    ts.loc[ts["node_id"].isin(nodes), "status"] = 1
    ts.loc[ts["node_id"].isin(nodes), "wait_time"] = 0
    return ts

In [9]:
from concurrent.futures import ThreadPoolExecutor

def get_data_obj_paths(data_objects_root):
    data_obj_paths = []
    for folder in os.listdir(data_objects_root):
        if folder.startswith("network_") and os.path.isdir(os.path.join(data_objects_root, folder)):
            folder_paths = os.listdir(os.path.join(data_objects_root, folder))
            folder_paths = [os.path.join(data_objects_root, folder, path) for path in folder_paths if path.endswith(".pt")]
            data_obj_paths.extend(folder_paths)
    return data_obj_paths


def get_data_objects(data_obj_paths:list[str]):
    def load_data_obj(path):
        return torch.load(path, weights_only=False)

    data_objects = []
    with ThreadPoolExecutor() as executor:
        for i, data_obj in enumerate(executor.map(load_data_obj, data_obj_paths)):
            print(f"Loading object {i+1}/{len(data_obj_paths)}")
            data_objects.append(data_obj)
    return data_objects


def add_smart_traffic_lights_to_data_object(data_object:Data, nodes):    
    ts = data_object.timeseries
    ts = set_smart_traffic_lights_for_nodes_in_timeseries(ts, nodes)
    data_object.timeseries = ts
    
    # start_node 
    # end_node
    # time_series
    # x
    # edge_index
    # edge_attr
    # G_sub
    # G_pt
    
    new_features = torch.tensor(
        ts.drop_duplicates(subset="node_id")["smart_traffic_light"].values,
        dtype=torch.float32
    ).unsqueeze(1)
    new_features = torch.cat((data_object.x, new_features), dim=1)
    
    new_data_object = Data(
        x=new_features,
        edge_index=data_object.edge_index,
        edge_attr=data_object.edge_attr,
    )
    new_data_object.start_node = data_object.start_node
    new_data_object.end_node = data_object.end_node
    new_data_object.time_series = data_object.timeseries
    new_data_object.G_sub = data_object.G_sub
    new_data_object.G_pt = data_object.G_pt
   
    return new_data_object


In [22]:
train_data_objects_paths = get_data_obj_paths("data/training_data_2/not_smart")
val_data_objects_paths = get_data_obj_paths("data/validation_data_2/not_smart")
test_data_objects_paths = get_data_obj_paths("data/test_data_2/not_smart")


train_data_objects = get_data_objects(train_data_objects_paths)
val_data_objects = get_data_objects(val_data_objects_paths)
test_data_objects = get_data_objects(test_data_objects_paths)



Loading object 1/64
Loading object 2/64
Loading object 3/64
Loading object 4/64
Loading object 5/64
Loading object 6/64
Loading object 7/64
Loading object 8/64
Loading object 9/64
Loading object 10/64
Loading object 11/64
Loading object 12/64
Loading object 13/64
Loading object 14/64
Loading object 15/64
Loading object 16/64
Loading object 17/64
Loading object 18/64
Loading object 19/64
Loading object 20/64
Loading object 21/64
Loading object 22/64
Loading object 23/64
Loading object 24/64
Loading object 25/64
Loading object 26/64
Loading object 27/64
Loading object 28/64
Loading object 29/64
Loading object 30/64
Loading object 31/64
Loading object 32/64
Loading object 33/64
Loading object 34/64
Loading object 35/64
Loading object 36/64
Loading object 37/64
Loading object 38/64
Loading object 39/64
Loading object 40/64
Loading object 41/64
Loading object 42/64
Loading object 43/64
Loading object 44/64
Loading object 45/64
Loading object 46/64
Loading object 47/64
Loading object 48/64
L

In [23]:
for data_objects, file_path in [
    (train_data_objects, train_data_objects_paths),
    (val_data_objects, val_data_objects_paths),
    (test_data_objects, test_data_objects_paths)
]:
    for i, data_object in enumerate(data_objects):
        nodes = get_random_nodes_from_timeseries(data_object.timeseries, n=10, seed=42)
        data_objects[i] = add_smart_traffic_lights_to_data_object(data_object, nodes)
        folders = file_path[i].split(os.sep)
        new_folder = "smart_lights"
        new_dir = os.path.join(*folders[:-3], new_folder, *folders[3:])
        print(new_dir)
        os.makedirs(os.path.dirname(new_dir), exist_ok=True)
        torch.save(data_object, new_dir)
        
        



data/training_data_2/smart_lights/network_8/network_8_34_159.pt
data/training_data_2/smart_lights/network_8/network_8_202_354.pt
data/training_data_2/smart_lights/network_8/network_8_90_228.pt
data/training_data_2/smart_lights/network_8/network_8_339_81.pt
data/training_data_2/smart_lights/network_13/network_13_28_116.pt
data/training_data_2/smart_lights/network_13/network_13_244_124.pt
data/training_data_2/smart_lights/network_13/network_13_213_12.pt
data/training_data_2/smart_lights/network_13/network_13_183_27.pt
data/training_data_2/smart_lights/network_3/network_3_17_21.pt
data/training_data_2/smart_lights/network_3/network_3_196_104.pt
data/training_data_2/smart_lights/network_3/network_3_11_180.pt
data/training_data_2/smart_lights/network_3/network_3_279_127.pt
data/training_data_2/smart_lights/network_7/network_7_129_126.pt
data/training_data_2/smart_lights/network_7/network_7_145_124.pt
data/training_data_2/smart_lights/network_7/network_7_5_112.pt
data/training_data_2/smart_l

In [6]:
train_data_objects_paths

[]