In [None]:
# Put the parent directory on the import path so the `t4c22` imports further down can resolve
# (assumes the notebook is run from a subdirectory of the repository — TODO confirm).
import sys
sys.path.append("..")

In [None]:
from collections import defaultdict

import numpy as np
import pandas as pd
import torch

# Exploring vehicle counter data

In [None]:
ls ../data/train/london/input | tail -n 5

In [None]:
# Load the London counter input file for 2020-01-27 into `df`.
df = pd.read_parquet("../data/train/london/input/counters_2020-01-27.parquet")

In [None]:
# Preview the first rows of the counter frame.
df.head()

In [None]:
# (number of rows, number of columns) of the counter frame.
df.shape

In [None]:
# Keep only the rows recorded on 2020-01-27 whose `t` equals 1.
df.loc[df["day"].eq("2020-01-27") & df["t"].eq(1)]

In [None]:
# Expand the list-like `volumes_1h` column of the first rows into one row per element.
df.head().explode("volumes_1h")

In [None]:
ls data/speed_classes/london | head -n 5

In [None]:
# Preview the first rows of the London speed-classes file for 2019-07-01 (not stored in a variable).
pd.read_parquet("data/speed_classes/london/speed_classes_2019-07-01.parquet").head()

In [None]:
ls data/train/london/labels

In [None]:
# Load the London `cc_labels` file for 2019-07-01.
# This rebinds `df`: the counter frame loaded earlier is no longer reachable under this name.
df = pd.read_parquet("data/train/london/labels/cc_labels_2019-07-01.parquet")

In [None]:
# Preview the first rows of the London label frame.
df.head()

In [None]:
ls data/train/madrid/labels

In [None]:
# Load the Madrid `cc_labels` file for 2021-06-01; `df` is rebound once more.
df = pd.read_parquet("data/train/madrid/labels/cc_labels_2021-06-01.parquet")

In [None]:
# Preview the first rows of the Madrid label frame.
df.head()

In [None]:
import pickle

In [None]:
# Unpickle data/london.pkl into `smth` so its structure can be inspected in the next cells.
# NOTE: pickle.load can execute arbitrary code — only open files from a trusted source.
with open("data/london.pkl", "rb") as f:
    smth = pickle.load(f)

In [None]:
# Keys of the unpickled object (it is used as a mapping here and in the following cells).
smth.keys()

In [None]:
import numpy as np

In [None]:
# Number of entries stored under the first key of the mapping.
len(list(smth.values())[0])

In [None]:
# Number of values held by the mapping.
len(list(smth.values()))

In [None]:
# Load the London road-graph edge table.
df_edges = pd.read_parquet("../data/road_graph/london/road_graph_edges.parquet")

In [None]:
# (number of edges, number of columns) of the edge table.
df_edges.shape

In [None]:
# Preview the first rows of the edge table.
df_edges.head()

In [None]:
# Show the first edge as a plain {column: value} dict.
# Slicing to one row first avoids converting the whole edge table just to read a single record.
df_edges.head(1).to_dict("records")[0]

In [None]:
# Load the London road-graph node table.
df_nodes = pd.read_parquet("../data/road_graph/london/road_graph_nodes.parquet")

In [None]:
# (number of nodes, number of columns) of the node table.
df_nodes.shape

In [None]:
# Preview the first rows of the node table.
df_nodes.head()

# Breaking down road graph mapping

In [None]:
# One dict per edge row; kept around because the attribute-encoding cell iterates over it again.
edge_records = df_edges.to_dict("records")
# Each edge is identified by its (u, v) pair of endpoint node ids, in table order.
edges = [
    (record["u"], record["v"])
    for record in edge_records
]

In [None]:
# Split node ids by whether the node has a non-empty `counter_info` value.
# A single boolean mask replaces two full `to_dict("records")` conversions of the node table.
has_counter = df_nodes["counter_info"] != ""
counter_nodes = df_nodes.loc[has_counter, "node_id"].tolist()
noncounter_nodes = df_nodes.loc[~has_counter, "node_id"].tolist()
# Counter nodes are listed first, so they receive the lowest positions in `nodes`.
nodes = counter_nodes + noncounter_nodes

In [None]:
from collections import defaultdict

In [None]:
# Map each node id to its position in `nodes`.
# Unknown node ids resolve to -1 (and are inserted on lookup, as with any defaultdict).
node_to_int_mapping = defaultdict(lambda: -1)
node_to_int_mapping.update(
    (node_id, position) for position, node_id in enumerate(nodes)
)

In [None]:
# Translate every (u, v) edge into integer node positions.
# Result has shape (2, len(edges)): row 0 holds the `u` positions, row 1 the `v` positions.
source_positions = [node_to_int_mapping[u] for u, _ in edges]
target_positions = [node_to_int_mapping[v] for _, v in edges]
edge_index = torch.tensor([source_positions, target_positions], dtype=torch.long)

In [None]:
# Map each (u, v) edge to its position in `edges`; unknown edges resolve to -1.
# If an edge pair occurs more than once, the last occurrence wins.
edge_index_d = defaultdict(lambda: -1)
edge_index_d.update(
    (edge, position) for position, edge in enumerate(edges)
)

In [None]:
# Edge-table columns to encode; the order here fixes the column order of `edge_attr`.
edge_attributes = [
    "speed_kph",
    "parsed_maxspeed",
    "length_meters",
    "counter_distance",
    "importance",
    "highway",
    "oneway",
]

In [None]:
# Default when no edge attributes are requested; replaced by a tensor in the next cell
# whenever `edge_attributes` is not None.
edge_attr = None

In [None]:
if edge_attributes is not None:
    # One row per edge, one column per requested attribute; NaN marks "not filled yet".
    edge_attr = torch.full(size=(len(edges), len(edge_attributes)), fill_value=float("nan"))

    # "highway" and "oneway" are label-encoded: each distinct value gets the next free
    # integer code, in order of first appearance.
    highway_dict = {}
    oneway_dict = {}
    category_codes = {"highway": highway_dict, "oneway": oneway_dict}

    for row, record in enumerate(edge_records):
        for col, name in enumerate(edge_attributes):
            value = record[name]
            codes = category_codes.get(name)
            # `is not None` (not truthiness): the code dicts start out empty.
            if codes is not None:
                # len(codes) is evaluated before insertion, i.e. it is the next unused code.
                value = codes.setdefault(value, len(codes))
            edge_attr[row, col] = value

In [None]:
# Encoded attributes of the first five edges (columns follow the order of `edge_attributes`).
edge_attr[:5]

In [None]:
# Distinct values of the "importance" column (position 4 in `edge_attributes`).
# The column is looked up by name so this keeps pointing at the same attribute if the
# attribute list is reordered.
# NOTE(review): the following cell shows `highway_dict`; if the highway codes were meant
# here, use "highway" instead.
edge_attr[:, edge_attributes.index("importance")].unique()

In [None]:
# Mapping from each distinct `highway` value to the integer code stored in `edge_attr`.
highway_dict

In [None]:
from t4c22.t4c22_config import cc_dates, load_basedir

In [None]:
# Base directory handed to the t4c22 helpers below (how it is resolved is inside load_basedir, not shown here).
basedir = load_basedir()

In [None]:
# First five entries returned by cc_dates for London.
cc_dates(basedir, "london")[:5]

In [None]:
import pickle

# NOTE: pickle.load can execute arbitrary code — only open files from a trusted source.
with open("data/london.pkl", 'rb') as f:
    maps = pickle.load(f)

# Map "<day>-<t>" to the index of the group (0..19) that lists that (day, t, _) entry.
# If a key appears in several groups, the highest group index wins.
# Assumes the pickle holds groups 0 through 19 — TODO confirm the count is fixed at 20.
cluster_map = {
    '%s-%d' % (day, t): i
    for i in range(20)
    for day, t, _ in maps[i]
}

In [None]:
# Number of distinct "<day>-<t>" keys collected in `cluster_map`.
len(cluster_map)

In [None]:
from t4c22.dataloading.road_graph_mapping import TorchRoadGraphMapping
from t4c22.t4c22_config import day_t_filter_weekdays_daytime_only

In [None]:
# Build the library's road-graph mapping for London with the same `edge_attributes` list used above.
# `df_filter` is the weekday/daytime filter imported from t4c22_config; what it filters
# is defined in the library, not in this notebook.
torch_road_graph_mapping = TorchRoadGraphMapping(
    city="london",
    edge_attributes=edge_attributes,
    root=basedir,
    df_filter=day_t_filter_weekdays_daytime_only
)

# load_inputs_day_t function from road_graph_mapping

In [None]:
from t4c22.t4c22_config import load_inputs, load_basedir

In [None]:
# Re-resolve the base directory so this section can be run on its own (same call as earlier in the notebook).
basedir = load_basedir()

In [None]:
# Example sample to walk through: one day and one `t` value (both are used to filter rows further down).
day = "2020-01-27"
t = 30
# Load the London training inputs for that day through the t4c22 helper.
df_x = load_inputs(basedir, city="london", split="train", day=day)

In [None]:
# Preview the first rows of the loaded inputs.
df_x.head()

In [None]:
# Distinct `t` values present for this day.
df_x.t.unique()

In [None]:
# Cast node ids to int64, then expand the list-like `volumes_1h` column into one row per element.
df_x = (
    df_x
    .astype({"node_id": "int64"})
    .explode("volumes_1h")
)
df_x.head()

In [None]:
# After the explode, rows that came from the same list share an index label. Renumber the
# rows (the old index is kept as a column), then derive `slot` as the row position modulo 4
# and store the volumes as floats.
# presumably every `volumes_1h` list holds four values, so `slot` is the position within it — TODO confirm
df_x = (
    df_x
    .reset_index()
    .assign(
        slot=lambda frame: frame.index % 4,
        volumes_1h=lambda frame: frame["volumes_1h"].astype("float"),
    )
)
df_x.head()

In [None]:
# Feature tensor: one row per entry of the mapping's node_to_int_mapping, four columns,
# pre-filled with NaN so that positions never written below stay NaN.
x = torch.full(size=(len(torch_road_graph_mapping.node_to_int_mapping), 4), fill_value=float("nan"))
x.shape

In [None]:
# Select the rows belonging to the single sample being loaded.
if day == "test":
    # (Mis-)use (day, t) for test sets, which do not expose day/t: rows are matched on
    # `test_idx` instead. `idx` (an argument of the library function) is not defined
    # anywhere in this notebook, so `t` stands in as the test index.
    # NOTE(review): confirm against how the t4c22 dataset calls load_inputs_day_t.
    data = df_x[df_x["test_idx"] == t].copy()
else:
    # Regular case: match on both the day and the time slot.
    data = df_x[(df_x["day"] == day) & (df_x["t"] == t)].copy()
data.head()

In [None]:
# Translate each row's node id into its integer position in the road-graph mapping.
data["node_index"] = [
    torch_road_graph_mapping.node_to_int_mapping[node_id]
    for node_id in data["node_id"]
]

In [None]:
# Scatter the volumes into the feature tensor: row = node position, column = slot.
node_rows = data["node_index"].values
slot_cols = data["slot"].values
volumes = torch.tensor(data["volumes_1h"].values).float()
x[node_rows, slot_cols] = volumes

In [None]:
# Inspect the filled tensor; positions that were not written above are still NaN.
x

In [None]:
# Shape check: the assignment above writes in place, so this matches the shape at creation.
x.shape