In [57]:
# Tree depth parameter for this run (the name suggests the depth of an
# indexing tree -- confirm against Core). Further down it is used as
# 2 ** depth_tree when computing the per-axis scales, passed to encode_morton
# when building indexing keys, and embedded in the saved parameter file name.
depth_tree = 7

In [58]:
""" Libraries """
import os
import json
import numpy as np
from tqdm.auto import tqdm
import pandas as pd
import nbimporter
from Core import get_cell, next_point, intermediate_points, encode_morton, get_linestring, json_numpy_serializer

# Notebook display configuration: show every row and print floats with a
# fixed number of decimals.
def _six_decimals(value):
    """Format a number for display with exactly six decimal places."""
    return '%.6f' % value


pd.set_option('display.max_rows', None)
pd.set_option('display.float_format', _six_decimals)

# Hook tqdm into pandas with a labelled progress bar.
tqdm.pandas(desc="Processing time")

In [59]:
""" Parameters """
# Read the run configuration from disk.
with open("para.json", "r") as json_file:
    para = json.load(json_file)

# Folders used below: input files, processed output, saved parameter sets.
fold_test, fold_data, fold_para = (
    para[key] for key in ("fold_test", "fold_data", "fold_para")
)

# Lower and upper bounds along x, y and t.
x_min, y_min, t_min = (para[key] for key in ("x_min", "y_min", "t_min"))
x_max, y_max, t_max = (para[key] for key in ("x_max", "y_max", "t_max"))

# Keep the depth chosen in the first cell together with the loaded values so
# that it is written out with them at the end.
para["depth_tree"] = depth_tree

In [60]:
""" Offsetting and Scaling"""
# Start from a clean output folder: remove files left by earlier runs, or
# create the folder if it does not exist yet.
if os.path.exists(fold_data):
    for stale_name in os.listdir(fold_data):
        os.remove("{}/{}".format(fold_data, stale_name))
else:
    os.makedirs(fold_data)


def _axis_scale(extent, divisions):
    """Return the integer scale factor for one axis.

    For a non-negative extent this equals floor(extent / divisions) + 1, so
    extent / scale stays strictly below `divisions`, even when the extent is
    an exact multiple of `divisions`.
    """
    return int((extent + (divisions - extent % divisions)) / divisions)


# Offsets are the lower bounds, so shifted coordinates start at zero.
x_off, y_off, t_off = x_min, y_min, t_min

# Number of divisions per axis, derived from the tree depth.
time_split = 2 ** depth_tree
para["time_split"] = time_split

x_rng, y_rng, t_rng = x_max - x_min, y_max - y_min, t_max - t_min
x_scl = _axis_scale(x_rng, time_split)
y_scl = _axis_scale(y_rng, time_split)
t_scl = _axis_scale(t_rng, time_split)

# Shift and rescale every input file, writing the result under fold_data.
# `files` is reused by the following cells.
files = os.listdir(fold_test)
for file in tqdm(files):
    df = pd.read_csv(filepath_or_buffer="{}/{}".format(fold_test, file), sep=",")
    # Whole-column arithmetic: one vectorised operation per column instead of
    # a Python-level call for every row.
    df["x"] = (df["x"] - x_off) / x_scl
    df["y"] = (df["y"] - y_off) / y_scl
    df["t"] = (df["t"] - t_off) / t_scl
    df.to_csv("{}/{}".format(fold_data, file), index=False, mode='w')

# Record the transformation so it can be saved with the other parameters.
para["x_scl"], para["y_scl"], para["t_scl"] = x_scl, y_scl, t_scl
para["x_off"], para["y_off"], para["t_off"] = x_off, y_off, t_off

  0%|          | 0/1000 [00:00<?, ?it/s]

In [61]:
""" Points Interpolating """
# Walk every trajectory in time order. Whenever two consecutive points fall in
# different cells, insert the points returned by intermediate_points between
# them; inserted points carry the state of the segment's starting point.
for file in tqdm(files):
    path = "{}/{}".format(fold_data, file)
    track = pd.read_csv(filepath_or_buffer=path, sep=",").sort_values(by=['t'])
    rows = track.values

    prev_row = rows[0]
    out_rows = [prev_row]
    for cur_row in rows[1:]:
        prev_xyt = np.array([prev_row[0], prev_row[1], prev_row[2]])
        prev_state = np.array([prev_row[3]])
        cur_xyt = np.array([cur_row[0], cur_row[1], cur_row[2]])

        same_cell = get_cell(prev_xyt) == get_cell(cur_xyt)
        if not same_cell:
            out_rows.extend(
                np.concatenate((mid_point, prev_state))
                for mid_point in intermediate_points(prev_xyt, cur_xyt)
            )
        # The current point is always kept and becomes the next segment start.
        out_rows.append(cur_row)
        prev_row = cur_row

    # Overwrite the scaled file with the densified trajectory.
    new_df = pd.DataFrame(np.vstack(out_rows), columns=["x", "y", "t", "s"])
    new_df.to_csv(path, index=False, mode='w')

  0%|          | 0/1000 [00:00<?, ?it/s]

In [62]:
""" Sequence Splitting """
scale = np.array([x_scl, y_scl, t_scl])


def _unscale(point):
    """Multiply a scaled (x, y, t) point by the per-axis scales and cast to int.

    The offsets are not added back. `astype(int)` truncates toward zero.
    NOTE(review): a value that lands just below an integer after the
    divide/multiply round trip is truncated down -- confirm this is acceptable.
    """
    return (point * scale).astype(int)


def _segment_record(cell_point, state, points):
    """Build one output row for a finished segment.

    The indexing key is 0 when depth_tree is 0, otherwise the Morton code of
    the cell containing `cell_point`.
    """
    if depth_tree == 0:
        key = 0
    else:
        key = encode_morton(*get_cell(cell_point), depth_tree)
    return {"indexing_key": key, "state": int(state[0]), "geometry": get_linestring(points)}


# Cut every trajectory into segments that stay inside one cell with one state.
for file in tqdm(files):
    df = pd.read_csv(filepath_or_buffer="{}/{}".format(fold_data, file))
    values = df.values
    start_v = values[0]
    groups = []
    group = []
    for i in range(1, len(values)):
        start_p = np.array([start_v[0], start_v[1], start_v[2]])
        start_s = np.array([start_v[3]])
        group.append(_unscale(start_p))
        end_v = values[i]
        end_p = np.array([end_v[0], end_v[1], end_v[2]])
        end_s = np.array([end_v[3]])
        if not (get_cell(start_p) == get_cell(end_p) and start_s == end_s):
            # Cell or state changes here: close the segment on the end point.
            # The same point opens the next segment on the following iteration.
            group.append(_unscale(end_p))
            groups.append(_segment_record(start_p, start_s, group))
            group = []
        start_v = end_v
    if group:
        # The trajectory ended inside an open segment. The loop only appends
        # each step's start point, so the very last point has to be added here
        # before flushing; otherwise the trailing segment loses its final
        # vertex, or is dropped entirely when it has just two points.
        group.append(_unscale(end_p))
        groups.append(_segment_record(start_p, start_s, group))
    new_df = pd.DataFrame(groups, columns=["indexing_key", "state", "geometry"])
    new_df.to_csv("{}/{}".format(fold_data, file), mode='w', index=False)

  0%|          | 0/1000 [00:00<?, ?it/s]

In [63]:
""" Parameters """
# Save the parameters, including the derived scales and offsets, in a file
# named after the tree depth. The folder is created first so that a missing
# directory does not fail the run at its very last step.
if fold_para:
    os.makedirs(fold_para, exist_ok=True)
with open("{}/para_{}.json".format(fold_para, depth_tree), "w") as json_file:
    json.dump(para, json_file, default=json_numpy_serializer)