In [None]:
import pandas as pd
import geopandas as gpd
from shapely import wkt
import numpy as np
import glob

In [None]:
# 1 - Build the ordered list of TSV logs that will be turned into a dataframe.
path_logs = './data_simulator/'


def _log_index(filename):
    """Return the integer N embedded in an '...AgentStateTable-N.tsv' file name."""
    after_prefix = filename.split("AgentStateTable-")[1]
    return int(after_prefix.split(".tsv")[0])


# Order the logs numerically on their index rather than lexicographically,
# so that e.g. file 10 comes after file 2.
files = glob.glob(f"{path_logs}AgentStateTable-*.tsv")
files.sort(key=_log_index)
files

In [None]:
# 2 - Parse the TSVs: retrieve the positions of the agents.
def _read_agent_positions(tsv_path):
    """Load one AgentStateTable TSV into a frame with timestamp, geometry and ID columns.

    Only the columns at positions 1, 2 and 3 of the file are read; they are
    renamed to "timestamp", "geometry" and "ID" in that order. The timestamp
    column is converted to datetimes and the geometry column (WKT strings) is
    parsed into shapely geometry objects.
    """
    positions = pd.read_csv(
        tsv_path,
        sep="\t",
        usecols=[1, 2, 3],
        dtype={2: str, 3: np.uint32},
    )
    # Assign names to the columns from the TSVs.
    positions.columns = ["timestamp", "geometry", "ID"]

    # Convert the dates.
    positions["timestamp"] = pd.to_datetime(positions["timestamp"])

    # Convert the WKT strings into actual geometry objects.
    positions["geometry"] = positions["geometry"].apply(wkt.loads)
    return positions


# Fail early with an explicit message instead of pandas' generic
# "No objects to concatenate" when the glob matched nothing.
if not files:
    raise ValueError("No AgentStateTable-*.tsv file to parse: `files` is empty.")

list_df = []
for f in files:
    print(f'Processing file {f}...')
    list_df.append(_read_agent_positions(f))

# Concatenate everything. Each per-file frame carries its own 0..n-1 index, so
# the index is rebuilt to avoid duplicated labels in the combined frame.
df = pd.concat(list_df, ignore_index=True)
del list_df

In [None]:
# 3 - Create a GeoDataFrame with the CRS initially set to the one used by the authors of the simulator "Patterns of Life"
#     for all the maps, i.e., EPSG:26916.
original_crs = "EPSG:26916"
gdf = gpd.GeoDataFrame(df, geometry="geometry", crs=original_crs)
del df  # only `gdf` is used from here on

# Project the coordinates to WGS84 (EPSG:4326).
gdf = gdf.to_crs(epsg=4326)
display(gdf)
# `info()` prints its summary itself and returns None, so wrapping it in
# display() would only add a stray "None" to the cell output.
gdf.info()

# Write the final geodataframe to disk.
gdf.to_parquet('./dataset_simulator_trajectories.parquet')