In [1]:
import pandas as pd
from pathlib import Path
import geopandas as gpd
from shapely import wkb

# Path to the raw trajectory CSV that this notebook reads.
csv_path = Path(r"D:\Siyu Zhao\data\Auckland region park\waitakere_trajectories.csv")

# 1. Load the CSV with explicit dtypes so that IDs stay strings and the
#    Unix time column stays an integer instead of being inferred.
df = pd.read_csv(
    csv_path,
    sep=",",
    header=0,
    dtype={
        "hashed_id": "string",
        "lat": "float64",
        "lon": "float64",
        "time": "int64",
        "polygon_name": "category",
        "geom": "string",
    },
)

# 2. Convert the Unix timestamp (seconds) to a timezone-aware datetime:
#    parse as UTC first, then express it in Auckland local time.
df["datetime"] = (
    pd.to_datetime(df["time"], unit="s", utc=True)
    .dt.tz_convert("Pacific/Auckland")
)

# Epoch seconds do not depend on the timezone, so the float timestamp of the
# converted datetime is simply `time` as float64. A vectorised cast replaces
# the former per-row `.apply(lambda x: x.timestamp())` with identical values.
df["timestamp"] = df["time"].astype("float64")

# 3. Decode the hex-encoded WKB strings into Shapely geometries.
#    `hex=True` lets Shapely do the hex decoding itself.
df["geometry"] = df["geom"].apply(lambda hex_wkb: wkb.loads(hex_wkb, hex=True))


# # 5. print
# print(df.head())

In [2]:
# Build one NumPy array per user, shape (N, 3), with columns [lat, lon, time].
# Users are visited in groupby order (sorted by hashed_id) and each user's
# points are ordered chronologically before being converted.
data = []
for _user_id, user_points in df.groupby("hashed_id"):
    chronological = user_points.sort_values("time")
    trajectory = chronological[["lat", "lon", "time"]]
    data.append(trajectory.to_numpy())

# print(data)

In [3]:
from infostop import Infostop
import numpy as np

# Configure the stop-detection model. The time thresholds are in seconds,
# matching the Unix-second `time` column passed in via `data`.
model = Infostop(
    r1=50,                            # Maximum distance to stay in the same place (for a stop)
    r2=50,                            # Maximum distance to group stops into one destination
    min_staying_time=10 * 60,         # 10 min: shorter stays are not counted as a stop
    max_time_between=24 * 60 * 60,    # 24 h: a longer gap between two nearby points splits the stop
)

# One label array per user, in the same order as the arrays in `data`.
labels = model.fit_predict(data)

# `data` was built per user (groups sorted by hashed_id) with each user's rows
# sorted by time, so the flattened labels follow THAT order, not the row order
# of `df`. Rebuild the matching sequence of df index values with the same
# groupby/sort expression so every label can be attached to its own row.
row_order = np.concatenate(
    [
        group.sort_values("time").index.to_numpy()
        for _, group in df.groupby("hashed_id")
    ]
)
flat_labels = np.concatenate(labels)
if len(flat_labels) != len(row_order):
    raise ValueError(
        f"Infostop returned {len(flat_labels)} labels for {len(row_order)} grouped rows"
    )

# -1 means dynamic (not a stop), non-negative integers are stop location IDs.
# Assigning an index-aligned Series (instead of a bare array) keeps labels on
# the right rows even when `df` is not pre-sorted by (hashed_id, time). Rows
# that groupby skipped (missing hashed_id) end up as NaN rather than raising.
df_result = df.copy()
df_result["destination_id"] = pd.Series(flat_labels, index=row_order).reindex(df_result.index)

# Wrap as a GeoDataFrame; the coordinate reference system is WGS 84.
gdf_result = gpd.GeoDataFrame(df_result, geometry="geometry", crs="EPSG:4326")


# print(gdf_result.head())


In [21]:

# Write the labelled points to CSV; the DataFrame index is not written.
output_path = r"D:\Siyu Zhao\data\Auckland region park\infostop.csv"
gdf_result.to_csv(path_or_buf=output_path, index=False)
