In [32]:
import pandas as pd
import os

In [None]:
def load_geolife_plt(filepath: str) -> pd.DataFrame:
    """Load one GeoLife ``.plt`` trajectory file into a DataFrame.

    The first six lines of the file are skipped as header; every remaining
    row is parsed as seven comma-separated fields, of which five are kept.

    Args:
        filepath: Path to the ``.plt`` file.

    Returns:
        A new DataFrame with the columns ``lat``, ``lon``, ``alt``, ``date``
        and ``time``, one row per record in the file.
    """
    field_names = ["lat", "lon", "unused", "alt", "timestamp", "date", "time"]
    kept_fields = ["lat", "lon", "alt", "date", "time"]
    # Selecting the columns at parse time yields a fresh frame rather than a
    # slice of a wider one, so callers can add columns to the result without
    # triggering pandas' SettingWithCopyWarning.
    return pd.read_csv(
        filepath,
        skiprows=6,
        header=None,
        names=field_names,
        usecols=kept_fields,
    )

In [None]:
def create_all_trajectories_df_id_trajectory(base_path: str, user_ids: list) -> pd.DataFrame:
    """Load every trajectory of the given users into one long DataFrame.

    For each user the directory ``<base_path>/<user_id>/Trajectory`` is
    listed, its ``.plt`` files are loaded in sorted filename order, and each
    record is tagged with the user, a per-user trajectory counter and the
    source filename.

    Args:
        base_path: Directory that contains one sub-directory per user.
        user_ids: Identifiers of the users to load (sub-directory names).

    Returns:
        A DataFrame with the columns produced by ``load_geolife_plt`` plus
        ``user_id``, ``trajectory_number`` (1-based, restarted for every
        user) and ``trajectory_file``. If no trajectory file is found, an
        empty DataFrame with those columns is returned.

    Raises:
        FileNotFoundError: If a user's ``Trajectory`` directory is missing.
    """
    all_trajectories = []
    for user_id in user_ids:
        user_path = os.path.join(base_path, str(user_id), "Trajectory")

        # Only .plt files are trajectories; stray entries (e.g. OS metadata
        # files) must neither be parsed nor consume a trajectory number.
        # Sorting keeps the numbering stable between runs.
        trajectory_files = sorted(
            name for name in os.listdir(user_path) if name.lower().endswith(".plt")
        )

        for trajectory_number, traj_file in enumerate(trajectory_files, start=1):
            # assign() builds a new frame instead of mutating the loaded one.
            traj_df = load_geolife_plt(os.path.join(user_path, traj_file)).assign(
                user_id=user_id,
                trajectory_number=trajectory_number,
                trajectory_file=traj_file,
            )
            all_trajectories.append(traj_df)

    if not all_trajectories:
        # pd.concat raises on an empty list; return a typed-by-name empty frame.
        return pd.DataFrame(
            columns=[
                "lat", "lon", "alt", "date", "time",
                "user_id", "trajectory_number", "trajectory_file",
            ]
        )

    return pd.concat(all_trajectories, ignore_index=True)


In [3]:
# Load a single sample trajectory file to inspect the parsed columns.
df = load_geolife_plt("../data/019/Trajectory/20081214092748.plt")

In [4]:
# Preview the first rows of the loaded trajectory.
df.head()

Unnamed: 0,lat,lon,alt,date,time
0,39.902433,116.44372,164.0,2008-12-14,09:27:48
1,39.902433,116.443497,164.0,2008-12-14,09:27:49
2,39.902428,116.443273,164.0,2008-12-14,09:27:50
3,39.902432,116.443043,164.0,2008-12-14,09:27:51
4,39.902427,116.442817,164.0,2008-12-14,09:27:52


In [35]:
def get_geohash(lat, lon, precision=7):
    """Encode a coordinate as a geohash and report the bounds of its cell.

    The latitude and longitude intervals are bisected alternately, starting
    with longitude; each bisection contributes one bit and every five bits
    become one base-32 character.

    Args:
        lat: Latitude in degrees.
        lon: Longitude in degrees.
        precision: Number of characters in the resulting geohash.

    Returns:
        A tuple ``(geohash, bbound)`` where ``geohash`` is the encoded string
        and ``bbound`` is ``(min_lat, max_lat, min_lon, max_lon)`` for the
        cell reached after the last bisection.
    """
    alphabet = "0123456789bcdefghjkmnpqrstuvwxyz"
    lat_lo, lat_hi = -90.0, 90.0
    lon_lo, lon_hi = -180.0, 180.0

    symbols = []
    chunk = 0  # bits of the character currently being assembled
    for step in range(5 * precision):
        if step % 2 == 0:
            # Even steps refine the longitude interval.
            middle = (lon_lo + lon_hi) / 2
            if lon >= middle:
                bit, lon_lo = 1, middle
            else:
                bit, lon_hi = 0, middle
        else:
            # Odd steps refine the latitude interval.
            middle = (lat_lo + lat_hi) / 2
            if lat >= middle:
                bit, lat_lo = 1, middle
            else:
                bit, lat_hi = 0, middle

        chunk = (chunk << 1) | bit
        if step % 5 == 4:
            # Five bits collected: emit one base-32 character.
            symbols.append(alphabet[chunk])
            chunk = 0

    return "".join(symbols), (lat_lo, lat_hi, lon_lo, lon_hi)

In [36]:
def trajectory_to_geohashes(filepath: str, precision: int = 7):
    """Collect the distinct geohash cells visited by one trajectory file.

    Args:
        filepath: Path to the ``.plt`` file, read with ``load_geolife_plt``.
        precision: Geohash length forwarded to ``get_geohash``.

    Returns:
        A dict with two keys:
        ``"celdas_visitadas"`` - set of the distinct values returned by
        ``get_geohash`` for the trajectory's points, i.e. ``(geohash, bbound)``
        tuples;
        ``"n_celdas"`` - the size of that set.
    """
    points = load_geolife_plt(filepath)

    # Iterate the two coordinate columns directly instead of a row-wise
    # DataFrame.apply: nothing is written back into the loaded frame, no
    # per-row Series is built, and a frame without rows yields an empty set.
    unique_cells = {
        get_geohash(lat, lon, precision)
        for lat, lon in zip(points["lat"], points["lon"])
    }

    return {
        "celdas_visitadas": unique_cells,
        "n_celdas": len(unique_cells)
    }

In [37]:
# Distinct precision-8 geohash cells (with their bounds) for one trajectory file.
trajectory_to_geohashes("../data/020/Trajectory/20110825143825.plt", precision=8)

{'celdas_visitadas': {('wx4eqt0s',
   (39.97444152832031,
    39.974613189697266,
    116.30195617675781,
    116.30229949951172)),
  ('wx4eqt0t',
   (39.974613189697266,
    39.97478485107422,
    116.30195617675781,
    116.30229949951172)),
  ('wx4eqt0u',
   (39.97444152832031,
    39.974613189697266,
    116.30229949951172,
    116.30264282226562)),
  ('wx4eqt0v',
   (39.974613189697266,
    39.97478485107422,
    116.30229949951172,
    116.30264282226562)),
  ('wx4eqt0w',
   (39.97478485107422,
    39.97495651245117,
    116.30195617675781,
    116.30229949951172)),
  ('wx4eqt0x',
   (39.97495651245117,
    39.975128173828125,
    116.30195617675781,
    116.30229949951172)),
  ('wx4eqt0y',
   (39.97478485107422,
    39.97495651245117,
    116.30229949951172,
    116.30264282226562)),
  ('wx4eqt1h',
   (39.97444152832031,
    39.974613189697266,
    116.30264282226562,
    116.30298614501953)),
  ('wx4eqt1j',
   (39.974613189697266,
    39.97478485107422,
    116.30264282226562,


In [38]:
# Same extraction for a second trajectory file under the same data directory.
trajectory_to_geohashes("../data/020/Trajectory/20110826011926.plt", precision=8)

{'celdas_visitadas': {('wx4eqt1u',
   (39.97444152832031,
    39.974613189697266,
    116.30367279052734,
    116.30401611328125)),
  ('wx4eqt4h',
   (39.97444152832031,
    39.974613189697266,
    116.30401611328125,
    116.30435943603516)),
  ('wx4eqt4j',
   (39.974613189697266,
    39.97478485107422,
    116.30401611328125,
    116.30435943603516)),
  ('wx4eqt4k',
   (39.97444152832031,
    39.974613189697266,
    116.30435943603516,
    116.30470275878906)),
  ('wx4eqt4m',
   (39.974613189697266,
    39.97478485107422,
    116.30435943603516,
    116.30470275878906)),
  ('wx4eqt4s',
   (39.97444152832031,
    39.974613189697266,
    116.30470275878906,
    116.30504608154297)),
  ('wx4eqt4t',
   (39.974613189697266,
    39.97478485107422,
    116.30470275878906,
    116.30504608154297)),
  ('wx4eqt4u',
   (39.97444152832031,
    39.974613189697266,
    116.30504608154297,
    116.30538940429688)),
  ('wx4eqt4w',
   (39.97478485107422,
    39.97495651245117,
    116.30470275878906,