In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import os
from datetime import datetime
import folium

# Set display options
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

# Plotting style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

%matplotlib inline

In [4]:
# Path to data
data_path = Path('../data/raw')

# Get list of users
users = sorted([d for d in os.listdir(data_path) if os.path.isdir(data_path / d)])
print(f"Total users: {len(users)}")
print(f"First 10 users: {users[:10]}")

Total users: 182
First 10 users: ['000', '001', '002', '003', '004', '005', '006', '007', '008', '009']


In [5]:
# Pick a user to explore
user_id = '000'
user_path = data_path / user_id / 'Trajectory'

# Get their trajectory files
trajectory_files = list(user_path.glob('*.plt'))
print(f"User {user_id} has {len(trajectory_files)} trajectory files")
print(f"Example file: {trajectory_files[0].name}")

User 000 has 171 trajectory files
Example file: 20081023025304.plt


In [6]:
# PLT file format (according to User Guide):
# Lines 1-6: header (skip)
# Line 7+: Latitude,Longitude,0,Altitude,Days,Date,Time

def read_plt_file(filepath):
    """Read a single PLT trajectory file"""
    df = pd.read_csv(
        filepath,
        skiprows=6,
        header=None,
        names=['lat', 'lon', 'zero', 'altitude', 'days', 'date', 'time']
    )
    
    # Combine date and time into datetime
    df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'])

    # Add a column to add name of trajectory file
    df['filename'] = filepath.name

    # Add user name
    df['user'] = user_id
    
    # Drop unnecessary columns
    df = df[['user', 'lat', 'lon', 'altitude', 'datetime', 'filename']]
    
    return df

# Test it
sample_traj = read_plt_file(trajectory_files[0])
print(sample_traj.head())
print(f"\nShape: {sample_traj.shape}")
print(f"Time range: {sample_traj.datetime.min()} to {sample_traj.datetime.max()}")

  user        lat         lon  altitude            datetime  \
0  000  39.984702  116.318417       492 2008-10-23 02:53:04   
1  000  39.984683  116.318450       492 2008-10-23 02:53:10   
2  000  39.984686  116.318417       492 2008-10-23 02:53:15   
3  000  39.984688  116.318385       492 2008-10-23 02:53:20   
4  000  39.984655  116.318263       492 2008-10-23 02:53:25   

             filename  
0  20081023025304.plt  
1  20081023025304.plt  
2  20081023025304.plt  
3  20081023025304.plt  
4  20081023025304.plt  

Shape: (908, 6)
Time range: 2008-10-23 02:53:04 to 2008-10-23 11:11:12


In [7]:
# See full history for one user

all_traj_files = []

for traj_file in trajectory_files:
    traj = read_plt_file(traj_file)
    all_traj_files.append(traj)

user_000_df = pd.concat(all_traj_files, ignore_index=True)

print(user_000_df.head)



<bound method NDFrame.head of        user        lat         lon  altitude            datetime  \
0       000  39.984702  116.318417       492 2008-10-23 02:53:04   
1       000  39.984683  116.318450       492 2008-10-23 02:53:10   
2       000  39.984686  116.318417       492 2008-10-23 02:53:15   
3       000  39.984688  116.318385       492 2008-10-23 02:53:20   
4       000  39.984655  116.318263       492 2008-10-23 02:53:25   
...     ...        ...         ...       ...                 ...   
173865  000  40.000403  116.327255       149 2009-07-05 07:44:55   
173866  000  40.000433  116.327209       150 2009-07-05 07:45:00   
173867  000  40.000443  116.327186       150 2009-07-05 07:45:05   
173868  000  40.000522  116.327132       149 2009-07-05 07:45:10   
173869  000  40.000543  116.327148       150 2009-07-05 07:45:15   

                  filename  
0       20081023025304.plt  
1       20081023025304.plt  
2       20081023025304.plt  
3       20081023025304.plt  
4       

In [8]:
print(user_000_df.isnull().sum().sum())

0


In [18]:

# Calculate center
center_lat = user_000_df['lat'].mean()
center_lon = user_000_df['lon'].mean()

# Create map
m = folium.Map(location=[center_lat, center_lon], zoom_start=11, tiles='CartoDB positron')



coordinates = zip(user_000_df['lat'], user_000_df['lon'])

folium.PolyLine(coordinates).add_to(m)

m.save('../output/user_000_map.html') # Display the map