轨迹数据的读取与预处理

In [9]:
import datetime
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
from tqdm import tqdm

# Run parameters
city = 'shenzhen'  # selects the bounding box and the input trace file
GRID_SIZE = 50  # grid cell edge length, in meters
# Hour whose user positions are extracted from the traces
target_dt = datetime.datetime(year=2018, month=9, day=6, hour=12, minute=0)

In [10]:
# Region bounding boxes as (lon_l, lon_r, lat_b, lat_u), in degrees.
CITY_BOUNDS = {
    'beijing': (115.43, 117.52, 39.44, 41.05),
    # 31.883925 was noted next to the upper latitude in the original cell
    # (presumably an earlier bound -- confirm before switching back).
    'shanghai': (120.836619, 122.165824, 30.631414, 31.7),
    'shenzhen': (113.70, 114.70, 22.45, 22.85),
}
# Fail loudly on an unsupported city instead of leaving the bounds undefined
# (or, worse, stale from a previous run of this cell in the same kernel).
if city not in CITY_BOUNDS:
    raise ValueError(
        f"Unsupported city {city!r}; expected one of {sorted(CITY_BOUNDS)}"
    )
lon_l, lon_r, lat_b, lat_u = CITY_BOUNDS[city]

# GRID_SIZE (meters) is taken from the parameter cell. It is intentionally not
# re-assigned here, so that changing it there is not silently overridden.

# Longitude / latitude step per grid cell
earth_radius = 6378137.0  # meters (the WGS-84 equatorial radius value)
pi = np.pi
meter_per_degree = earth_radius * pi / 180.0  # arc length of one degree of latitude
lat_step = GRID_SIZE * (1.0 / meter_per_degree)  # degrees of latitude per cell
# A degree of longitude shrinks by cos(latitude); use the region's mid latitude.
ratio = np.cos((lat_b + lat_u) * np.pi / 360)
lon_step = lat_step / ratio  # degrees of longitude per cell
# Number of grid rows / columns needed to cover the bounding box.
lat_split = int(np.ceil((lat_u - lat_b) / lat_step))
lon_split = int(np.ceil((lon_r - lon_l) / lon_step))

# Time origin: trace timestamps are hour offsets from this instant.
t0 = datetime.datetime(2018, 1, 1, 0, 0, 0)

# Reader helper
def read_from_text(file):
    """Lazily yield each line of ``file`` as a list of tab-separated fields.

    Carriage-return and newline characters are stripped from both ends of
    every line before it is split on tab characters.

    Args:
        file: Any iterable of strings, e.g. an open text file.

    Yields:
        list[str]: The fields of one line.
    """
    for raw_line in file:
        stripped = raw_line.strip('\r\n')
        fields = stripped.split('\t')
        yield fields

# Map grid indices back to real longitude/latitude (for reverse lookup when visualizing)
def index_to_lon(x_idx):
    """Return the longitude, in degrees, at grid column index ``x_idx``.

    The result is measured from the ``lon_l`` boundary in steps of ``lon_step``.
    """
    offset_deg = x_idx * lon_step
    return lon_l + offset_deg

def index_to_lat(y_idx):
    """Return the latitude, in degrees, at grid row index ``y_idx``.

    The result is measured from the ``lat_b`` boundary in steps of ``lat_step``.
    """
    offset_deg = y_idx * lat_step
    return lat_b + offset_deg

In [None]:
# Per-user movement between the target hour and two hours later:
# uid -> (xi, yi, xi_2, yi_2) in integer grid-cell indices.
user_move = {}
# Users that have a sample at both the target hour and the target hour + 2,
# counted before the region-bounds filter below.
user_num = 0

# Hour offset of target_dt relative to t0, computed once instead of building a
# datetime for every sample. Trace timestamps are whole hours since t0, so a
# target that is not exactly on the hour can never match any sample.
target_hour, _off_hour = divmod(target_dt - t0, datetime.timedelta(hours=1))
if _off_hour:
    target_hour = None


def _find_move_points(fields, target_hour):
    """Locate one user's samples at ``target_hour`` and two hours later.

    Args:
        fields: Flat list of strings laid out as ``[t, x, y, t, x, y, ...]``
            where ``t`` is an integer hour offset and ``x`` / ``y`` are grid
            coordinates. A trailing incomplete triple is ignored.
        target_hour: Integer hour offset to look for, or None for "no match".

    Returns:
        ``((x, y), (x_2, y_2))`` as floats, or None when the user has no
        sample at ``target_hour`` or none at ``target_hour + 2``.

    Raises:
        ValueError: If a timestamp, or one of the two selected coordinates,
            cannot be parsed as a number.
    """
    n_points = len(fields) // 3
    t_list = [int(fields[3 * i]) for i in range(n_points)]
    if target_hour not in t_list:
        return None
    later_hour = target_hour + 2
    if later_hour not in t_list:
        return None

    src = t_list.index(target_hour)  # first sample at the target hour
    # Keep the original preference (two samples ahead, then one sample ahead),
    # but fall back to a search so the destination is always taken from this
    # user's own trace rather than being undefined or left over from a
    # previously processed user.
    if src + 2 < n_points and t_list[src + 2] == later_hour:
        dst = src + 2
    elif src + 1 < n_points and t_list[src + 1] == later_hour:
        dst = src + 1
    else:
        dst = t_list.index(later_hour)

    def point(i):
        # Only the two selected samples need their coordinates parsed.
        return float(fields[3 * i + 1]), float(fields[3 * i + 2])

    return point(src), point(dst)


# Read and process the trace file: one "uid<TAB>t|x|y|t|x|y|..." record per line.
with open(f'./record/record_{city}.txt', 'r') as file:
    for record in tqdm(read_from_text(file), desc="Processing traces"):
        if record == ['']:
            continue  # blank line (e.g. at the end of the file)
        uid, trace = record
        points = _find_move_points(trace.split('|'), target_hour)
        if points is None:
            continue
        user_num += 1

        (x, y), (x_2, y_2) = points
        # Range-check the raw coordinates before truncating: int() rounds
        # toward zero, so a value in (-1, 0) would otherwise become cell 0 and
        # be accepted although it lies outside the region.
        if (0 <= x < lon_split and 0 <= y < lat_split
                and 0 <= x_2 < lon_split and 0 <= y_2 < lat_split):
            user_move[uid] = (int(x), int(y), int(x_2), int(y_2))

In [None]:
# Display the number of grid columns along the longitude axis.
lon_split

In [None]:
import os
import pickle

# Destination of the pickled uid -> (xi, yi, xi_2, yi_2) mapping.
out_path = f'./user_move/move_{city}_{target_dt.strftime("%Y%m%d_%H%M")}.pkl'
# Create the output folder up front so that a missing directory does not
# throw away the result of the (slow) processing pass with FileNotFoundError.
os.makedirs(os.path.dirname(out_path), exist_ok=True)

with open(out_path, 'wb') as f:
    pickle.dump(user_move, f)

# Number of users whose start and end cells both fall inside the region.
print(len(user_move))