In [64]:
import pandas as pd
import transbigdata as tbd
import geopandas as gpd
import numpy as np
from tqdm import tqdm  # 显示循环进度
import time

In [65]:
# 显示设置
pd.set_option('display.max_columns', None)
# 显示宽度 pd.set_option('display.width', 1000)

In [66]:
# 数据路径
base_path = r'd:/Geo_python/Geo_data/2024_07_04/'
charging_stations = pd.read_csv('d:/python/jupyter/Geo/SZ_data.csv')[['lon', 'lat']]  # 充电站数据

In [67]:
##### 1. 数据读取
# 基础路径
base_path = r'd:/Geo_python/Geo_data/2024_07_04/'
# 充电站数据
station = pd.read_csv(r'SZ_data.csv')[['lon', 'lat']]
# 深圳地理范围
sz_bounds = [113.75, 22.4, 114.62, 22.86]  # [minlon, minlat, maxlon, maxlat]
shp = r'd:/Geo_python/pygeo-tutorial-master/shapefile/sz.shp'
sz = gpd.GeoDataFrame.from_file(shp, encoding='utf-8')

In [68]:
# 空列表 存储识别出的充电事件
charging_events = []

In [69]:
##### 原打算利用transbigdata栅格化停车识别，再匹配临近位置充电站。但不是根据停车时间和行驶距离判定停车

In [70]:
start = time.perf_counter()  # 开始时间
# 循环读取每个出租车文件（共288个）
for i in tqdm(range(1, 21), desc='处理出租车数据'):
    # 读取文件
    filename = f"{i:03d}.txt"
    full_path = base_path + filename
    taxi = pd.read_csv(full_path, header=None)
    
    ###### 1.1 数据格式处理
    # 列名
    taxi.columns = ['date','time','H','id','lon_taxi','lat_taxi','velocity','heading','passenger_on_off','shift_type','unnamed']
    taxi = taxi.drop(['H','heading','shift_type','unnamed'], axis=1)
    
    # 修改时间格式（datetime）
    taxi['date'] = taxi['date'].astype(str).str.zfill(8)  # 确保8位日期
    taxi['date'] = taxi['date'].str.slice(0,4) + '-' + taxi['date'].str.slice(4,6) + '-' + taxi['date'].str.slice(6,8)
    taxi['time'] = taxi['time'].astype(str).str.zfill(6)  # 确保6位时间
    taxi['time'] = taxi['time'].str.slice(0,2) + ':' + taxi['time'].str.slice(2,4) + ':' + taxi['time'].str.slice(4,6)
    taxi['datetime'] = pd.to_datetime(taxi['date'] + ' ' + taxi['time'])
    taxi = taxi.drop(['date', 'time'], axis=1)
    
    # 按ID和时间列排序
    taxi = taxi.sort_values(by=['id', 'datetime']).reset_index(drop=True)
    
    ###### 1.2 异常数据剔除
    taxi['pre_id'] = taxi['id'].shift(1)
    taxi['next_id'] = taxi['id'].shift(-1)
    taxi['pre_passenger'] = taxi['passenger_on_off'].shift(1)
    taxi['next_passenger'] = taxi['passenger_on_off'].shift(-1)
    # 剔除异常状态点  前后的载客状态相同但和当前不同，且ID一致
    taxi = taxi[-((taxi['id'] == taxi['pre_id']) & (taxi['id'] == taxi['next_id']) &
                 (taxi['pre_passenger'] == taxi['next_passenger']) & (taxi['passenger_on_off'] != taxi['pre_passenger']))]
    taxi = taxi.drop(['pre_id', 'next_id', 'pre_passenger', 'next_passenger'], axis=1)
    
    ###### 1.3 筛选出未载客空车状态
    taxi_off = taxi[taxi['passenger_on_off'] == 0].drop('passenger_on_off', axis=1).copy()
    
    ##### 2. 识别充电事件（GPS在线停车 / GPS离线）
    # 按出租车ID分组处理
    for tid, group in taxi_off.groupby('id'):
        group = group.sort_values('datetime').reset_index(drop=True)
        n = len(group)
        if n < 2:    # 数据点太少，无法判断
            continue  
        
        ###### 2.1 GPS在线时的停车事件识别
        # 计算相邻点之间的时间差（秒）、距离（米）
        group['time_diff'] = group['datetime'].diff().dt.total_seconds().fillna(0)
        group['distance'] = tbd.getdistance(group['lon_taxi'], group['lat_taxi'],
                                            group['lon_taxi'].shift(1), group['lat_taxi'].shift(1)
                                           ).fillna(0)
        
        # 累积计算位移和持续时间，识别停车段
        group['cum_distance'] = 0.0
        group['cum_duration'] = 0.0
        current_segment = 0  # 标记当前停车段ID
        # 初始化数组
        n = len(group)
        cum_distance_arr = group['distance'].values.copy()
        cum_duration_arr = group['time_diff'].values.copy()
        segment_id_arr = np.zeros(n, dtype=int)
        
        # 处理第一个点
        current_segment = 0
        segment_id_arr[0] = current_segment
        # 迭代处理
        for i in range(1, n):
            total_distance = cum_distance_arr[i-1] + group['distance'].iloc[i]
            if total_distance <= 1000:
                cum_distance_arr[i] = total_distance
                cum_duration_arr[i] = cum_duration_arr[i-1] + group['time_diff'].iloc[i]
                segment_id_arr[i] = current_segment
            else:
                current_segment += 1
                cum_distance_arr[i] = group['distance'].iloc[i]
                cum_duration_arr[i] = group['time_diff'].iloc[i]
                segment_id_arr[i] = current_segment
        
        # 赋值回DataFrame
        group['cum_distance'] = cum_distance_arr
        group['cum_duration'] = cum_duration_arr
        group['segment_id'] = segment_id_arr
                
        # 筛选符合条件的有效停车段（停车持续时间≥10分钟，位移≤1000米）  聚合
        valid_segments = group.groupby('segment_id').agg({'datetime': ['min', 'max'],  # 停车开始/结束时间
                                                          'lon_taxi': 'mean',          # 停车平均位置
                                                          'lat_taxi': 'mean',
                                                          'cum_distance': 'max',       # 总位移
                                                          'cum_duration': 'max'        # 总时长
                                                         }).reset_index()
        valid_segments.columns = ['segment_id', 'start_time', 'end_time', 'avg_lon', 'avg_lat', 'total_distance', 'total_duration']
        # 条件（10分钟=600秒）
        valid_stops = valid_segments[(valid_segments['total_distance'] <= 1000) & (valid_segments['total_duration'] >= 600)  ]
        
        # 是否靠近充电站（≤200米）
        for _, stop in valid_stops.iterrows():
            # 计算停车位置到最近充电站的距离
            min_dist_to_cs = np.min(tbd.getdistance(np.full(len(station), stop['avg_lon']),
                                                    np.full(len(station), stop['avg_lat']),
                                                    station['lon'], station['lat']))
            if min_dist_to_cs <= 200:
                # 符合条件则记录为充电事件
                charging_events.append({   'id': tid,
                                           'start_time': stop['start_time'],
                                           'end_time': stop['end_time'],
                                           'lon': stop['avg_lon'],
                                           'lat': stop['avg_lat'],
                                           'duration': stop['total_duration'],
                                           'type': 'online'  # GPS在线时充电
                                       })
        
        ###### 2.2 GPS离线时的充电事件识别
        # GPS离线时段（相邻记录时间差>10分钟）
        offline_threshold = 600  # 10分钟=600秒
        group['offline_flag'] = group['time_diff'] > offline_threshold
        offline_indices = group[group['offline_flag']].index
        
        for idx in offline_indices:
            # 离线前的最后位置和离线后的第一个位置
            off_loc = group.loc[idx-1]  # 离线前
            on_loc = group.loc[idx]     # 离线后
            offline_duration = group.loc[idx, 'time_diff']
            
            # 条件1：离线时长>10分钟
            if offline_duration <= 600:
                continue
            
            # 条件2：离线前后位移≤1000米
            displacement = tbd.getdistance(off_loc['lon_taxi'], off_loc['lat_taxi'],
                                           on_loc['lon_taxi'], on_loc['lat_taxi']
                                          )
            if displacement > 1000:
                continue
            
            # 条件3：离线前或离线后位置靠近充电站（≤200米）
            dist_off_to_cs = np.min(tbd.getdistance(np.full(len(station), off_loc['lon_taxi']),
                                                    np.full(len(station), off_loc['lat_taxi']),
                                                    station['lon'], station['lat']
                                                   ))
            dist_on_to_cs = np.min(tbd.getdistance(np.full(len(station), on_loc['lon_taxi']),
                                                   np.full(len(station), on_loc['lat_taxi']),
                                                   station['lon'], station['lat']
                                                  ))
            
            if dist_off_to_cs <= 200 or dist_on_to_cs <= 200:
                # 符合条件则记录为充电事件
                charging_events.append({
                    'id': tid,
                    'start_time': off_loc['datetime'],  # 离线开始时间
                    'end_time': on_loc['datetime'],      # 离线结束时间
                    'lon': (off_loc['lon_taxi'] + on_loc['lon_taxi']) / 2,  # 平均位置
                    'lat': (off_loc['lat_taxi'] + on_loc['lat_taxi']) / 2,
                    'duration': offline_duration,
                    'type': 'offline'  # GPS离线时的充电
                    })


end = time.perf_counter()    # 结束时间
print(f"运行时间: {end - start:.6f} 秒")

处理出租车数据: 100%|██████████| 20/20 [15:34<00:00, 46.74s/it]

运行时间: 934.709527 秒





In [71]:
#运行时间过慢 ，1个txt约1.5分钟
#之前结果1k多，现在大概2k多，仍少

In [76]:
##### 3. 结果
charging_events = pd.DataFrame(charging_events)
#最近充电站的ID
def get_nearest_cs(lon, lat):
    dists = tbd.getdistance(np.full(len(station), lon), np.full(len(station), lat),
                           station['lon'], station['lat'])
    return station.iloc[np.argmin(dists)].name  # 返回最近充电站的索引

if not charging_events.empty:
    charging_events['nearest_cs_id'] = charging_events.apply(
        lambda row: get_nearest_cs(row['lon'], row['lat']), axis=1
    )
    # 按时间排序
    charging_events = charging_events.sort_values(['id', 'start_time']).reset_index(drop=True)

print("充电事件数量：", len(charging_events))
charging_events

充电事件数量： 144


Unnamed: 0,id,start_time,end_time,lon,lat,duration,type,nearest_cs_id
0,粤BA66867,2024-07-03 23:51:52,2024-07-04 00:09:02,114.116059,22.561815,1030.0,online,1160
1,粤BA66867,2024-07-03 23:51:52,2024-07-04 00:07:32,114.115993,22.562127,940.0,offline,1160
2,粤BA71737,2024-07-04 00:00:16,2024-07-04 00:21:14,114.099131,22.566095,1258.0,offline,1669
3,粤BA72179,2024-07-04 00:01:30,2024-07-04 00:13:34,114.044117,22.548836,724.0,offline,627
4,粤BA75817,2024-07-04 00:18:13,2024-07-04 00:46:34,114.123418,22.542822,1701.0,offline,748
...,...,...,...,...,...,...,...,...
139,粤BDM3285,2024-07-03 20:22:52,2024-07-04 01:02:07,114.157090,22.608556,16755.0,offline,1226
140,粤BDR8931,2024-07-03 23:55:23,2024-07-04 01:22:21,114.100214,22.608336,5218.0,online,1405
141,粤BDR8931,2024-07-03 23:55:23,2024-07-04 01:20:47,114.100206,22.608328,5124.0,offline,1405
142,粤BDU6593,2024-07-03 19:41:17,2024-07-04 00:54:07,114.105018,22.616308,18770.0,online,524
