In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
# ---- Configuration ----------------------------------------------------------
n_lst = [10,20,40,60,80,100,150,200]   # suffixes of the input CSV files to process
num_of_cells = 8000                    # positions wrap around at this value (see 'Distance')

gaps_between_traffic_lights = 1000
num_of_traffic_lights = num_of_cells // gaps_between_traffic_lights

gaps_between_digs = 100
num_of_digs = num_of_cells // gaps_between_digs

min_dist = 100     # trips with Distance <= min_dist are dropped
group_dist = 100   # bin width used for 'Distance Dig'

# Timestamps are expressed in steps; one day spans 57600 steps, so an hour is
# 2400 steps and a quarter of an hour is 600 steps.
time_steps_per_day = 57600
time_steps_per_hour = time_steps_per_day // 24
time_steps_per_quarter = time_steps_per_hour // 4

# ---- Build features for every input file -----------------------------------
# Note: `df` deliberately stays in the namespace after the loop (it holds the
# frame for the last entry of n_lst) because later cells read it.
for n in tqdm(n_lst):
    df = pd.read_csv(f"data/3_VehicleData_2024112112250124_{n}.csv",encoding='utf-8')
    df['Travel Time'] = df['Arrival Time'] - df['In Time']
    # Keep only trips with a strictly positive travel time.
    df = df[df['Travel Time']>0].reset_index(drop=True)

    # Day index of the departure.
    print("增加日期")
    df['Date'] = df['In Time'] // time_steps_per_day

    # Departure / arrival time expressed as an offset within the day.
    print("修改时间")
    df['Actual In Time'] = df['In Time'] % time_steps_per_day
    df['Actual Arrival Time'] = df['Arrival Time'] % time_steps_per_day

    # Hour of day (0-23) and quarter-hour of day (0-95).
    # 'Actual In Time' is already reduced modulo one day, so it must be divided
    # by the length of an hour / quarter directly. Dividing by the day length
    # again (or subtracting Date * day length again) would yield a constant 0
    # hour and negative quarters.
    print("增加小时")
    df['Hour'] = (df['Actual In Time'] // time_steps_per_hour).clip(upper=23)
    df['Quarter'] = df['Actual In Time'] // time_steps_per_quarter

    # Morning / evening commute flags (hours 7-10 and 17-20, inclusive).
    print("增加早高峰/晚高峰")
    df['Early Commute'] = df['Hour'].between(7, 10).astype(int)
    df['Late Commute'] = df['Hour'].between(17, 20).astype(int)

    # Segment ids of origin and destination at two resolutions.
    # Position 0 is assigned to the last segment instead of segment 0.
    print("增加位置")
    df['O Route'] = np.where(
        df['Origin'] == 0,
        num_of_traffic_lights - 1,
        df['Origin'] // gaps_between_traffic_lights,
    )
    df['D Route'] = np.where(
        df['Destination'] == 0,
        num_of_traffic_lights - 1,
        df['Destination'] // gaps_between_traffic_lights,
    )
    # Vectorized string concatenation: much faster than a row-wise apply, and
    # the ids keep their column dtype instead of being upcast per row.
    df['OD Route'] = df['O Route'].astype(str) + '_' + df['D Route'].astype(str)

    df['O Dig'] = np.where(
        df['Origin'] == 0,
        num_of_digs - 1,
        df['Origin'] // gaps_between_digs,
    )
    df['D Dig'] = np.where(
        df['Destination'] == 0,
        num_of_digs - 1,
        df['Destination'] // gaps_between_digs,
    )
    df['OD Dig'] = df['O Dig'].astype(str) + '_' + df['D Dig'].astype(str)

    # Distance from origin to destination; when the destination is not ahead
    # of the origin the trip wraps around past num_of_cells.
    print("增加OD间的距离")
    df['Distance'] = df['Destination'].sub(df['Origin']).where(
        df['Origin'] < df['Destination'],
        num_of_cells - df['Origin'] + df['Destination'],
    )

    # OD pairs no farther apart than min_dist are excluded.
    print("删除极短距离行程")
    df = df[df['Distance'] > min_dist].reset_index(drop=True)

    # Distance bin.
    print("为距离增加分组信息")
    df['Distance Dig'] = df['Distance'] // group_dist

    # Per-traffic-light indicator columns plus their sum in 'Traffic Light'.
    # NOTE(review): for i < last, light i is flagged only when the trip starts
    # in segment i and ends in a different segment; segments crossed in between
    # are not flagged. The last light is flagged when D Route < O Route.
    # Confirm this is the intended definition.
    print("增加红绿灯信息")
    df['Traffic Light'] = 0
    last_light = num_of_traffic_lights - 1
    for i in range(num_of_traffic_lights):
        light_col = 'Traffic Light ' + str(i)
        if i == last_light:
            df[light_col] = (df['D Route'] < df['O Route']).astype(int)
        else:
            df[light_col] = ((df['O Route'] == i) & (df['O Route'] != df['D Route'])).astype(int)
        df['Traffic Light'] += df[light_col]

    # The output directory "Feature" is expected to exist already.
    df.to_csv(f"Feature/VehicleData_n{n}.csv",encoding='utf-8',index=False)

  0%|          | 0/8 [00:00<?, ?it/s]

增加日期
修改时间
增加小时
增加早高峰/晚高峰
增加位置
增加OD间的距离
删除极短距离行程
为距离增加分组信息
增加红绿灯信息


 12%|█▎        | 1/8 [00:43<05:06, 43.73s/it]

增加日期
修改时间
增加小时
增加早高峰/晚高峰
增加位置
增加OD间的距离
删除极短距离行程
为距离增加分组信息
增加红绿灯信息


 25%|██▌       | 2/8 [01:27<04:23, 43.91s/it]

增加日期
修改时间
增加小时
增加早高峰/晚高峰
增加位置
增加OD间的距离
删除极短距离行程
为距离增加分组信息
增加红绿灯信息


 38%|███▊      | 3/8 [02:12<03:40, 44.17s/it]

增加日期
修改时间
增加小时
增加早高峰/晚高峰
增加位置
增加OD间的距离
删除极短距离行程
为距离增加分组信息
增加红绿灯信息


 50%|█████     | 4/8 [02:57<02:58, 44.53s/it]

增加日期
修改时间
增加小时
增加早高峰/晚高峰
增加位置
增加OD间的距离
删除极短距离行程
为距离增加分组信息
增加红绿灯信息


 62%|██████▎   | 5/8 [03:42<02:14, 44.93s/it]

增加日期
修改时间
增加小时
增加早高峰/晚高峰
增加位置
增加OD间的距离
删除极短距离行程
为距离增加分组信息
增加红绿灯信息


 75%|███████▌  | 6/8 [04:29<01:31, 45.58s/it]

增加日期
修改时间
增加小时
增加早高峰/晚高峰
增加位置
增加OD间的距离
删除极短距离行程
为距离增加分组信息
增加红绿灯信息


 88%|████████▊ | 7/8 [05:18<00:46, 46.55s/it]

增加日期
修改时间
增加小时
增加早高峰/晚高峰
增加位置
增加OD间的距离
删除极短距离行程
为距离增加分组信息
增加红绿灯信息


100%|██████████| 8/8 [06:08<00:00, 46.09s/it]


In [None]:
# Preview only the first rows instead of rendering the whole frame.
# `df` is the frame left over from the last iteration of the feature loop.
df.head()

In [None]:
# Travel-time statistics (mean, standard deviation, count) per 'OD Dig' pair,
# restricted to rows whose 'Date' lies in [2, 3] (both ends inclusive).
group = (
    df.loc[df['Date'].between(2, 3)]
    .groupby(['OD Dig'])['Travel Time']
    .agg(odh_mean='mean', odh_std='std', odh_count='count')
    .reset_index()
)
group