In [1]:
# 查询卡口数据量小时变化和相邻卡口的行车时间间隔分布
# 2025/01/19 by chenxinyi

import copy
import pandas as pd
import numpy as np
import time
import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib import rcParams
import warnings

# Silence every warning for the rest of the session (this also hides pandas
# deprecation and chained-assignment warnings).
warnings.filterwarnings("ignore")
# Show all columns when a DataFrame is displayed instead of eliding the middle ones.
pd.set_option('display.max_columns', None)

In [2]:
# Root folder of the project; the input CSV and the result folder live beneath it.
base_path = r'D:\24216373郑金涛中微观研究'

# Load the preprocessed records, then normalise the two columns used later:
# GCSJ becomes a datetime and KKBH (checkpoint id) becomes a string.
df = pd.read_csv(base_path + r'\center_valid_df.csv', encoding='gbk')
df = df.assign(
    GCSJ=pd.to_datetime(df['GCSJ']),
    KKBH=df['KKBH'].astype('str'),
)

# Split by vehicle-type code (CLLX): K1-K4 are taken as passenger cars and
# H1-H4 as trucks. Each subset gets a fresh 0..n-1 index.
is_car = df['CLLX'].isin(['K1', 'K2', 'K3', 'K4'])
is_truck = df['CLLX'].isin(['H1', 'H2', 'H3', 'H4'])
cars_df = df[is_car].reset_index(drop=True)
trucks_df = df[is_truck].reset_index(drop=True)

In [3]:
# Flag every record whose timestamp is not a whole second.
# A value is second-precise exactly when it equals its own floor to the second,
# so one comparison covers milli-, micro- and nanosecond residue. Missing
# timestamps (NaT) compare as "not equal" and are therefore flagged as well.
# The lowercase 's' alias is used because the uppercase 'S' is deprecated in
# recent pandas releases.
df['has_ms'] = df['GCSJ'] != df['GCSJ'].dt.floor('s')

# Keep at most the first 10 offending rows as examples.
unprecise_rows = df[df['has_ms']].head(10)

if len(unprecise_rows) > 0:
    print("以下是时间不精确到秒的数据示例：")
    print(unprecise_rows[['GCSJ']])
else:
    print("所有时间都已精确到秒。")


# 查询小时ALPR记录数据量

In [3]:
def static_hour_volume(data):
    """Count records per hour of day and each hour's share of the total.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a datetime column ``GCSJ`` (the hour is taken from it)
        and a column ``GCRQ`` (its non-null values are what gets counted).

    Returns
    -------
    pandas.DataFrame
        One row per hour that occurs in ``data``, ordered by hour, with columns
        ``HOUR``, ``count`` and ``perc`` (``count`` divided by the total count).
        Hours without any record are absent from the result.

    Notes
    -----
    The input frame is left untouched: the hour is computed as a temporary
    Series instead of being written back into ``data`` as a ``HOUR`` column.
    """
    # Derive the grouping key without adding a column to the caller's frame.
    hour_of_day = data['GCSJ'].dt.hour

    hour_statics = (
        data['GCRQ']
        .groupby(hour_of_day)
        .count()                 # non-null GCRQ values per hour
        .rename('count')
        .rename_axis('HOUR')
        .reset_index()
    )
    hour_statics['perc'] = hour_statics['count'] / hour_statics['count'].sum()
    return hour_statics

cars_volume = static_hour_volume(cars_df)
trucks_volume = static_hour_volume(trucks_df)

# Combine both tables on the hour itself rather than on row position.
# static_hour_volume only returns hours that actually occur, so if one vehicle
# class has no record in some hour a positional combination would put its
# counts next to the wrong hour. An outer join keeps every hour seen in either
# table; hours missing on one side get a count and share of 0.
car_part = cars_volume.rename(
    columns={'HOUR': 'hour', 'count': 'car_count', 'perc': 'car_perc'})
truck_part = trucks_volume.rename(
    columns={'HOUR': 'hour', 'count': 'truck_count', 'perc': 'truck_perc'})
volums_result = (
    pd.merge(car_part, truck_part, on='hour', how='outer')
    .sort_values('hour')
    .fillna(0)
    .astype({'car_count': 'int64', 'truck_count': 'int64'})
    .reset_index(drop=True)
)

# Save the table. The row index is written as the first column, as before.
volums_result.to_excel(base_path +r'\卡口数据统计结果\中心城区小时卡口数据量.xlsx')

In [4]:
# Display the combined hourly volume table as the cell output.
volums_result

Unnamed: 0,hour,car_count,car_perc,truck_count,truck_perc
0,0,283197,0.012365,9283,0.011877
1,1,176926,0.007725,8016,0.010256
2,2,118653,0.005181,8033,0.010278
3,3,84765,0.003701,8958,0.011461
4,4,73018,0.003188,9792,0.012529
5,5,96889,0.00423,15469,0.019792
6,6,310206,0.013545,18816,0.024074
7,7,1508025,0.065845,25061,0.032065
8,8,1755419,0.076647,36204,0.046322
9,9,1455581,0.063555,61800,0.079071


# 查询相邻卡口行车时间间隔的分布

In [5]:
def static_travel_time(data):
    """Summarise the time gap between consecutive records of the same vehicle.

    Every pair of adjacent rows is examined. A pair contributes one gap when
    both rows have the same ``CLTMBH`` and the same ``GCRQ``; the gap is
    ``GCSJ`` of the later row minus ``GCSJ`` of the earlier row, in seconds.
    Minimum, maximum, mean and median of all gaps are printed, and the gaps
    are then binned into fixed duration classes.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``CLTMBH``, ``GCRQ``, ``KKBH`` (string) and ``GCSJ``
        (datetime). Rows are paired in their stored order, whatever the index
        labels are.
        NOTE(review): the result is only meaningful if the rows are already
        ordered by vehicle and then by passing time - confirm that the
        preprocessing step guarantees this.

    Returns
    -------
    pandas.DataFrame
        Eight rows, one per duration class in ascending order, with columns
        ``travel_time_label`` ('1m', '5m', '15m', '30m', '1h', '8h', '12h',
        '24h'), ``count`` and ``perc`` (share of all binned gaps).
        Bins are left-open / right-closed, i.e. (0, 60], (60, 300], ...;
        gaps of 0 s or of more than 86400 s fall outside every bin and are
        not counted.
    """
    # Pair each row with its successor by position. Doing this on whole columns
    # replaces the former per-row .loc loop, which was slow on large frames and
    # required the index to be exactly 0..n-1.
    records = data[['CLTMBH', 'GCRQ', 'KKBH', 'GCSJ']].reset_index(drop=True)
    earlier = records.iloc[:-1].reset_index(drop=True)
    later = records.iloc[1:].reset_index(drop=True)

    # Keep only pairs that belong to the same vehicle on the same date.
    same_trip = (earlier['CLTMBH'] == later['CLTMBH']) & (earlier['GCRQ'] == later['GCRQ'])

    TravelTime = pd.DataFrame({
        # "from-to" checkpoint pair of each gap
        'KKOD': (earlier['KKBH'] + '-' + later['KKBH'])[same_trip],
        'time': (later['GCSJ'] - earlier['GCSJ']).dt.total_seconds()[same_trip],
    }).reset_index(drop=True)

    # Summary statistics. The Series methods return NaN when there is no gap at
    # all, whereas the built-in min()/max() would raise on an empty input.
    print('最小间隔(s):', TravelTime['time'].min())
    print('最大间隔(s):', TravelTime['time'].max())
    print('平均间隔(s):', TravelTime['time'].mean())
    print('中位数间隔(s):', TravelTime['time'].median())

    # Bin edges in seconds and the readable label of each bin's upper edge.
    interval = [0, 60, 300, 900, 1800, 3600, 28800, 43200, 86400]
    label_hour = ['1m', '5m', '15m', '30m', '1h', '8h', '12h', '24h']

    a = pd.cut(TravelTime['time'], interval, labels=label_hour)
    # value_counts on a categorical keeps empty classes; sort_index restores
    # the ascending class order.
    counts = a.value_counts().sort_index()
    counts = counts.reset_index()
    counts.columns = ['travel_time_label', 'count']
    counts['perc'] = counts['count'] / counts['count'].sum()

    return counts

In [6]:
# Gap distribution for passenger cars; the heading precedes the statistics
# that static_travel_time prints.
print('客车：')
cars_traveltime = static_travel_time(data=cars_df)

# Same for trucks; the leading newline separates the two printed summaries.
print('\n货车：')
trucks_traveltime = static_travel_time(data=trucks_df)

客车：
最小间隔(s): 1.0
最大间隔(s): 85623.0
平均间隔(s): 3007.3915516444313
中位数间隔(s): 188.0

货车：
最小间隔(s): 1.0
最大间隔(s): 85626.0
平均间隔(s): 3394.6041388584026
中位数间隔(s): 315.0


In [None]:
# 保存数据
traveltime_result = pd.DataFrame({
    'travel_time_label': cars_traveltime['travel_time_label'],
    'car_count': cars_traveltime['count'],
    'car_perc': cars_traveltime['perc'],
    'truck_count': trucks_traveltime['count'],
    'truck_perc': trucks_traveltime['perc']
})

# 保存为 Excel 文件
traveltime_result.to_excel('D:/24216373郑金涛中微观研究/卡口数据统计结果/中心城区原始相邻卡口间隔时间分布.xlsx', index=False)


In [None]:
# Display the combined car/truck gap distribution as the cell output.
traveltime_result