In [None]:
# 道路数据处理
import geopandas as gpd
import matplotlib.pyplot as plt

# Render Chinese subplot titles with the SimHei font and switch off the
# Unicode minus glyph for axis labels.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# GeoJSON layers to draw; file_names[k] is the Chinese title for file_paths[k].
file_paths = [
    './road2-12-9road/boundaryroad_with9road.geojson',
    './road2-12-9road/crosswalkroad_with9road.geojson',
    './road2-12-9road/laneroad_with9road.geojson',
    './road2-12-9road/signalroad_with9road.geojson',
    './road2-12-9road/stoplineroad_with9road.geojson',
]

file_names = [
    '边界道路',
    '人行横道路',
    '车道线',
    '信号灯道路',
    '停车线道路',
]

# One row of five panels (figsize controls the overall figure size).
fig, axes = plt.subplots(1, 5, figsize=(20, 4))

# Draw every layer in its own panel with an equal x/y scale and title it.
for file_path, panel_title, ax in zip(file_paths, file_names, axes):
    geo_df = gpd.read_file(file_path)
    geo_df.plot(ax=ax, aspect='equal')
    ax.set_title(panel_title)

# Keep the panels from overlapping, then display the figure.
plt.tight_layout()
plt.show()

In [None]:
#json转excel
import pandas as pd
import json
import os
from tqdm import tqdm

# Chinese display name for each integer `type` code. The code is the position
# of the name in the list below (0 through 12).
type_to_chinese = dict(enumerate([
    '未知',
    '小型车辆',
    '行人',
    '非机动车',
    '卡车',
    '厢式货车_面包车',
    '客车',
    '静态物体',
    '路牙',
    '锥桶',
    '手推车_三轮车',
    '信号灯',
    '门_阀门_闸机_出入口',
]))

# Normalise one batch of records and derive the labelled columns
def process_chunk(chunk):
    """Return a processed copy of *chunk* with coordinates and category labels.

    The input DataFrame is not modified; all work happens on a copy.

    Steps, in order:
      * ``position`` cells are decoded from JSON text (cells that are already
        decoded are kept as they are) and their ``x`` / ``y`` entries are
        exposed as ``x_coord`` / ``y_coord``.
      * ``velocity``, ``type``, ``orientation`` and ``heading`` are copied to
        ``speed``, ``vehicle_type``, ``orientation_angle`` and
        ``heading_angle`` when present.
      * Rows whose ``vehicle_type`` is not a key of ``type_to_chinese`` are
        dropped.
      * ``speed_status`` and ``direction`` labels are derived. A missing
        (NaN/None) speed or orientation yields a missing label instead of
        silently falling into the last category.

    Args:
        chunk: DataFrame with at least a ``position`` column.

    Returns:
        A new DataFrame with the columns described above.
    """
    chunk = chunk.copy()

    def decode_position(pos):
        # Only text needs decoding; an already-parsed mapping passes through.
        if isinstance(pos, (str, bytes, bytearray)):
            return json.loads(pos)
        return pos

    def coordinate(axis):
        # Non-mapping positions (e.g. a missing value) give a missing coordinate.
        return lambda pos: pos.get(axis) if isinstance(pos, dict) else None

    chunk['position'] = chunk['position'].apply(decode_position)
    chunk['x_coord'] = chunk['position'].apply(coordinate('x'))
    chunk['y_coord'] = chunk['position'].apply(coordinate('y'))

    # Copy raw columns to the names used by the rest of the pipeline.
    aliases = (
        ('velocity', 'speed'),
        ('type', 'vehicle_type'),
        ('orientation', 'orientation_angle'),
        ('heading', 'heading_angle'),
    )
    for source, alias in aliases:
        if source in chunk.columns:
            chunk[alias] = chunk[source]

    # Keep only the object types that have a display name.
    if 'vehicle_type' in chunk.columns:
        chunk = chunk[chunk['vehicle_type'].isin(type_to_chinese.keys())].copy()

    if 'speed' in chunk.columns:
        def speed_status(speed):
            if pd.isna(speed):
                return None  # unknown speed: do not label it as high speed
            if speed == 0:
                return '静止'
            if speed < 5:
                return '低速'
            return '高速'

        chunk['speed_status'] = chunk['speed'].apply(speed_status)

    if 'orientation_angle' in chunk.columns:
        def direction_category(orientation):
            # NOTE(review): the bounds look like rough quadrant limits for an
            # angle in radians; every value outside (-1.5, 1.5] maps to '西'.
            # Confirm the unit and the intended bounds.
            if pd.isna(orientation):
                return None  # unknown orientation: do not label it as west
            if -0.5 < orientation <= 0.5:
                return '东'
            if 0.5 < orientation <= 1.5:
                return '北'
            if -1.5 < orientation <= -0.5:
                return '南'
            return '西'

        chunk['direction'] = chunk['orientation_angle'].apply(direction_category)

    return chunk

# Stream a JSON-lines file and store it as per-type Excel workbooks, chunk by chunk
def _write_chunk_by_type(records, output_dir, part_label):
    """Process one batch of decoded records and write one workbook per type.

    Each workbook is named ``<chinese type name>_part_<part_label>.xlsx``.
    """
    processed_chunk = process_chunk(pd.DataFrame(records))

    for vehicle_type, group_df in processed_chunk.groupby('vehicle_type'):
        chinese_name = type_to_chinese[vehicle_type]
        output_file = os.path.join(output_dir, f'{chinese_name}_part_{part_label}.xlsx')
        group_df.to_excel(output_file, index=False)


def incremental_process_by_type_to_excel(file_path, output_dir, chunk_size=50000):
    """Convert a JSON-lines file into Excel workbooks split by object type.

    The file is read line by line. Every ``chunk_size`` records are processed
    with ``process_chunk`` and written as ``<type>_part_<k>.xlsx`` (k = 0, 1,
    ...); the records left over at the end go to ``<type>_part_final.xlsx``.

    Args:
        file_path: Path of the input file, one JSON object per line.
        output_dir: Folder for the workbooks; created when missing.
        chunk_size: Number of records per batch.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a batch has no ``vehicle_type`` column to group on.
    """
    # NOTE: workbook names do not include the input file name, so exporting
    # several inputs into the same output_dir overwrites same-named parts.
    os.makedirs(output_dir, exist_ok=True)

    pending = []
    part_index = 0

    # Read as UTF-8 explicitly; the platform default encoding (e.g. GBK on a
    # Chinese Windows locale) would fail on or corrupt non-ASCII JSON text.
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in tqdm(f, desc=f"Processing {file_path}"):
            # Blank lines (such as a trailing newline) carry no record.
            if not line.strip():
                continue

            pending.append(json.loads(line))

            if len(pending) >= chunk_size:
                _write_chunk_by_type(pending, output_dir, part_index)
                part_index += 1
                pending = []

    # Flush the records that did not fill a complete batch.
    if pending:
        _write_chunk_by_type(pending, output_dir, 'final')

# Batch driver: run the incremental JSON -> Excel export over several inputs
def process_multiple_json_files(file_list, output_dir, chunk_size=50000):
    """Export every file in *file_list* into *output_dir*, one after another.

    Each path is passed to ``incremental_process_by_type_to_excel`` together
    with the same ``output_dir`` and ``chunk_size``.
    """
    for json_path in file_list:
        incremental_process_by_type_to_excel(json_path, output_dir, chunk_size)

# Example run: export one JSON-lines part file into ./数据处理
json_files = ['./part-00002-62742e18-8a7b-444f-91a8-41e5ebf9258f-c000.json']
output_dir = './数据处理'
process_multiple_json_files(
    file_list=json_files,
    output_dir=output_dir,
    chunk_size=50000,
)

In [None]:
# 整合表格
import pandas as pd
from datetime import timedelta
import os

# Input folder (per-type workbooks) and output folder (re-sliced workbooks)
source_folder = './数据处理/'
target_folder = './处理结果/'

# Make sure the output folder exists
os.makedirs(target_folder, exist_ok=True)

# Width of one time slice; every slice becomes one worksheet
slice_width = timedelta(minutes=10)

for filename in os.listdir(source_folder):
    # Only Excel workbooks are processed
    if not filename.endswith('.xlsx'):
        continue

    raw_df = pd.read_excel(os.path.join(source_folder, filename))

    # Keep the needed columns and replace the two angle columns by their difference
    table = raw_df.loc[:, ['id', 'speed', 'x_coord', 'y_coord', 'type', 'time_meas', 'orientation_angle', 'heading_angle']]
    table.loc[:, 'orientation_heading_diff'] = table['orientation_angle'] - table['heading_angle']
    table = table.drop(columns=['orientation_angle', 'heading_angle'])

    # time_meas is interpreted as microseconds
    table['time_meas'] = pd.to_datetime(table['time_meas'], unit='us')

    # Walk consecutive 10-minute slices from the earliest to the latest timestamp
    slices = {}
    slice_start = table['time_meas'].min()
    last_stamp = table['time_meas'].max()
    while slice_start <= last_stamp:
        slice_end = slice_start + slice_width
        stamps = table['time_meas']
        rows = table[(stamps >= slice_start) & (stamps < slice_end)]

        # Slices without any row do not get a worksheet
        if not rows.empty:
            label = f"{slice_start.strftime('%Y%m%d_%H%M')} - {slice_end.strftime('%H%M')}"
            slices[label] = rows

        slice_start = slice_end

    # Write one workbook per input file, one worksheet per slice
    new_filename = 'new_' + filename
    output_path = os.path.join(target_folder, new_filename)
    with pd.ExcelWriter(output_path) as writer:
        for label, rows in slices.items():
            # Worksheet names are cut to 31 characters
            rows.to_excel(writer, sheet_name=label[:31], index=False)

    print(f"处理完成: {new_filename}，保存在 {output_path}")


In [None]:
# 按时间划分合并
import pandas as pd
import os

# Folder holding the re-sliced workbooks, and folder for the merged output
source_folder = './处理结果/'
output_folder = './合并结果/'

# Make sure the output folder exists
os.makedirs(output_folder, exist_ok=True)

# Worksheet name -> list of DataFrames read under that name
sheets_data = {}

# Collect the worksheets of every workbook, grouped by worksheet name
for filename in os.listdir(source_folder):
    if not filename.endswith('.xlsx'):
        continue

    # sheet_name=None loads every worksheet of the workbook
    workbook = pd.read_excel(os.path.join(source_folder, filename), sheet_name=None)
    for sheet_name, frame in workbook.items():
        sheets_data.setdefault(sheet_name, []).append(frame)

# Concatenate the frames of each worksheet name and save them as one workbook
for sheet_name, frames in sheets_data.items():
    output_path = os.path.join(output_folder, f"{sheet_name}.xlsx")
    pd.concat(frames, ignore_index=True).to_excel(output_path, index=False)

    print(f"合并完成并保存: {output_path}")

In [3]:
#绘图
# Limit OpenMP to one thread (avoids the KMeans memory-leak warning). The
# variable is set before pandas / scikit-learn are imported because the
# threading runtime may read it only once, when it is first loaded; setting
# it after those imports can have no effect.
import os
os.environ["OMP_NUM_THREADS"] = "1"

import base64
from io import BytesIO

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Bar, Scatter, Line, Grid
from sklearn.cluster import KMeans


# Use the SimHei font for Chinese labels and switch off the Unicode minus glyph
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Input paths: the road boundary layer and the merged per-window workbooks.
# Both live under the same project folder.
_project_dir = './车辆驾驶画像可视分析'
geojson_file_path = f'{_project_dir}/road2-12-9road/boundaryroad_with9road.geojson'

# Time-window labels in the order the pages are generated
_time_windows = [
    '0000 - 0010', '0001 - 0011', '0003 - 0013', '0004 - 0014',
    '0010 - 0020', '0011 - 0021', '0013 - 0023', '0014 - 0024',
    '0020 - 0030', '0021 - 0031', '0023 - 0033', '0024 - 0034',
    '0030 - 0040', '0031 - 0041', '0033 - 0043', '0034 - 0044',
    '0041 - 0051', '0040 - 0050', '0043 - 0053', '0044 - 0054',
    '0050 - 0100', '0051 - 0101', '0053 - 0103', '0054 - 0104',
]
excel_files = [
    f'{_project_dir}/合并结果/20230413_{window}.xlsx' for window in _time_windows
]

# Colour palette shared by the cluster plot and the per-type chart series
colors = [
    "blue",
    "orange",
    "green",
    "red",
]

# Chart label for each integer `type` code; the code is the position of the
# label in the list below (0 through 12).
type_mapping = dict(enumerate([
    "未识别",
    "小型车辆",
    "行人",
    "非机动车",
    "卡车",
    "厢式货车",
    "客车",
    "静态物体",
    "路牙",
    "锥桶",
    "手推车",
    "信号灯",
    "闸机",
]))

# Template of the selector (landing) page. It is filled in with str.format,
# so `{options}` is the only placeholder and every literal brace of the
# CSS / JavaScript is doubled (`{{` / `}}`). Choosing an entry in the
# drop-down loads the selected page into an <iframe> inside #chartContainer.
main_html = """
<!DOCTYPE html>
<html>
<head>
    <title>Excel Analysis Selector</title>
    <style>
        body {{ font-family: Arial, sans-serif; }}
        .container {{ text-align: center; }}
        .chart-container {{ margin: auto; width: 90%; }}
        select {{ font-size: 16px; padding: 8px; }}
    </style>
</head>
<body>
<div class="container">
    <h1>选择时间片</h1>
    <select id="pageSelector" onchange="selectPage()">
        <option value="" disabled selected>选择一个待分析的时间段</option>
        {options}
    </select>
    <div class="chart-container" id="chartContainer">
        <!-- Placeholder for charts -->
    </div>
</div>
<script>
    function selectPage() {{
        let selector = document.getElementById("pageSelector");
        let selectedValue = selector.value;
        document.getElementById("chartContainer").innerHTML = `<iframe src="${{selectedValue}}" width="100%" height="1000px" frameborder="0"></iframe>`;
    }}
</script>
</body>
</html>
"""

# <option> tags for the selector page, one per generated chart page
option_tags = []

# The chart pages are written into ./result; create the folder up front so
# rendering does not fail when it is missing.
os.makedirs("./result", exist_ok=True)

# The road layer is identical for every page, so it is read only once.
road_data = gpd.read_file(geojson_file_path)

# Build one chart page per merged workbook
for idx, excel_file_path in enumerate(excel_files):
    data = pd.read_excel(excel_file_path, sheet_name='Sheet1')
    file_name = os.path.basename(excel_file_path)

    # ======= KMeans cluster plot, embedded later as a Base64 PNG =======
    # Rows with a missing speed or direction difference are left out.
    data_for_clustering = data[['speed', 'orientation_heading_diff']].dropna().copy()
    n_clusters = 4
    kmeans = KMeans(n_clusters=n_clusters, random_state=0)
    clusters = kmeans.fit_predict(data_for_clustering)
    data_for_clustering['cluster'] = clusters

    fig, ax = plt.subplots(figsize=(6, 4))
    for cluster_num in range(n_clusters):
        cluster_data = data_for_clustering[data_for_clustering['cluster'] == cluster_num]
        ax.scatter(cluster_data['speed'], cluster_data['orientation_heading_diff'], color=colors[cluster_num], label=f'簇 {cluster_num}', s=10)

    # Mark the cluster centres
    centroids = kmeans.cluster_centers_
    ax.scatter(centroids[:, 0], centroids[:, 1], s=100, c='black', marker='x', label='中心点')
    ax.set_xlabel('速度')
    ax.set_ylabel('方向偏差')
    ax.set_title('速度与方向偏差聚类分析')
    ax.legend()

    # Save this figure explicitly (not "the current figure") into memory,
    # then release it and wrap the PNG in an absolutely positioned <div>.
    buffer = BytesIO()
    fig.savefig(buffer, format="png")
    plt.close(fig)
    img_base64 = base64.b64encode(buffer.getvalue()).decode()
    img_html = f'<div style="position:absolute; left:60%; bottom:15%; transform: translateX(-50%);"><img src="data:image/png;base64,{img_base64}" width="500" height="330"></div>'

    # ======= pyecharts charts =======
    # Chart 1: number of rows per object type (bar chart)
    type_counts = data['type'].value_counts()
    bar_chart = (
        Bar()
        .add_xaxis([type_mapping.get(t, f"未知类型 {t}") for t in type_counts.index])
        .add_yaxis("车种数量", type_counts.values.tolist(), label_opts=opts.LabelOpts(is_show=False))
        .set_global_opts(
            xaxis_opts=opts.AxisOpts(name="车种类型"),
            yaxis_opts=opts.AxisOpts(name="数量"),
            legend_opts=opts.LegendOpts(pos_top="2%", pos_left="center", orient="horizontal")
        )
    )

    # Chart 2: road outline plus object positions (scatter chart)
    scatter_chart = Scatter()
    scatter_chart.set_global_opts(
        xaxis_opts=opts.AxisOpts(type_="value", name="X坐标"),
        yaxis_opts=opts.AxisOpts(type_="value", name="Y坐标")
    )

    for _, row in road_data.iterrows():
        geometry = row.geometry
        if geometry.geom_type == 'LineString':
            coordinates = list(geometry.coords)
        elif geometry.geom_type == 'Polygon':
            coordinates = list(geometry.exterior.coords)
        else:
            # Other geometry types are not drawn
            continue
        scatter_chart.add_xaxis([coord[0] for coord in coordinates])
        scatter_chart.add_yaxis("道路", [coord[1] for coord in coordinates], symbol_size=0.2, color="green", label_opts=opts.LabelOpts(is_show=False))

    for t, group in data.groupby('type'):
        scatter_data = group[['x_coord', 'y_coord']].values.tolist()
        # int(): the type column is read as float when it contains blanks,
        # and a float cannot index the colour list.
        color = colors[int(t) % len(colors)]
        scatter_chart.add_xaxis([coord[0] for coord in scatter_data])
        scatter_chart.add_yaxis(type_mapping.get(t, f"未知类型 {t}"), [coord[1] for coord in scatter_data], symbol_size=1.5, color=color, label_opts=opts.LabelOpts(is_show=False))

    # Chart 3: speed per object type (line chart)
    # NOTE(review): the x axis holds every row index while each series gets
    # only its own rows' speeds, so a series is presumably drawn against the
    # first len(group) positions rather than its own rows. Confirm intent.
    line_chart = Line()
    line_chart.add_xaxis(data.index.tolist())
    for t, group in data.groupby('type'):
        color = colors[int(t) % len(colors)]
        line_chart.add_yaxis(type_mapping.get(t, f"未知类型 {t}"), group['speed'].tolist(), is_smooth=True, color=color, symbol="circle", symbol_size=3)

    line_chart.set_global_opts(
        xaxis_opts=opts.AxisOpts(name="ID", axislabel_opts=opts.LabelOpts(is_show=False)),
        yaxis_opts=opts.AxisOpts(name="速度"),
        legend_opts=opts.LegendOpts(is_show=False)
    )

    # Place the three charts on one page with a Grid layout
    grid = Grid(init_opts=opts.InitOpts(width="1200px", height="1000px"))
    grid.add(bar_chart, grid_opts=opts.GridOpts(pos_left="5%", pos_right="60%", pos_top="12%", pos_bottom="55%"))
    grid.add(scatter_chart, grid_opts=opts.GridOpts(pos_left="60%", pos_right="5%", pos_top="12%", pos_bottom="55%"))
    grid.add(line_chart, grid_opts=opts.GridOpts(pos_left="5%", pos_right="60%", pos_top="55%", pos_bottom="5%"))

    # Write the page for this workbook
    single_output_path = f"./result/分表_{idx + 1}.html"
    grid.render(single_output_path)

    # Register the page in the selector drop-down
    option_tags.append(f'<option value="{single_output_path}">{file_name}</option>')

    # Insert the cluster image just before </body> of the rendered page
    with open(single_output_path, "r+", encoding="utf-8") as file:
        html_content = file.read()
        html_content = html_content.replace("</body>", f"{img_html}</body>")
        file.seek(0)
        file.write(html_content)
        file.truncate()

# All <option> tags as one string, ready for the selector template
options_html = "".join(option_tags)

# Fill the selector template with the collected <option> tags, then save the
# landing page next to the notebook.
main_html_content = main_html.format(options=options_html)

with open("主界面.html", mode="w", encoding="utf-8") as landing_page:
    landing_page.write(main_html_content)

print("主选择页面生成完成")




主选择页面生成完成
