In [43]:
# --- 1. Imports ---
import pandas as pd
from src import data_processor
from config import global_config
from IPython.display import display
# importlib is used below to re-execute the project modules, so edits made to
# them on disk take effect without restarting the kernel.
import importlib
# The config module is reloaded before the processing module.
# NOTE(review): presumably so data_processor sees fresh config values when it
# is re-executed -- confirm against how data_processor imports the config.
importlib.reload(global_config)
# As the last expression in the cell, the module returned by this call is
# echoed as the cell output.
importlib.reload(data_processor)

<module 'src.data_processor' from 'd:\\A-Code\\PEMS_HGV_Analysis\\src\\data_processor.py'>

In [None]:


# --- Shared parameters ---
# Emission columns passed to `clean_emission_data`; shared by both vehicle types.
EMISSION_COLS_TO_CLEAN = ['NO浓度(ppm)', 'NO2浓度(ppm)', 'CO浓度(vol%)', 'CO2浓度(vol%)',
                          'CO(g/s)', 'NOx(g/s)', 'CO2(g/s)', 'PN(#/s)']

# Human-readable vehicle labels used in every progress / error message.
DIESEL_LABEL = "柴油车"
LNG_LABEL = "LNG车辆"

# Labels of the vehicles whose pipeline raised; drives the final summary line.
failed_vehicles = []


def process_vehicle_data(label, raw_file_path, segments_file_path, tagged_output_path, columns):
    """Run the load -> tag -> clean -> preview -> save pipeline for one vehicle.

    Args:
        label: Vehicle label interpolated into the printed headings.
        raw_file_path: Path object of the processed data file; only its
            ``.name`` attribute is handed to ``data_processor.load_processed_data``.
        segments_file_path: Passed unchanged to ``data_processor.load_segments_data``.
        tagged_output_path: Passed unchanged to ``data_processor.save_tagged_data``.
        columns: Column names passed to ``data_processor.clean_emission_data``.

    Returns:
        Tuple ``(df_raw, df_segments, df_tagged, df_cleaned)`` holding the
        result of each pipeline stage.

    Raises:
        Whatever the ``data_processor`` helpers raise; nothing is caught here
        so that the caller decides how to report the failure.
    """
    df_raw = data_processor.load_processed_data(raw_file_path.name)
    df_segments = data_processor.load_segments_data(segments_file_path)
    df_tagged = data_processor.tag_segments(df_raw, df_segments)
    df_cleaned = data_processor.clean_emission_data(df_tagged, columns=columns)

    print(f"\n{label}处理结果预览:")
    display(df_cleaned.head())
    print(f"\n{label} 'SegmentType' 分布情况:")
    print(df_cleaned['SegmentType'].value_counts())

    # Persist only after the preview succeeded, mirroring the original order.
    data_processor.save_tagged_data(df_cleaned, tagged_output_path)
    return df_raw, df_segments, df_tagged, df_cleaned


def report_failure(label, error):
    """Print a uniform error message for one vehicle and record the failure.

    Missing-file errors get a hint pointing at the config paths and include the
    exception text so the offending file is visible. Every other exception is
    printed with its type name, because ``str()`` of some exceptions (e.g.
    ``KeyError``) is only the bare key.
    """
    if isinstance(error, FileNotFoundError):
        print(f"\n处理{label}数据时出错: 找不到{label}相关数据文件 ({error})，"
              "请检查`config/global_config.py`中的路径是否正确。")
    else:
        print(f"\n处理{label}数据时发生错误: {type(error).__name__}: {error}")
    failed_vehicles.append(label)


# --- 2. Diesel vehicle ---
print("\n" + "="*20 + f" 开始处理{DIESEL_LABEL}数据 " + "="*20)
try:
    (df_diesel_raw, df_diesel_segments,
     df_diesel_tagged, df_diesel_cleaned) = process_vehicle_data(
        DIESEL_LABEL,
        global_config.DIESEL_RAW_DATA_FILE_PATH,
        global_config.DIESEL_SEGMENTS_FILE_PATH,
        global_config.DIESEL_TAGGED_OUTPUT_PATH,
        EMISSION_COLS_TO_CLEAN,
    )
except Exception as e:
    # Best-effort: a diesel failure must not prevent the LNG run below.
    report_failure(DIESEL_LABEL, e)

# --- 3. LNG vehicle ---
print("\n" + "="*20 + f" 开始处理{LNG_LABEL}数据 " + "="*20)
try:
    (df_lng_raw, df_lng_segments,
     df_lng_tagged, df_lng_cleaned) = process_vehicle_data(
        LNG_LABEL,
        global_config.LNG_RAW_DATA_FILE_PATH,
        global_config.LNG_SEGMENTS_FILE_PATH,
        global_config.LNG_TAGGED_OUTPUT_PATH,
        EMISSION_COLS_TO_CLEAN,
    )
except Exception as e:
    report_failure(LNG_LABEL, e)

# Only claim full success when no pipeline failed.
if failed_vehicles:
    print(f"\n\n--- 数据处理流程结束，但以下车型处理失败: {', '.join(failed_vehicles)} ---")
else:
    print("\n\n--- 所有数据处理和保存流程执行完毕 ---")




成功加载文件: Heavy_Diesel_G6_01_processed.csv
成功加载路段定义文件: 国六柴油_隧道时间与分类_segments.xlsx

--- 开始为数据打标签 ---
打标签完成！

--- 正在清洗排放数据列: ['NO浓度(ppm)', 'NO2浓度(ppm)', 'CO浓度(vol%)', 'CO2浓度(vol%)', 'CO(g/s)', 'NOx(g/s)', 'CO2(g/s)', 'PN(#/s)'] ---
  - 在列 'NO浓度(ppm)' 中发现 11144 个无效值 (<=0)，将校正为 0.0。
  - 在列 'NO2浓度(ppm)' 中发现 45 个无效值 (<=0)，将校正为 0.0。
  - 在列 'CO浓度(vol%)' 中发现 1343 个无效值 (<=0)，将校正为 0.0。
  - 在列 'CO(g/s)' 中发现 1346 个无效值 (<=0)，将校正为 0.0。
  - 在列 'NOx(g/s)' 中发现 2989 个无效值 (<=0)，将校正为 0.0。
  - 在列 'CO2(g/s)' 中发现 5 个无效值 (<=0)，将校正为 0.0。
  - 在列 'PN(#/s)' 中发现 107 个无效值 (<=0)，将校正为 0.0。
数据清洗完成！

柴油车处理结果预览:


Unnamed: 0,时间戳,车速(km/h),GPS车速(km/h),发动机转速(rpm),发动机扭矩(%),排气质量流量(kg/h),NO浓度(ppm),NO2浓度(ppm),CO浓度(vol%),CO2浓度(vol%),...,纬度(°),海拔(m),PN真实浓度(#/cm3),排气质量流量(g/s),CO(g/s),NOx(g/s),CO2(g/s),PN(#/s),SegmentName,SegmentType
0,2025-08-07 20:56:44.262,0.0,0,0.0,-125.0,8.492607,0.0,114.308823,0.0,0.057034,...,0,0,9492.42578,2.359058,0.0,0.000382,0.002057,18583550.0,OpenRoad,OpenRoad
1,2025-08-07 20:56:45.262,0.0,0,0.0,-125.0,6.960509,0.0,114.220016,0.0,0.057027,...,0,0,10181.6416,1.933475,0.0,0.000313,0.001685,16336890.0,OpenRoad,OpenRoad
2,2025-08-07 20:56:46.262,0.0,0,0.0,-125.0,-0.508475,0.0,114.206581,0.0,0.057024,...,0,0,11560.075196,-0.141243,5.839515e-07,0.0,0.0,0.0,OpenRoad,OpenRoad
3,2025-08-07 20:56:47.262,0.0,0,0.0,-125.0,3.321774,0.0,114.285904,0.0,0.057031,...,0,0,12211.001952,0.922715,0.0,0.000149,0.000804,9350435.0,OpenRoad,OpenRoad
4,2025-08-07 20:56:48.262,0.0,0,0.0,-125.0,1.981186,0.0,114.199295,0.0,0.057025,...,0,0,8420.310548,0.550329,0.0,8.9e-05,0.00048,3845597.0,OpenRoad,OpenRoad



柴油车 'SegmentType' 分布情况:
SegmentType
OpenRoad       10348
MainTunnel      2000
ShortTunnel      954
Name: count, dtype: int64

--- 正在保存已处理的数据 ---
成功将数据保存至: D:\A-Code\PEMS_HGV_Analysis\data\01_processed\Heavy_Diesel_G6_01_tagged.csv

成功加载文件: Heavy_LNG_G6_01_processed.csv
成功加载路段定义文件: 国六LNG_隧道时间与分类_segments.xlsx

--- 开始为数据打标签 ---
打标签完成！

--- 正在清洗排放数据列: ['NO浓度(ppm)', 'NO2浓度(ppm)', 'CO浓度(vol%)', 'CO2浓度(vol%)', 'CO(g/s)', 'NOx(g/s)', 'CO2(g/s)', 'PN(#/s)'] ---
  - 在列 'NO浓度(ppm)' 中发现 323 个无效值 (<=0)，将校正为 0.0。
  - 在列 'NO2浓度(ppm)' 中发现 36 个无效值 (<=0)，将校正为 0.0。
  - 在列 'CO浓度(vol%)' 中发现 8404 个无效值 (<=0)，将校正为 0.0。
  - 在列 'CO(g/s)' 中发现 8509 个无效值 (<=0)，将校正为 0.0。
  - 在列 'NOx(g/s)' 中发现 132 个无效值 (<=0)，将校正为 0.0。
  - 在列 'CO2(g/s)' 中发现 105 个无效值 (<=0)，将校正为 0.0。
  - 在列 'PN(#/s)' 中发现 105 个无效值 (<=0)，将校正为 0.0。
数据清洗完成！

LNG车辆处理结果预览:


Unnamed: 0,时间戳,车速(km/h),GPS车速(km/h),发动机转速(rpm),发动机扭矩(%),排气质量流量(kg/h),NO浓度(ppm),NO2浓度(ppm),CO浓度(vol%),CO2浓度(vol%),...,纬度(°),海拔(m),PN真实浓度(#/cm3),排气质量流量(g/s),CO(g/s),NOx(g/s),CO2(g/s),PN(#/s),SegmentName,SegmentType
0,2025-08-09 00:25:03.096,0.0,0,0.0,-125.0,4.094402,0.0,153.851654,0.003725,0.175172,...,0,0,3216.334472,1.137334,4.1e-05,0.000255,0.003045,3035723.0,OpenRoad,OpenRoad
1,2025-08-09 00:25:04.096,0.0,0,0.0,-125.0,13.574267,0.0,153.737091,0.007662,0.285959,...,0,0,5743.461916,3.77063,0.000279,0.000855,0.016482,17972170.0,OpenRoad,OpenRoad
2,2025-08-09 00:25:05.096,0.0,0,0.0,-125.0,7.733139,0.0,153.626389,0.010411,0.37963,...,0,0,7275.053712,2.148094,0.000216,0.000491,0.012466,12968880.0,OpenRoad,OpenRoad
3,2025-08-09 00:25:06.097,0.0,0,0.0,-125.0,7.541626,0.0,153.512299,0.011999,0.454525,...,0,0,6892.15576,2.094896,0.000243,0.000483,0.014555,11982030.0,OpenRoad,OpenRoad
4,2025-08-09 00:25:07.097,0.0,0,0.0,-125.0,1.796254,0.0,153.457748,0.013802,0.506851,...,0,0,8462.038084,0.498959,6.7e-05,0.000116,0.003866,3503912.0,OpenRoad,OpenRoad



LNG车辆 'SegmentType' 分布情况:
SegmentType
OpenRoad       7628
MainTunnel      985
ShortTunnel     288
Name: count, dtype: int64

--- 正在保存已处理的数据 ---
成功将数据保存至: D:\A-Code\PEMS_HGV_Analysis\data\01_processed\Heavy_LNG_G6_01_tagged.csv


--- 所有数据处理和保存流程执行完毕 ---
