## 读取最初的数据文件，并切割

根据缓存区号，切割can解析csv。

若已经切割完毕，则可跳过

In [1]:
import pandas as pd
import os

# 读取CSV文件
file_path = '20250314-0003can.csv'
df = pd.read_csv(file_path)

# 获取Buffer Number列
buffer_numbers = df['Buffer Number'].unique()

# 创建保存文件的文件夹
output_folder = '20250314-0003can'
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# 根据Buffer Number切割数据并保存到不同的文件
for buffer_number in buffer_numbers:
    buffer_df = df[df['Buffer Number'] == buffer_number]
    buffer_number_str = f'{buffer_number:02}'
    output_file_path = os.path.join(output_folder, f'20250314-0003can_{buffer_number_str}.csv')
    buffer_df.to_csv(output_file_path, index=False)

print("文件切割并保存完成。")

文件切割并保存完成。


## 将数据按照CAN帧进行切割

根据CAN分析文件，按照rtr-crc进行进行数据文件切割，定义相关函数

In [1]:
# # 示例输入
# id_hex = "18EFFFFF"  # 16进制ID值（包含标准ID、扩展ID、SRR、IDE和RTR）
# frame_hex = "0102030405060708"  # 数据字段（8字节）
# rtr = 0  # RTR位
# fdf = 1  # FDF位（1表示扩展帧）
# dlc = 8  # 数据长度码（8字节）
# crc = "18EF"

# 将16进制转换为二进制比特流
def hex_to_bits(hex_str, length=None):
    if length:
        return bin(int(hex_str, 16))[2:].zfill(length)
    return bin(int(hex_str, 16))[2:].zfill(len(hex_str) * 4)

# 计算填充位数
def calculate_stuffing_bits(bits):
    stuffing_count = 0
    count = 0
    prev_bit = None
    for bit in bits:
        if bit == prev_bit:
            count += 1
            if count == 5:
                stuffing_count += 1
                count = 0
        else:
            count = 0
        prev_bit = bit
    return stuffing_count

# 扩充id二进制值
def insert_bits(binary_str, insert_value):
    # 确保输入是一个二进制字符串
    if not all(bit in '01' for bit in binary_str):
        raise ValueError("输入必须是一个二进制字符串")
    
    # 在第12和第13位插入1
    modified_str = binary_str[:11] + '11' + binary_str[11:]
    
    # 在末尾插入给定值
    modified_str += str(insert_value)
    
    return modified_str

# 解析扩展帧
def parse_extended_frame(id_hex, frame_hex, dlc, rtr, fdf, crc):
    # 将ID转换为二进制
    id_bits = hex_to_bits(id_hex, length=29)  # 29位（11位标准ID + 18位扩展ID）

    id_bits_all = insert_bits(id_bits, rtr)   # 32位（11位标准ID + 18位扩展ID + 3位控制）

    # 控制字段（IDE=1表示扩展帧，r0=0，DLC）
    # print(type(fdf))
    # print(type(dlc))
    # print(dlc)

    control_bits = str(fdf) + "0" + bin(int(dlc))[2:].zfill(4)  # 6位

    # 数据字段
    data_bits = hex_to_bits(frame_hex)  # DLC × 8位

    # CRC字段（假设为15位，实际需要计算）
    crc_bits = hex_to_bits(crc).zfill(4)  # 占位符

    # 组合完整帧
    full_frame_bits = id_bits_all + control_bits + data_bits + crc_bits

    # 计算填充位数
    stuffing_bits = calculate_stuffing_bits(full_frame_bits)

    # 计算RTR和CRC的位置
    rtr_index = 32 + calculate_stuffing_bits(id_bits)  # RTR位索引
    crc_end_index = rtr_index + len(control_bits) + len(data_bits) + 15 + calculate_stuffing_bits(control_bits + data_bits + crc_bits)

    return rtr_index, crc_end_index

# # 计算RTR和CRC的位置
# rtr_index, crc_end_index = parse_extended_frame(id_hex, frame_hex, dlc, rtr, fdf, crc)
# print(f"RTR起始位索引: {rtr_index}")
# print(f"CRC结束位索引: {crc_end_index}")


In [2]:
# df1['时间'] = pd.to_numeric(df1['时间'])
def process_datacontent_dataframes(df1, df2):
# 将第一个文件的时间列从毫秒转换为秒
    df1['时间（s）'] = df1['(ms)'] / 1000

    # 创建一个空的列表来存储切割后的DataFrame
    dataframes = []

    # 遍历第二个文件中的每一行，获取start time和end time
    for index, row in df2.iterrows():
        if row['CRC Valid'] == 0:
            continue
        start_time = row['Start Time']
        end_time = row['End Time']
        # label = row['label']
        
        # 根据start time和end time切割第一个文件的数据
        mask = (df1['时间（s）'] >= start_time) & (df1['时间（s）'] <= end_time)
        sliced_df = df1[mask]
        # sliced_df['label'] = label

        id_hex = row['ID'].replace(" ", "")  # 11位标准ID + 18位扩展ID
        frame_hex = row['Data'].replace(" ", "")  # 数据字段
        rtr = row['RTR']  # RTR位
        fdf = row['FDF']  # FDF位
        # if type(row['CRC']) == float:
        #     print(row)
        crc = row['CRC'].replace(" ", "")  # CRC值
        dlc = row['DLC']  # 数据长度码（8字节）

        rtr_index, crc_end_index = parse_extended_frame(id_hex, frame_hex, dlc, rtr, fdf, crc)

        # 进一步裁剪 DataFrame 中给定行数范围内的行
        further_sliced_df = sliced_df.iloc[(int(rtr_index)*25):(int(crc_end_index)*25)]

        
        # print(sliced_df.iloc[int(rtr_index):int(crc_end_index)])
        # sliced_df.iloc[rtr_index, crc_end_index]
        # 将切割后的DataFrame添加到列表中
        # if (further_sliced_df['(V)'] > 2.5).any():
        #     continue
        dataframes.append(further_sliced_df)

    # 打印切割后的DataFrame以验证结果
    # for i, df in enumerate(dataframes):
        # if i==0:
        #     df.to_csv('dataorigin1.csv', index=True)
        # print(f"DataFrame {i+1}:")
        # print(df)

    return dataframes

对切割好的每对数据文件和can文件，使用上述函数，进行进一步裁剪。

In [3]:
import pandas as pd
import os

# 定义文件夹路径
folder_0003 = '20250314-0003'
folder_0003can = '20250314-0003can'
output_folder = 'crcslice'

# 获取文件夹中的文件列表
files_0003 = os.listdir(folder_0003)
files_0003can = os.listdir(folder_0003can)

# 遍历 20250314-0003 文件夹中的文件
for file_0003 in files_0003:
    # 获取文件名的最后两位数字
    buffer_number = file_0003.split('_')[-1].split('.')[0]
    
    # 在 20250314-0003can 文件夹中找到对应的文件
    matching_files = [f for f in files_0003can if f.endswith(f'_{buffer_number}.csv')]
    
    if matching_files:
        file_0003can = matching_files[0]
        
        # 读取文件为 DataFrame
        df1 = pd.read_csv(os.path.join(folder_0003, file_0003), skiprows=1)
        df2 = pd.read_csv(os.path.join(folder_0003can, file_0003can))

        # print(type(df2['CRC'][0]))
        dataframes = process_datacontent_dataframes(df1, df2)

        # 保存每个 DataFrame 到 crcslice 文件夹
        for idx, df in enumerate(dataframes):
            output_filename = f"{buffer_number}_{idx+1}.csv"
            output_path = os.path.join(output_folder, output_filename)
            df.to_csv(output_path, index=False)
        
        
    else:
        print(f"未找到匹配的文件: {file_0003}")

    # break

print("文件处理完成。")

文件处理完成。


这里是直接对数据帧进行切割，此部分内容注释掉

In [None]:
# # df1['时间'] = pd.to_numeric(df1['时间'])
# def process_dataframes(df1, df2):
# # 将第一个文件的时间列从毫秒转换为秒
#     df1['时间（s）'] = df1['(ms)'] / 1000

#     # 创建一个空的列表来存储切割后的DataFrame
#     dataframes = []

#     # 遍历第二个文件中的每一行，获取start time和end time
#     for index, row in df2.iterrows():
#         if row['CRC Valid'] == 0:
#             continue
#         start_time = row['Start Time']
#         end_time = row['End Time']
#         # label = row['label']
        
#         # 根据start time和end time切割第一个文件的数据
#         mask = (df1['时间（s）'] >= start_time) & (df1['时间（s）'] <= end_time)
#         sliced_df = df1[mask]

#         # if (sliced_df['(V)'] > 3.0).any():
#         #     continue
#         dataframes.append(sliced_df)

#     # 打印切割后的DataFrame以验证结果
#     for i, df in enumerate(dataframes):
#         # if i==0:
#         #     df.to_csv('dataorigin1.csv', index=True)
#         print(f"DataFrame {i+1}:")
#         print(df)

#     return dataframes

## 将数据继续分割为信号长度的片段

首先还是要按照上升沿，下降沿等进行区分。开始>0.15,上升沿结束为变化小于0.07吧

In [3]:
import pandas as pd
import os

def detect_edges_and_split(df):
    segments = []
    i = 0
    while i < len(df) - 1:
        prev_voltage = df.iloc[i - 1]['(V)'] if i > 0 else None
        curr_voltage = df.iloc[i]['(V)']
        next_voltage = df.iloc[i + 1]['(V)'] if i < len(df) - 1 else None
        step_voltage = df.iloc[i + 2]['(V)']if i < len(df) - 2 else None
        
        # 检测上升沿开始
        if prev_voltage is not None and prev_voltage < 0.10 and curr_voltage > 0.10 and next_voltage > curr_voltage:
            start_idx = i-1
            if step_voltage is None:
                i+=1
                print("step_voltage is None")
                break
            while i < len(df) - 1 and not (abs(curr_voltage - next_voltage) < 0.08 and (abs(curr_voltage - next_voltage) != 0.042753) and (abs(step_voltage - next_voltage) < 0.08)):
                #  and (abs(curr_voltage - next_voltage) != 0.042753)):
                if curr_voltage < 0.15:
                    # 如果上升沿过程中出现小于0.15的值，跳过当前大循环
                    break
                i += 1
                curr_voltage = df.iloc[i]['(V)']
                next_voltage = df.iloc[i + 1]['(V)'] if i < len(df) - 1 else None
                step_voltage = df.iloc[i + 2]['(V)']if i < len(df) - 2 else None

            if next_voltage == None:
                break
            
            if curr_voltage < 0.15:
                    # 如果上升沿过程中出现小于0.15的值，跳过当前大循环
                i += 1
                continue

            end_rising_idx = i + 2
            i += 2

            curr_voltage = next_voltage
            next_voltage = step_voltage
            step_voltage = df.iloc[i + 2]['(V)']if i < len(df) - 2 else None
            
            #确认上升沿结束,多检测3位是否变化超幅0.08
            j = i
            while j < i + 3:
                if abs(curr_voltage - next_voltage) < 0.08:
                    j+=1
                    curr_voltage = next_voltage
                    next_voltage = step_voltage
                    step_voltage = df.iloc[j + 2]['(V)']if j < len(df) - 2 else None
                    # end_rising_idx = end_rising_idx + 1
                else:
                    j += 1
                    end_rising_idx = j
                    curr_voltage = next_voltage
                    next_voltage = step_voltage
                    step_voltage = df.iloc[j + 2]['(V)']if j < len(df) - 2 else None

            i = end_rising_idx + 1
            # 检测稳定态
            curr_voltage = df.iloc[i]['(V)']
            next_voltage = df.iloc[i + 1]['(V)'] if i < len(df) - 1 else None
            step_voltage = df.iloc[i + 2]['(V)']if i < len(df) - 2 else None
            while i < len(df) - 1:
                i += 1
                if(step_voltage == None):
                    break
                if abs(curr_voltage - next_voltage) >= 0.09:
                    # if curr_voltage < next_voltage:
                    #     end_rising_idx = end_rising_idx + 1
                    if next_voltage < 1.5:
                        break
                    if abs(step_voltage - next_voltage) >= 0.09:
                        if i - end_rising_idx < 7:
                            print(curr_voltage)
                            print(next_voltage)
                            print(step_voltage)
                        if step_voltage < next_voltage:
                            break
                curr_voltage = df.iloc[i]['(V)']
                next_voltage = df.iloc[i + 1]['(V)'] if i < len(df) - 1 else None
                step_voltage = df.iloc[i + 2]['(V)']if i < len(df) - 2 else None
            start_falling_idx = i
            if(step_voltage == None):
                break
            
            if i==len(df)-1:
                break

            # 检测下降沿结束
            while i < len(df) - 1 and not (curr_voltage < 0.10 and prev_voltage < 0.10):
                i += 1
                prev_voltage = df.iloc[i - 1]['(V)']
                curr_voltage = df.iloc[i]['(V)']
            end_falling_idx = i + 1
            
            ### 这里是后面修改的内容。未执行过测试
            if end_falling_idx - start_falling_idx < 3:
                break
                
            # 将上升沿开始到下降沿结束的数据作为一个独立的段
            segment = df.iloc[start_idx:end_falling_idx].copy()
            # [['(V)', '(ms)', 'label']]
            # print(start_idx)
            # print(end_rising_idx)
            segment.loc[:,'state'] = 'steady'  # 默认标记为稳态
            segment.iloc[0:(end_rising_idx - start_idx), segment.columns.get_loc('state')] = 'rising'
            # print(start_falling_idx)
            # print(end_falling_idx)
            segment.iloc[(start_falling_idx - start_idx):(end_falling_idx - start_idx), segment.columns.get_loc('state')] = 'falling'
            
            
            segments.append(segment)
        
        i += 1
    
    return segments




In [4]:
# 定义读取CSV文件的路径
file_path = './crcslice/06_31.csv'
df = pd.read_csv(file_path)

segments = detect_edges_and_split(df)
# print(segments)
# 对每个切割后的DataFrame进行处理
# all_segments = []
# for i, df in enumerate(dataframes):
#     segments = detect_edges_and_split(df)
#     all_segments.extend(segments)
# 对每个切割后的DataFrame进行处理

# 保存
rise_counts = []
steady_counts = []
falling_counts = []
for segment in segments:
    rise_count = segment[segment['state'] == 'rising'].shape[0]
    steady_count = segment[segment['state'] == 'steady'].shape[0]
    falling_count = segment[segment['state'] == 'falling'].shape[0]
    rise_counts.append(rise_count)
    steady_counts.append(steady_count)
    falling_counts.append(falling_count)

print(rise_counts)
print(steady_counts)
print(falling_counts)
print(segments[3])

[8, 9, 7, 8, 8, 6, 6, 6, 7, 7, 6, 6, 6, 6, 5]
[118, 117, 119, 118, 118, 120, 119, 94, 43, 43, 20, 20, 20, 20, 22]
[7, 7, 7, 7, 7, 7, 8, 8, 7, 8, 8, 8, 8, 8, 11]
         (ms)       (V)     时间（s）  ID    state
547  13.15552  0.000000  0.013156  25   rising
548  13.15560  1.628322  0.013156  25   rising
549  13.15568  2.013872  0.013156  25   rising
550  13.15576  2.099686  0.013156  25   rising
551  13.15584  2.013872  0.013156  25   rising
..        ...       ...       ...  ..      ...
675  13.16576  0.428457  0.013166  25  falling
676  13.16584  0.385550  0.013166  25  falling
677  13.16592  0.171321  0.013166  25  falling
678  13.16600  0.085661  0.013166  25  falling
679  13.16608  0.042753  0.013166  25  falling

[133 rows x 5 columns]


In [5]:
# 定义文件夹路径
input_folder = './crcslice'
output_folder = './crcsignal'

# 确保输出文件夹存在
os.makedirs(output_folder, exist_ok=True)

# 遍历输入文件夹中的所有文件
for filename in os.listdir(input_folder):
    if filename.endswith('.csv'):
        file_path = os.path.join(input_folder, filename)
        df = pd.read_csv(file_path)
        
        # 使用detect_edges_and_split函数切割DataFrame
        segments = detect_edges_and_split(df)
        
        # 保存每个切割后的DataFrame
        for i, segment in enumerate(segments):
            output_filename = f"{os.path.splitext(filename)[0]}_{i}.csv"
            output_path = os.path.join(output_folder, output_filename)
            segment.to_csv(output_path, index=False)

        # 统计每个切割后的DataFrame中不同状态的行数
        rise_counts = []
        steady_counts = []
        falling_counts = []
        for segment in segments:
            rise_count = segment[segment['state'] == 'rising'].shape[0]
            steady_count = segment[segment['state'] == 'steady'].shape[0]
            falling_count = segment[segment['state'] == 'falling'].shape[0]
            rise_counts.append(rise_count)
            steady_counts.append(steady_count)
            falling_counts.append(falling_count)
        
        # 检查steady_counts中是否有元素小于5
        has_steady_less_than_10 = any(count < 10 for count in steady_counts)
        has_steady_more_than_150 = any(count > 150 for count in steady_counts)
        
        if has_steady_less_than_10 or has_steady_more_than_150:
            print(f"File: {filename}")
            print("Rising counts:", rise_counts)
            print("Steady counts:", steady_counts)
            print("Falling counts:", falling_counts)
            print("Warning: There are segments with steady counts less than 5.")
            # 打印steady_counts中小于10的segment
            for i, steady_count in enumerate(steady_counts):
                if steady_count < 10:
                    print(f"Segment {i} with steady count {steady_count}:")
                    print(segments[i])
                if steady_count > 150:
                    print(f"Segment {i} with steady count {steady_count}:")
                    print(segments[i])

2.271008
2.14244
2.271008


## 读取采集到的数据

In [29]:
import os
import pandas as pd

# 定义文件夹路径
input_folder = './crcsignal'

# 初始化变量
max_rows = 0
min_rows = float('inf')
file_max_rows = ""
file_min_rows = ""

# 遍历输入文件夹中的所有文件
for filename in os.listdir(input_folder):
    if filename.endswith('.csv'):
        file_path = os.path.join(input_folder, filename)
        df = pd.read_csv(file_path)
        
        # 获取当前DataFrame的行数
        num_rows = df.shape[0]
        
        # 更新最大和最小行数
        if num_rows > max_rows:
            max_rows = num_rows
            file_max_rows = filename
        if num_rows < min_rows:
            min_rows = num_rows
            file_min_rows = filename

# 输出最大和最小行数
print(f"文件 {file_max_rows} 的行数最多，为 {max_rows} 行")
print(f"文件 {file_min_rows} 的行数最少，为 {min_rows} 行")

文件 01_11_4.csv 的行数最多，为 136 行
文件 01_34_8.csv 的行数最少，为 30 行


## 数据补零处理

In [31]:
import pandas as pd
import os

# 输入文件夹和输出文件夹路径
input_folder = "crcsignal"
output_folder = "padsignal"
target_length = 150  # 目标长度

# 创建输出文件夹（如果不存在）
os.makedirs(output_folder, exist_ok=True)

# 遍历输入文件夹中的所有CSV文件
for filename in os.listdir(input_folder):
    if filename.endswith(".csv"):
        # 读取CSV文件
        filepath = os.path.join(input_folder, filename)
        df = pd.read_csv(filepath, usecols=["时间（s）", "(V)", "state"])
        
        # 获取当前行数
        current_length = len(df)
        
        # 处理长度不足的情况
        if current_length < target_length:
            # 计算需要填充的行数
            num_pad = target_length - current_length
            
            # 构建填充数据（假设列名为：时间（s）、V、其他列、state）
            pad_data = {
                "时间（s）": [-1] * num_pad,
                "(V)": [-1] * num_pad,
                "state": ["padding"] * num_pad,
                # 其他列填充NaN（根据需求调整）
                # "其他列": [pd.NA] * num_pad
            }
            
            # 生成填充的DataFrame
            pad_df = pd.DataFrame(pad_data)
            
            # 追加到原始数据
            df = pd.concat([df, pad_df], ignore_index=True)
        
        # 处理长度过长的情况
        elif current_length > target_length:
            df = df.head(target_length)  # 保留前150行
        
        # 保存处理后的文件到输出文件夹
        output_path = os.path.join(output_folder, filename)
        df.to_csv(output_path, index=False)

print("处理完成！所有文件已保存至:", output_folder)

处理完成！所有文件已保存至: padsignal


## 异常数据处理

In [None]:
import pandas as pd
import os
# 导入0003.csv文件
df1 = pd.read_csv('20250214.csv', skiprows=1)
# df1 = pd.read_csv('20250117_0003.csv', skiprows=1)

# 导入0004.csv文件2
df2 = pd.read_csv('20250214can.csv')

output_folder = 'abcrcslice'
# 处理异常数据的文件
dataframes = process_datacontent_dataframes(df1, df2)

# 保存每个 DataFrame 到 crcslice 文件夹
for idx, df in enumerate(dataframes):
    output_filename = f"{idx+1}.csv"
    output_path = os.path.join(output_folder, output_filename)
    df.to_csv(output_path, index=False)

切割为信号

In [None]:
# 定义读取CSV文件的路径
file_path = './abcrcslice/54.csv'
df = pd.read_csv(file_path)

segments = detect_edges_and_split(df)
# print(segments)
# 对每个切割后的DataFrame进行处理
# all_segments = []
# for i, df in enumerate(dataframes):
#     segments = detect_edges_and_split(df)
#     all_segments.extend(segments)
# 对每个切割后的DataFrame进行处理

# 保存
rise_counts = []
steady_counts = []
falling_counts = []
for segment in segments:
    rise_count = segment[segment['state'] == 'rising'].shape[0]
    steady_count = segment[segment['state'] == 'steady'].shape[0]
    falling_count = segment[segment['state'] == 'falling'].shape[0]
    rise_counts.append(rise_count)
    steady_counts.append(steady_count)
    falling_counts.append(falling_count)

print(rise_counts)
print(steady_counts)
print(falling_counts)
print(segments[3])

In [None]:
# 定义文件夹路径
input_folder = './abcrcslice'
output_folder = './abcrcsignal'

# 确保输出文件夹存在
os.makedirs(output_folder, exist_ok=True)

# 遍历输入文件夹中的所有文件
for filename in os.listdir(input_folder):
    if filename.endswith('.csv'):
        file_path = os.path.join(input_folder, filename)
        df = pd.read_csv(file_path)
        
        # 使用detect_edges_and_split函数切割DataFrame
        segments = detect_edges_and_split(df)
        
        # 保存每个切割后的DataFrame
        for i, segment in enumerate(segments):
            output_filename = f"{os.path.splitext(filename)[0]}_{i}.csv"
            output_path = os.path.join(output_folder, output_filename)
            segment.to_csv(output_path, index=False)

        # 统计每个切割后的DataFrame中不同状态的行数
        rise_counts = []
        steady_counts = []
        falling_counts = []
        for segment in segments:
            rise_count = segment[segment['state'] == 'rising'].shape[0]
            steady_count = segment[segment['state'] == 'steady'].shape[0]
            falling_count = segment[segment['state'] == 'falling'].shape[0]
            rise_counts.append(rise_count)
            steady_counts.append(steady_count)
            falling_counts.append(falling_count)
        
        # 检查steady_counts中是否有元素小于5
        has_steady_less_than_10 = any(count < 10 for count in steady_counts)
        has_steady_more_than_150 = any(count > 150 for count in steady_counts)
        
        if has_steady_less_than_10 or has_steady_more_than_150:
            print(f"File: {filename}")
            print("Rising counts:", rise_counts)
            print("Steady counts:", steady_counts)
            print("Falling counts:", falling_counts)
            print("Warning: There are segments with steady counts less than 5.")
            # 打印steady_counts中小于10的segment
            for i, steady_count in enumerate(steady_counts):
                if steady_count < 10:
                    print(f"Segment {i} with steady count {steady_count}:")
                    print(segments[i])
                if steady_count > 150:
                    print(f"Segment {i} with steady count {steady_count}:")
                    print(segments[i])

补零

In [None]:
import pandas as pd
import os

# 输入文件夹和输出文件夹路径
input_folder = "abcrcsignal"
output_folder = "abpadsignal"
target_length = 150  # 目标长度

# 创建输出文件夹（如果不存在）
os.makedirs(output_folder, exist_ok=True)

# 遍历输入文件夹中的所有CSV文件
for filename in os.listdir(input_folder):
    if filename.endswith(".csv"):
        # 读取CSV文件
        filepath = os.path.join(input_folder, filename)
        df = pd.read_csv(filepath, usecols=["时间（s）", "(V)", "state"])
        
        # 获取当前行数
        current_length = len(df)
        
        # 处理长度不足的情况
        if current_length < target_length:
            # 计算需要填充的行数
            num_pad = target_length - current_length
            
            # 构建填充数据（假设列名为：时间（s）、V、其他列、state）
            pad_data = {
                "时间（s）": [-1] * num_pad,
                "(V)": [-1] * num_pad,
                "state": ["padding"] * num_pad,
                # 其他列填充NaN（根据需求调整）
                # "其他列": [pd.NA] * num_pad
            }
            
            # 生成填充的DataFrame
            pad_df = pd.DataFrame(pad_data)
            
            # 追加到原始数据
            df = pd.concat([df, pad_df], ignore_index=True)
        
        # 处理长度过长的情况
        elif current_length > target_length:
            df = df.head(target_length)  # 保留前150行
        
        # 保存处理后的文件到输出文件夹
        output_path = os.path.join(output_folder, filename)
        df.to_csv(output_path, index=False)

print("处理完成！所有文件已保存至:", output_folder)

## 生成数据用于原始模型的计算

In [26]:
import os
import pandas as pd
import numpy as np

# 定义文件夹路径
crcsignal_folder = 'crcsignal'
abcrcsignal_folder = 'abcrcsignal'

# 获取文件夹中的所有文件
crcsignal_files = os.listdir(crcsignal_folder)
abcrcsignal_files = os.listdir(abcrcsignal_folder)

# 随机选取1/5的文件
crcsignal_sample_files = np.random.choice(crcsignal_files, size=len(crcsignal_files)//250, replace=False)
abcrcsignal_sample_files = np.random.choice(abcrcsignal_files, size=len(abcrcsignal_files)//5, replace=False)

# 读取文件并添加label列
df_list = []

for file in crcsignal_sample_files:
    df = pd.read_csv(os.path.join(crcsignal_folder, file))
    df['label'] = 0
    df_list.append(df)

for file in abcrcsignal_sample_files:
    df = pd.read_csv(os.path.join(abcrcsignal_folder, file))
    df['label'] = 1
    df_list.append(df)

# 打印列表中的DataFrame
for df in df_list:
    print(df)

all_segments = df_list
# 将所有DataFrame合并并保存为dtfeature.csv
# combined_df = pd.concat(df_list)

        (ms)       (V)     时间（s）    state  label
0   16.49792  0.042753  0.016498   rising      0
1   16.49800  1.456847  0.016498   rising      0
2   16.49808  2.271008  0.016498   rising      0
3   16.49816  1.971118  0.016498   rising      0
4   16.49824  2.013872  0.016498   rising      0
5   16.49832  2.228100  0.016498   rising      0
6   16.49840  2.313915  0.016498   rising      0
7   16.49848  2.228100  0.016498   rising      0
8   16.49856  2.228100  0.016499   rising      0
9   16.49864  2.228100  0.016499   steady      0
10  16.49872  2.228100  0.016499   steady      0
11  16.49880  2.185347  0.016499   steady      0
12  16.49888  2.228100  0.016499   steady      0
13  16.49896  2.228100  0.016499   steady      0
14  16.49904  2.185347  0.016499   steady      0
15  16.49912  2.228100  0.016499   steady      0
16  16.49920  2.228100  0.016499   steady      0
17  16.49928  2.228100  0.016499   steady      0
18  16.49936  2.228100  0.016499   steady      0
19  16.49944  2.1853

In [10]:
i = 0
for df in df_list:
    if i == 445:
        print(df)
    i += 1

        (ms)       (V)     时间（s）    state  label
0   28.81320  0.042753  0.028813   rising      0
1   28.81328  0.428457  0.028813   rising      0
2   28.81336  1.799643  0.028813   rising      0
3   28.81344  2.313915  0.028813   rising      0
4   28.81352  2.313915  0.028814   rising      0
5   28.81360  2.442483  0.028814   rising      0
6   28.81368  2.313915  0.028814   rising      0
7   28.81376  2.185347  0.028814   rising      0
8   28.81384  2.185347  0.028814   rising      0
9   28.81392  2.185347  0.028814   steady      0
10  28.81400  2.185347  0.028814   steady      0
11  28.81408  2.185347  0.028814   steady      0
12  28.81416  2.185347  0.028814   steady      0
13  28.81424  2.185347  0.028814   steady      0
14  28.81432  2.185347  0.028814   steady      0
15  28.81440  2.185347  0.028814   steady      0
16  28.81448  2.185347  0.028814   steady      0
17  28.81456  2.185347  0.028815   steady      0
18  28.81464  2.185347  0.028815   steady      0
19  28.81472  2.1853

## 计算数据特征

In [16]:

from scipy.stats import skew, kurtosis
import numpy as np
from scipy.fft import fft, fftfreq
# 打印每个段以验证结果
for i, segment in enumerate(all_segments):
    if i==444:
        print(f"Segment {i+1}:")
        print(segment)
        
        # df = segment[0:3]
        df = segment[segment['state'] == 'steady']
        fft_values = fft(df['(V)'].values)
        fft_freq = fftfreq(len(df), d=(df['(ms)'].diff().mean() / 1000))  # 将时间间隔转换为秒
        fft_magnitude = np.abs(fft_values)

        # 只取前一半的结果，忽略负值部分
        half_n =  (len(fft_values) + 1) // 2
        fft_values = fft_values[:half_n]
        fft_freq = fft_freq[:half_n]
        fft_magnitude = fft_magnitude[:half_n]
        # rising_edge = segment[segment['state'] == 'rising']
        # print(rising_edge)
        kurt = kurtosis(fft_magnitude)
        skewness = skew(fft_magnitude)
        print(kurt)
        print(skewness)
        print(skew(df['(V)']))
        # break

Segment 445:
       (ms)       (V)     时间（s）    state  label
0   7.59728  0.042753  0.007597   rising      0
1   7.59736  0.514118  0.007597   rising      0
2   7.59744  1.499754  0.007597   rising      0
3   7.59752  2.142440  0.007598   rising      0
4   7.59760  2.442483  0.007598   rising      0
5   7.59768  2.570897  0.007598   rising      0
6   7.59776  2.570897  0.007598   rising      0
7   7.59784  2.528143  0.007598   rising      0
8   7.59792  2.442483  0.007598   rising      0
9   7.59800  2.399575  0.007598   steady      0
10  7.59808  2.399575  0.007598   steady      0
11  7.59816  2.399575  0.007598   steady      0
12  7.59824  2.399575  0.007598   steady      0
13  7.59832  2.399575  0.007598   steady      0
14  7.59840  2.399575  0.007598   steady      0
15  7.59848  2.399575  0.007598   steady      0
16  7.59856  2.399575  0.007599   steady      0
17  7.59864  2.399575  0.007599   steady      0
18  7.59872  2.399575  0.007599   steady      0
19  7.59880  2.399575  0.00

  print(skew(df['(V)']))


In [8]:
print(segment.loc[563248 :563260])

             (V)      (ms)  label   state
563248  0.042753  45.05984      1  rising
563249  0.171321  45.05992      1  rising
563250  1.928211  45.06000      1  rising
563251  2.056779  45.06008      1  rising
563252  2.099686  45.06016      1  rising
563253  2.099686  45.06024      1  steady
563254  2.142440  45.06032      1  steady
563255  2.142440  45.06040      1  steady
563256  2.099686  45.06048      1  steady
563257  2.099686  45.06056      1  steady
563258  2.056779  45.06064      1  steady
563259  2.056779  45.06072      1  steady
563260  2.142440  45.06080      1  steady


In [None]:
# from scipy.fft import fft, fftfreq
# rising_edge = segment[0:4]
# dfe = rising_edge
# fft_values = fft(rising_edge['(V)'].values)
# fft_magnitude = np.abs(fft_values)
# print(fft_magnitude)
# print(dfe['(ms)'].diff().mean() / 1000)
# fft_freq = fftfreq(len(dfe), d=(dfe['(ms)'].diff().mean() / 1000))
# print(fft_freq)
# centroid = np.sum(fft_freq * fft_magnitude) / np.sum(fft_magnitude)

# print(centroid)

NameError: name 'np' is not defined

In [23]:
from scipy.stats import skew, kurtosis
import numpy as np
from scipy.fft import fft, fftfreq

# 定义一个函数来计算统计特征
def calculate_statistics(df):
    stats = {
        'mean': df['(V)'].mean(),
        'std': df['(V)'].std(),
        'mean_diff': df['(V)'].diff().mean(),
        'skew': skew(df['(V)']),
        'kurtosis': kurtosis(df['(V)']),
        'rms': np.sqrt(np.mean(df['(V)']**2)),
        'max': df['(V)'].max(),
        'min': df['(V)'].min()
    }
    return stats

# 定义一个函数来计算频域统计特征
def calculate_frequency_domain_statistics(df):
    # 进行傅里叶变换
    fft_values = fft(df['(V)'].values)
    fft_freq = fftfreq(len(df), d=(df['(ms)'].diff().mean() / 1000))  # 将时间间隔转换为秒
    fft_magnitude = np.abs(fft_values)

    # 只取前一半的结果，忽略负值部分
    half_n =  (len(fft_values) + 1) // 2
    fft_values = fft_values[:half_n]
    fft_freq = fft_freq[:half_n]
    fft_magnitude = fft_magnitude[:half_n]

    # 过滤掉零值
    non_zero_indices = fft_magnitude > 0
    fft_magnitude_entro = fft_magnitude[non_zero_indices]
    # fft_freq = fft_freq[non_zero_indices]
    
    # 计算频域统计特征
    centroid = np.sum(fft_freq * fft_magnitude) / np.sum(fft_magnitude)
    entropy = -np.sum((fft_magnitude_entro / np.sum(fft_magnitude_entro)) * np.log(fft_magnitude_entro / np.sum(fft_magnitude_entro)))
    spread = np.sqrt(np.sum(((fft_freq - centroid) ** 2) * fft_magnitude) / np.sum(fft_magnitude))
    skewness = skew(fft_magnitude)
    avg_value = np.mean(fft_magnitude)
    variance = np.var(fft_magnitude)
    kurt = kurtosis(fft_magnitude)
    irregularity = np.sum(np.abs(np.diff(fft_magnitude)))
    
    stats = {
        'fft_centroid': centroid,
        'fft_entropy': entropy,
        'fft_spread': spread,
        'fft_skewness': skewness,
        'fft_avg_value': avg_value,
        'fft_variance': variance,
        'fft_kurtosis': kurt,
        'fft_irregularity': irregularity
    }
    return stats

# 创建一个空的列表来存储统计特征
statistics = []

# 对每个段进行处理
for segment in all_segments:
    label = segment['label'].iloc[0]  # 获取第一个label值
    rising_edge = segment[segment['state'] == 'rising']
    falling_edge = segment[segment['state'] == 'falling']
    steady_state = segment[segment['state'] == 'steady']
    
    rising_time_stats = calculate_statistics(rising_edge)
    falling_time_stats = calculate_statistics(falling_edge)
    steady_time_stats = calculate_statistics(steady_state)

    rising_freq_stats = calculate_frequency_domain_statistics(rising_edge)
    falling_freq_stats = calculate_frequency_domain_statistics(falling_edge)
    steady_freq_stats = calculate_frequency_domain_statistics(steady_state)
    
    combined_stats = {
        'label': label,
        'rising_mean': rising_time_stats['mean'],
        'rising_std': rising_time_stats['std'],
        'rising_mean_diff': rising_time_stats['mean_diff'],
        'rising_skew': rising_time_stats['skew'],
        'rising_kurtosis': rising_time_stats['kurtosis'],
        'rising_rms': rising_time_stats['rms'],
        'rising_max': rising_time_stats['max'],
        'rising_min': rising_time_stats['min'],
        'rising_fft_centroid': rising_freq_stats['fft_centroid'],
        'rising_fft_entropy': rising_freq_stats['fft_entropy'],
        'rising_fft_spread': rising_freq_stats['fft_spread'],
        'rising_fft_skewness': rising_freq_stats['fft_skewness'],
        'rising_fft_avg_value': rising_freq_stats['fft_avg_value'],
        'rising_fft_variance': rising_freq_stats['fft_variance'],
        'rising_fft_kurtosis': rising_freq_stats['fft_kurtosis'],
        'rising_fft_irregularity': rising_freq_stats['fft_irregularity'],
        'falling_mean': falling_time_stats['mean'],
        'falling_std': falling_time_stats['std'],
        'falling_mean_diff': falling_time_stats['mean_diff'],
        'falling_skew': falling_time_stats['skew'],
        'falling_kurtosis': falling_time_stats['kurtosis'],
        'falling_rms': falling_time_stats['rms'],
        'falling_max': falling_time_stats['max'],
        'falling_min': falling_time_stats['min'],
        'falling_fft_centroid': falling_freq_stats['fft_centroid'],
        'falling_fft_entropy': falling_freq_stats['fft_entropy'],
        'falling_fft_spread': falling_freq_stats['fft_spread'],
        'falling_fft_skewness': falling_freq_stats['fft_skewness'],
        'falling_fft_avg_value': falling_freq_stats['fft_avg_value'],
        'falling_fft_variance': falling_freq_stats['fft_variance'],
        'falling_fft_kurtosis': falling_freq_stats['fft_kurtosis'],
        'falling_fft_irregularity': falling_freq_stats['fft_irregularity'],
        'steady_mean': steady_time_stats['mean'],
        'steady_std': steady_time_stats['std'],
        'steady_mean_diff': steady_time_stats['mean_diff'],
        'steady_skew': steady_time_stats['skew'],
        'steady_kurtosis': steady_time_stats['kurtosis'],
        'steady_rms': steady_time_stats['rms'],
        'steady_max': steady_time_stats['max'],
        'steady_min': steady_time_stats['min'],
        'steady_fft_centroid': steady_freq_stats['fft_centroid'],
        'steady_fft_entropy': steady_freq_stats['fft_entropy'],
        'steady_fft_spread': steady_freq_stats['fft_spread'],
        'steady_fft_skewness': steady_freq_stats['fft_skewness'],
        'steady_fft_avg_value': steady_freq_stats['fft_avg_value'],
        'steady_fft_variance': steady_freq_stats['fft_variance'],
        'steady_fft_kurtosis': steady_freq_stats['fft_kurtosis'],
        'steady_fft_irregularity': steady_freq_stats['fft_irregularity']
    }
    
    statistics.append(combined_stats)

# 将统计特征转换为DataFrame
stats_df = pd.DataFrame(statistics)

# 保存统计特征DataFrame为文件
stats_df.to_csv('statistics1.csv', index=False)

# 打印统计特征DataFrame
print(stats_df)

  'skew': skew(df['(V)']),
  'kurtosis': kurtosis(df['(V)']),


      label  rising_mean  rising_std  rising_mean_diff  rising_skew  \
0         0     1.661575    0.858276          0.257117    -1.206836   
1         0     1.813928    0.985333          0.299966    -0.959584   
2         0     1.767520    0.754391          0.287718    -1.741686   
3         0     1.906777    0.935187          0.348926    -1.183762   
4         0     1.679672    0.827673          0.223781    -1.359643   
...     ...          ...         ...               ...          ...   
1880      1     1.803965    0.817911          0.257102    -1.446713   
1881      1     1.726198    0.767230          0.349948    -1.800659   
1882      1     1.880605    0.723872          0.283895    -2.085970   
1883      1     1.534018    0.867375          0.503506    -1.246019   
1884      1     1.448327    0.904687          0.514233    -0.813621   

      rising_kurtosis  rising_rms  rising_max  rising_min  \
0           -0.331901    1.848139    2.228100    0.042753   
1           -0.735458    

In [25]:
# 将stats_df中的所有NaN值替换为零
stats_df = stats_df.fillna(0)

# 删除第16534行
stats_df = stats_df.drop(1758)
# 查找包含 NaN 值的行
nan_rows = stats_df[pd.isna(stats_df).any(axis=1)]

# 打印包含 NaN 值的行
# print("存在 NaN 的行：")
# print(nan_rows)
# 打印这些行中只包含NaN的列
for index, row in nan_rows.iterrows():
    nan_columns = row[row.isna()].index
    print(f"Row {index} has NaN in columns: {nan_columns.tolist()}")
    print(row['label'])

# 保存统计特征DataFrame为文件
stats_df.to_csv('statistics1.csv', index=False)

In [36]:
print(nan_rows['rising_fft_skewness'])

Series([], Name: rising_fft_skewness, dtype: float64)


这里开始都是之前废稿
import pandas as pd

# 定义一个函数来检测上升沿、下降沿和稳定态
def detect_edges(df):
    rising_edge = []
    falling_edge = []
    steady_state = []

    i = 0
    while i < len(df) - 1:
        prev_voltage = df.iloc[i - 1]['(V)'] if i > 0 else None
        curr_voltage = df.iloc[i]['(V)']
        next_voltage = df.iloc[i + 1]['(V)'] if i < len(df) - 1 else None
        
        # 检测上升沿
        if prev_voltage is not None and prev_voltage < 0.1 and curr_voltage > 0.1:
            start_idx = i
            while i < len(df) - 1 and not (curr_voltage > 2.0 and next_voltage > 2.0):
                i += 1
                curr_voltage = df.iloc[i]['(V)']
                next_voltage = df.iloc[i + 1]['(V)'] if i < len(df) - 1 else None
            rising_edge.extend(df.iloc[start_idx:i+1][['(V)', '(ms)']].values.tolist())
        
        # 检测下降沿
        elif curr_voltage > 2.0 and next_voltage < 2.0:
            start_idx = i
            while i < len(df) - 1 and not (curr_voltage < 0.1 and prev_voltage > 0.1):
                i += 1
                prev_voltage = df.iloc[i - 1]['(V)']
                curr_voltage = df.iloc[i]['(V)']
            falling_edge.extend(df.iloc[start_idx:i+1][['(V)', '(ms)']].values.tolist())
        
        # 其余部分是稳定态
        else:
            steady_state.append(df.iloc[i][['(V)', '(ms)']].values.tolist())
        
        i += 1
    
    rising_edge_df = pd.DataFrame(rising_edge, columns=['(V)', '(ms)'])
    falling_edge_df = pd.DataFrame(falling_edge, columns=['(V)', '(ms)'])
    steady_state_df = pd.DataFrame(steady_state, columns=['(V)', '(ms)'])
    
    return rising_edge_df, falling_edge_df, steady_state_df


# 对每个切割后的DataFrame进行处理
for i, df in enumerate(dataframes):
    rising_edge, falling_edge, steady_state = detect_edges(df)
    print(f"DataFrame {i+1} 上升沿:")
    print(rising_edge)
    print(f"DataFrame {i+1} 下降沿:")
    print(falling_edge)
    print(f"DataFrame {i+1} 稳定态:")
    print(steady_state)
    print(f"DataFrame {i+1} 标签:")
    print(df['label'].iloc[0])
    break
