In [15]:
import os
import pandas as pd

global_window_id = 0  # Running counter used to give every window a globally unique ID

# Read tactile_dataset.csv and extract per-block information
def load_tactile_dataset(tactile_dataset_file):
    """Load tactile_dataset.csv and summarise every contact block in it.

    Args:
        tactile_dataset_file: Path to a CSV file that has at least the
            'block_id' and 'touch_type' columns.

    Returns:
        A list with one tuple per distinct block_id, ordered by block_id:
        (block_id, first_row_label, last_row_label, touch_type).
        The row labels are the row numbers assigned by pd.read_csv, and
        touch_type is taken from the first row of the block.
        The list is empty when the file contains no data rows.

    Raises:
        FileNotFoundError: If tactile_dataset_file is not an existing file.
    """
    if not os.path.isfile(tactile_dataset_file):
        raise FileNotFoundError(f"未找到文件 {tactile_dataset_file}")

    df = pd.read_csv(tactile_dataset_file)

    # Iterate over the groups directly instead of groupby(...).apply(...):
    # this avoids applying a function to the grouping column and also
    # yields a plain empty list for a file without data rows.
    return [
        (block_id, rows.index[0], rows.index[-1], rows['touch_type'].iloc[0])
        for block_id, rows in df.groupby('block_id')
    ]

def check_overlap(window_df, block_info):
    """Return the blocks whose row range intersects the window's row range.

    Args:
        window_df: DataFrame holding one window. Its first and last index
            labels are used as the inclusive window range, so the index is
            assumed to be increasing — TODO confirm for non-default indexes.
        block_info: Iterable of
            (block_id, block_start, block_end, touch_type) tuples, where
            block_start and block_end are inclusive.

    Returns:
        A list of (block_id, touch_type) tuples, in block_info order, for
        every block that shares at least one row label with the window.
        An empty window overlaps nothing and yields an empty list.
    """
    if len(window_df) == 0:
        return []

    window_start = window_df.index[0]
    window_end = window_df.index[-1]

    # A single closed-interval intersection test is enough: it already covers
    # partial overlap, window-inside-block and block-inside-window, so no
    # separate containment checks are needed.
    return [
        (block_id, block_touch_type)
        for block_id, block_start, block_end, block_touch_type in block_info
        if block_start <= window_end and window_start <= block_end
    ]

def process_labeled_data(file_path, block_info, window_size=28, step_size=14):
    """Cut one CSV file into sliding windows and tag each window with its overlapping blocks.

    Args:
        file_path: Path of the CSV file to read.
        block_info: Iterable of (block_id, block_start, block_end, touch_type)
            tuples; it is passed unchanged to check_overlap.
        window_size: Number of consecutive rows in each window.
        step_size: Number of rows the window start advances between windows.

    Returns:
        A list of DataFrames, one copy per window, each with three added
        columns: 'block_id' (comma-joined overlapping block IDs, or 'NA'),
        'touch_type' (comma-joined touch types, or 'NC') and 'window_id'.
        Trailing rows that do not fill a complete window are not returned.

    Side effects:
        Increments the module-level global_window_id once per window.
    """
    global global_window_id
    df = pd.read_csv(file_path)
    windows = []

    # NOTE(review): each window keeps the row numbers of this file, whereas
    # block_info built by load_tactile_dataset holds row numbers of
    # tactile_dataset.csv. These look like two different row-number spaces —
    # confirm the files are row-aligned, otherwise the overlaps are accidental.
    for start in range(0, len(df) - window_size + 1, step_size):
        window_df = df.iloc[start:start + window_size].copy()

        overlap_blocks = check_overlap(window_df, block_info)

        if overlap_blocks:
            block_ids, touch_types = zip(*overlap_blocks)
            window_df['block_id'] = ','.join(map(str, block_ids))
            # Convert every label with str(), exactly as for block_ids, so a
            # non-string touch_type (number, NaN) cannot break the join.
            window_df['touch_type'] = ','.join(map(str, touch_types))
        else:
            # No overlap: 'NA' block and 'NC' (No Contact) touch type.
            # NOTE: pd.read_csv treats the text 'NA' as a missing value by
            # default when the written CSV is loaded again.
            window_df['block_id'] = 'NA'
            window_df['touch_type'] = 'NC'

        window_df['window_id'] = global_window_id
        windows.append(window_df)
        global_window_id += 1

    return windows

def process_all_folders(data_folder, tactile_dataset_path, output_file, window_size=28, step_size=14):
    """Window every '<sub-folder>/labeled_data.csv' under data_folder and write one combined CSV.

    Args:
        data_folder: Directory whose sub-folders are scanned for a file named
            'labeled_data.csv'.
        tactile_dataset_path: Path of the CSV passed to load_tactile_dataset.
        output_file: Directory (despite the name) in which
            'windowed_dataset.csv' is written.
        window_size: Rows per window, forwarded to process_labeled_data.
        step_size: Window step in rows, forwarded to process_labeled_data.

    Returns:
        None. Writes the CSV and prints a summary line, or prints a notice
        when no window was produced.

    Raises:
        FileNotFoundError: Propagated from load_tactile_dataset when
            tactile_dataset_path does not exist.
    """
    block_info = load_tactile_dataset(tactile_dataset_path)
    all_windows = []

    # os.listdir returns entries in arbitrary order; sorting makes the order of
    # the windows, and therefore their window_id values, reproducible.
    # NOTE(review): global_window_id is not reset here, so a second call in the
    # same kernel continues the numbering — confirm that is intended.
    for folder_name in sorted(os.listdir(data_folder)):
        # isfile() on the nested path is only true when the entry is a folder
        # that contains labeled_data.csv, so no separate isdir() test is needed.
        file_path = os.path.join(data_folder, folder_name, 'labeled_data.csv')
        if os.path.isfile(file_path):
            all_windows.extend(
                process_labeled_data(file_path, block_info, window_size, step_size)
            )

    if not all_windows:
        print("没有数据块被处理")
        return

    result_df = pd.concat(all_windows, ignore_index=True)
    output_path = os.path.join(output_file, 'windowed_dataset.csv')

    # to_csv opens the target in write mode by default, which replaces an
    # existing file; deleting it beforehand is unnecessary.
    result_df.to_csv(output_path, index=False)
    print(f"windowed_dataset.csv 文件已生成，包含 {len(all_windows)} 个窗口")


In [16]:
# Input/output locations and sliding-window settings for this run
data_folder = '../DATA/Labeled_data'
tactile_dataset_path = '../DATA/tactile_dataset.csv'
output_file = '../DATA/'
window_size = 28
step_size = 14  # number of rows the window moves forward per step

# Process every sub-folder and write the final combined output
process_all_folders(
    data_folder=data_folder,
    tactile_dataset_path=tactile_dataset_path,
    output_file=output_file,
    window_size=window_size,
    step_size=step_size,
)

windowed_dataset.csv 文件已生成，包含 13987 个窗口


In [11]:
# First check whether the total number of rows is consistent; it should be
# twice the number of rows of the original data.
data_folder = '../DATA/Labeled_data'
total_row = 0
for folder_name in os.listdir(data_folder):
    folder_path = os.path.join(data_folder, folder_name)
    file_path = os.path.join(folder_path, 'labeled_data.csv')
    print(f"当前路径是：{file_path}")

    # Entries without a labeled_data.csv contribute nothing to the total.
    if not os.path.isfile(file_path):
        continue

    df = pd.read_csv(file_path)
    row_count = len(df)
    print(f"当前文件共有{row_count}行")
    total_row = total_row + row_count
    print(f"目前共有{total_row} 行")
print(f"共有{total_row} 行")

当前路径是：../DATA/Labeled_data/0724-7DT-Y1/labeled_data.csv
当前文件共有23022行
目前共有23022 行
当前路径是：../DATA/Labeled_data/0724-7ST-Y1/labeled_data.csv
当前文件共有30682行
目前共有53704 行
当前路径是：../DATA/Labeled_data/0724-7ST-S1/labeled_data.csv
当前文件共有20499行
目前共有74203 行
当前路径是：../DATA/Labeled_data/0724-7DT-S1/labeled_data.csv
当前文件共有23775行
目前共有97978 行
当前路径是：../DATA/Labeled_data/0724-7P-Y1/labeled_data.csv
当前文件共有26122行
目前共有124100 行
当前路径是：../DATA/Labeled_data/0724-7P-S1/labeled_data.csv
当前文件共有18024行
目前共有142124 行
当前路径是：../DATA/Labeled_data/0724-7G-S1/labeled_data.csv
当前文件共有22884行
目前共有165008 行
当前路径是：../DATA/Labeled_data/0724-7G-Y1/labeled_data.csv
当前文件共有30971行
目前共有195979 行
共有195979 行


In [18]:
# Verification: list the (window_id, block_id) combinations in the generated file

# Relative path, matching the '../DATA/' output location used when the file
# was generated, instead of a machine-specific absolute path.
# NOTE(review): assumes the notebook's working directory is unchanged — confirm.
window_data = pd.read_csv('../DATA/windowed_dataset.csv', dtype={'block_id': str})

# pd.read_csv parses the text 'NA' as a missing value by default, and groupby
# silently drops missing keys; restore the sentinel so windows without any
# overlapping block stay visible.
window_data['block_id'] = window_data['block_id'].fillna('NA')

grouped_data = window_data.groupby(['window_id', 'block_id'])

# Print only a preview: every window forms its own group, so printing all of
# them floods the notebook output.
preview_limit = 50
for position, (name, group) in enumerate(grouped_data):
    if position >= preview_limit:
        print(f"... {grouped_data.ngroups - preview_limit} more groups not shown")
        break
    print(f"Group name: {name}")

# contact_count = window_data['block_id'].unique()
# contact_count
# print(f"there are {len(contact_count)} contact block") # 453 - 511 = 58: 58 contact blocks are missing, why?
# NOTE(review): block_id holds comma-joined strings such as '0,1', so unique()
# counts distinct combinations rather than distinct contact blocks.

# filtered = window_data[window_data['block_id'] =='NA']
# filtered

Group name: (0, '0')
Group name: (1, '0,1')
Group name: (2, '0,1')
Group name: (3, '1,2')
Group name: (4, '1,2')
Group name: (5, '2')
Group name: (6, '2,3')
Group name: (7, '2,3')
Group name: (8, '3')
Group name: (9, '3,4')
Group name: (10, '3,4')
Group name: (11, '4')
Group name: (12, '4,5')
Group name: (13, '4,5')
Group name: (14, '5')
Group name: (15, '5,6')
Group name: (16, '5,6')
Group name: (17, '6,7')
Group name: (18, '6,7')
Group name: (19, '7')
Group name: (20, '7,8')
Group name: (21, '7,8')
Group name: (22, '8,9')
Group name: (23, '8,9')
Group name: (24, '9')
Group name: (25, '9,10')
Group name: (26, '9,10')
Group name: (27, '10,11')
Group name: (28, '10,11')
Group name: (29, '11,12')
Group name: (30, '11,12')
Group name: (31, '12')
Group name: (32, '12,13')
Group name: (33, '12,13')
Group name: (34, '13')
Group name: (35, '13,14')
Group name: (36, '13,14')
Group name: (37, '14')
Group name: (38, '14,15')
Group name: (39, '15')
Group name: (40, '15,16')
Group name: (41, '15,1

In [16]:
# Count the distinct block_id values in tactile_dataset.csv.
# Relative path (same file location as tactile_dataset_path in the processing
# cell) instead of a machine-specific absolute path.
# NOTE(review): assumes the notebook's working directory is unchanged — confirm.
contact_data = pd.read_csv('../DATA/tactile_dataset.csv')
contact_counts = contact_data['block_id'].nunique()
print(f"there are {contact_counts} contact block")

there are 511 contact block


In [None]:
# 单个文件验证
