In [2]:
import os
import pandas as pd

global_window_id = 0

def process_labeled_data(file_path,window_size,step_size):
    """dealing with labeled_data.csv, splide data based on window"""
    global global_window_id
    df = pd.read_csv(file_path)
    tactile_contact_df = pd.read_csv("../DATA/tactile_dataset_contact.csv")

    windowed_data = []

    for start in range(0, len(df) - window_size + 1, step_size):
        end = start + window_size
        window = df.iloc[start:end].copy()
        window['window_id'] = global_window_id
        global_window_id += 1

        if (window['label'] == 0).all():
            window['tactile_type_window'] = 'NC'
            window['contact_id'] = None
        else:
            window_start_time = window.iloc[0]['time']
            window_end_time = window.iloc[-1]['time']
            matching_contacts = tactile_contact_df[
                (tactile_contact_df['time'] >= window_start_time) & 
                (tactile_contact_df['time'] <= window_end_time)
            ]

            if not matching_contacts.empty:
                window['tactile_type_window'] = matching_contacts.iloc[0]['touch_type']
                window['contact_id'] = matching_contacts.iloc[0]['contact_id']
            else:
                window['tactile_type_window'] = 'NC'
                window['contact_id'] = None
        
        windowed_data.append(window)
    
    return windowed_data

def process_single_folder(data_folder, folder_name, window_size, step_size):
    """处理指定文件夹下的labeled_data.csv，生成窗口化数据"""
    folder_path = os.path.join(data_folder, folder_name)
    
    if os.path.isdir(folder_path):  # Check if it's a directory
        file_path = os.path.join(folder_path, 'labeled_data.csv')
        
        if os.path.isfile(file_path):
            # tactile_type = folder_name.split('-')[1][1:]
            windowed_data = process_labeled_data(file_path, window_size, step_size)
            return windowed_data
        else:
            print(f"文件夹 {folder_name} 下没有找到labeled_data.csv文件")
            return []
    else:
        print(f"{folder_name} 不是有效的文件夹")
        return []


def process_all_folders(data_folder, output_file, window_size, step_size):
    """处理所有子文件夹，合并所有窗口化数据，生成labeled_data_window.csv文件"""
    all_windowed_data = []
    
    for folder_name in os.listdir(data_folder):
        folder_windowed_data = process_single_folder(data_folder, folder_name, window_size, step_size)
        all_windowed_data.extend(folder_windowed_data)
    
    if all_windowed_data:
        result_df = pd.concat(all_windowed_data, ignore_index=True)
        output_path = os.path.join(output_file, 'labeled_data_window.csv')
        
        # 删除已存在的文件
        if os.path.exists(output_path):
            os.remove(output_path)
        
        # 保存新文件
        result_df.to_csv(output_path, index=False)
        print(f"{output_path} 文件已生成，包含 {len(result_df)} 行数据")
    else:
        print("没有数据块被处理")


In [8]:
data_folder = '../DATA/Labeled_data'
output_file = '../DATA/'
window_size = 28
step_size = 14  # 前移步长

# 处理所有文件夹并生成最终输出
process_all_folders(data_folder, output_file, window_size, step_size)

.DS_Store 不是有效的文件夹
../DATA/labeled_data_window.csv 文件已生成，包含 509208 行数据


In [9]:
# 先验证文件总行数是否一致，应是原来数据行数的两倍
data_folder = '../DATA/Labeled_data'
total_row = 0
for folder_name in os.listdir(data_folder):
    folder_path = os.path.join(data_folder,folder_name)
    file_path = os.path.join(folder_path,'labeled_data.csv')
    print(f"当前路径是：{file_path}")

    if os.path.isfile(file_path):
        df = pd.read_csv(file_path)
        row_count = len(df)
        print(f"当前文件共有{row_count}行")
        total_row += row_count
        print(f"目前共有{total_row} 行")
print(f"共有{total_row} 行")

当前路径是：../DATA/Labeled_data/.DS_Store/labeled_data.csv
当前路径是：../DATA/Labeled_data/0724-7DT-Y1/labeled_data.csv
当前文件共有23022行
目前共有23022 行
当前路径是：../DATA/Labeled_data/0724-7ST-Y1/labeled_data.csv
当前文件共有30682行
目前共有53704 行
当前路径是：../DATA/Labeled_data/0724-7ST-S1/labeled_data.csv
当前文件共有20499行
目前共有74203 行
当前路径是：../DATA/Labeled_data/0724-7DT-S1/labeled_data.csv
当前文件共有23775行
目前共有97978 行
当前路径是：../DATA/Labeled_data/0820-0NC-Y/labeled_data.csv
当前文件共有58803行
目前共有156781 行
当前路径是：../DATA/Labeled_data/0724-7P-Y1/labeled_data.csv
当前文件共有26122行
目前共有182903 行
当前路径是：../DATA/Labeled_data/0724-7P-S1/labeled_data.csv
当前文件共有18024行
目前共有200927 行
当前路径是：../DATA/Labeled_data/0724-7G-S1/labeled_data.csv
当前文件共有22884行
目前共有223811 行
当前路径是：../DATA/Labeled_data/0724-7G-Y1/labeled_data.csv
当前文件共有30971行
目前共有254782 行
共有254782 行


In [10]:
#验证

window_data= pd.read_csv("/Users/shangyu/Documents/GitHub/tactileGestureDetection/DATA/labeled_data_window.csv")
window_data.columns
window_data.head()
grouped_data = window_data.groupby(['window_id','tactile_type_window'])
grouped_data
# for name, group in grouped_data:
#     print(f"Group name: {name}")
#     print(group)

contact_count = window_data['contact_id'].unique()
# contact_count
print(f"there are {len(contact_count)} contact block") # 453-511=58 少了58个contact block, 为什么？

there are 453 contact block


In [4]:
contact_data= pd.read_csv("/Users/shangyu/Documents/GitHub/tactileGestureDetection/DATA/tactile_dataset_contact.csv")
contact_counts = contact_data['contact_id'].nunique()
print(f"there are {contact_counts} contact block")

there are 511 contact block


In [None]:
# 单个文件验证
