In [1]:
"""
在labeled_data.csv中加’block_id‘和’touch_type‘列,生成labeled_tactile_dataset.csv
遇到DT,ST,P,G文件时，将所有未标记的数据标记为：
‘block_id’ = -1  ’touch_type‘= ’NC‘
"""
import os
import pandas as pd

global_block_id = 0

def process_labeledData(df, tactile_type):
    """处理单个labeled_data.csv文件，标记数据块和背景数据"""
    """Process a single labeled_data.csv file to tag data blocks and background data."""
    global global_block_id
    labeled_blocks = []
    total_blocks = 0
    
    in_block = False
    block_start = 0
    block_info = []

    # if tactile_type == 'NC':
    #     # Mark touched data as 'ST_abandon' and untouched data as 'NC'
    #     df.loc[df['label'] == 1, ['block_id', 'touch_type']] = [-1, 'ST_abandon']
    #     df.loc[df['label'] == 0, ['block_id', 'touch_type']] = [-2, 'NC']
    #     return df, total_blocks, block_info

    for i in range(len(df)):
        if df.iloc[i, df.columns.get_loc('label')] == 1 and not in_block:
            in_block = True
            block_start = i
        elif df.iloc[i, df.columns.get_loc('label')] == 0 and in_block:
            in_block = False
            block = df.iloc[block_start:i].copy()
            block['touch_type'] = tactile_type
            block['block_id'] = global_block_id  # 添加全局唯一数据块标识符 # Add global unique block identifier
            labeled_blocks.append(block)
            block_info.append((global_block_id, block_start, i - 1))  # 记录block信息 # Record block info
            global_block_id += 1  # 增加全局唯一数据块标识符 # Increment global block ID
            total_blocks += 1

    if in_block:
        block = df.iloc[block_start:].copy()
        block['touch_type'] = tactile_type
        block['block_id'] = global_block_id  # 添加全局唯一数据块标识符
        labeled_blocks.append(block)
        block_info.append((global_block_id, block_start, len(df) - 1))  # 记录block信息
        global_block_id += 1  # 增加全局唯一数据块标识符
        total_blocks += 1
    
    
    df['block_id'] = -1
    df['touch_type'] = 'NC'
    
    # 将处理后的数据块合并回原始DataFrame
    # Merge processed blocks back into the original DataFrame
    for block in labeled_blocks:
        df.update(block)
    
    return df, total_blocks, block_info

def calculate_zero_counts(df, block_info):
    """计算相邻数据块之间0的个数"""
    """Calculate the count of zeros between adjacent data blocks."""
    zero_counts = []
    for i in range(len(block_info) - 1):
        end_of_current_block = block_info[i][2]
        start_of_next_block = block_info[i + 1][1]
        zero_count = (df.iloc[end_of_current_block + 1:start_of_next_block]['label'] == 0).sum()
        zero_counts.append((block_info[i][0], block_info[i + 1][0], zero_count))
    return zero_counts

def determine_double_taps(zero_counts):
    """根据0的个数决定哪些单拍应该合并为双拍"""
    """Decide which single taps should merge into double taps based on zero counts."""
    zero_counts_values = [count[2] for count in zero_counts]
    zero_counts_values.sort()
    n = len(zero_counts_values)
    max_value_index = int((n / 2) / (n + 1) * n)
    max_value = zero_counts_values[max_value_index]
    min_value = zero_counts_values[0]
    
    double_tap_blocks = []
    for start_block_id, end_block_id, zero_count in zero_counts:
        if min_value <= zero_count <= max_value:
            double_tap_blocks.append((start_block_id, end_block_id))
    return double_tap_blocks

def merge_continuous_double_taps(double_tap_blocks):
    """合并连续的双拍块对"""
    """Merge continuous double tap block pairs."""
    merged_double_tap_blocks = []
    current_start = None
    current_end = None
    
    for start_block_id, end_block_id in double_tap_blocks:
        if current_start is None:
            current_start = start_block_id
            current_end = end_block_id
        elif start_block_id == current_end:
            current_end = end_block_id
        else:
            merged_double_tap_blocks.append((current_start, current_end))
            current_start = start_block_id
            current_end = end_block_id
    
    if current_start is not None:
        merged_double_tap_blocks.append((current_start, current_end))
    
    return merged_double_tap_blocks

def merge_double_taps(df, block_info, double_tap_blocks, start_block_id):
    """合并确定的双拍数据块"""
    """Merge identified double tap data blocks."""
    merged_double_tap_blocks = merge_continuous_double_taps(double_tap_blocks)
    merged_blocks = []
    new_block_id = start_block_id

    for start_block_id, end_block_id in merged_double_tap_blocks:
        start_row = next(bi[1] for bi in block_info if bi[0] == start_block_id)
        end_row = next(bi[2] for bi in block_info if bi[0] == end_block_id)
        
        merged_block = df.iloc[start_row:end_row + 1].copy()
        merged_block['block_id'] = new_block_id  # 使用新的连续编号
        merged_block['touch_type'] = 'DT'  # 标记为双拍
        
        merged_blocks.append(merged_block)
        new_block_id += 1

    return merged_blocks, new_block_id


def process_double_tap_folder(df, tactile_type, start_block_id):
    """处理双拍数据，将挨得非常近的两个连续块标记为双拍类型"""
    """Process double tap data and label closely adjacent blocks as double tap type."""
    df, total_blocks, block_info = process_labeledData(df, tactile_type)
    zero_counts = calculate_zero_counts(df, block_info)
    double_tap_blocks = determine_double_taps(zero_counts)
    merged_blocks, new_block_id = merge_double_taps(df, block_info, double_tap_blocks, start_block_id)
    
    # 将双拍块更新回df
    for block in merged_blocks:
        df.update(block)
    
    return df, total_blocks

def process_single_folder(data_folder, folder_name, start_block_id):
# def process_single_folder(data_folder, folder_name, start_block_id,output_file= None):
    """处理指定文件夹下的labeled_data.csv，并打印块和行的信息"""
    """Process labeled_data.csv in the specified folder and print block and row info."""
    folder_path = os.path.join(data_folder, folder_name)
    
    if os.path.isdir(folder_path):  # Check if it's a directory
        file_path = os.path.join(folder_path, 'labeled_data.csv')
        
        if os.path.isfile(file_path):
            tactile_type = folder_name.split('-')[1][1:]
            df = pd.read_csv(file_path)
            
            if tactile_type == 'DT':
                df, total_blocks = process_double_tap_folder(df, tactile_type, start_block_id)
            elif tactile_type in ['ST', 'G', 'P']:
                df, total_blocks, _ = process_labeledData(df, tactile_type)
            elif tactile_type == 'NC':
                df, total_blocks, _ = process_labeledData(df, tactile_type)
            else:
                print(f"未知的触碰类型: {tactile_type}")
                return pd.DataFrame(), 0
            
            print(f"文件夹 {folder_name}: 数据长度 = {len(df)}, 数据块个数 = {total_blocks}")
            # output_path = os.path.join(output_file, '2_specific_labeled_tactile_dataset.csv')
            # df.to_csv(output_path, index=False)
            return df, total_blocks
        else:
            print(f"文件夹 {folder_name} 下没有找到labeled_data.csv文件")
            return pd.DataFrame(), 0
    else:
        print(f"{folder_name} 不是有效的文件夹")
        return pd.DataFrame(), 0

def process_all_folders(data_folder, output_file):
    """处理所有子文件夹，合并所有数据块，生成labeled_tactile_dataset文件"""
    """Process all subfolders, merge all data blocks, and create labeled_tactile_dataset."""
    all_data = []
    total_blocks_count = 0
    
    for folder_name in os.listdir(data_folder):
        df, total_blocks = process_single_folder(data_folder, folder_name, global_block_id)
        if not df.empty:
            all_data.append(df)
            total_blocks_count += total_blocks
    
    if all_data:
        result_df = pd.concat(all_data, ignore_index=True)
        output_path = os.path.join(output_file, '2_labeled_tactile_dataset.csv')
        
        # 删除已存在的文件
        if os.path.exists(output_path):
            os.remove(output_path)
        
        # 保存新文件
        result_df.to_csv(output_path, index=False)
        print(f"2_labeled_tactile_dataset.csv 文件已生成，total length = {len(result_df)}, total_block = {total_blocks_count}")
    else:
        print("没有数据块被处理")

# 设置数据文件夹路径
data_folder = '../DATA/Labeled_data/'
folder_name = "0910-7ST-S1"
output_file = '../DATA/'
process_all_folders(data_folder, output_file)
# process_single_folder(data_folder, folder_name, start_block_id=1,output_file = output_file)

.DS_Store 不是有效的文件夹
文件夹 0910-7P-S2: 数据长度 = 7995, 数据块个数 = 15
文件夹 0910-7DT-S4: 数据长度 = 13475, 数据块个数 = 60
文件夹 0910-7P-S5: 数据长度 = 8868, 数据块个数 = 16
文件夹 0910-7DT-S3: 数据长度 = 14576, 数据块个数 = 60
文件夹 0910-7ST-S3: 数据长度 = 11675, 数据块个数 = 30
文件夹 0910-7ST-Y2: 数据长度 = 15724, 数据块个数 = 30
文件夹 0910-7P-S4: 数据长度 = 8677, 数据块个数 = 15
文件夹 0910-7ST-S2: 数据长度 = 12168, 数据块个数 = 30
文件夹 0910-7DT-S2: 数据长度 = 15183, 数据块个数 = 60
文件夹 0910-7P-S3: 数据长度 = 9301, 数据块个数 = 15
文件夹 0910-7DT-S5: 数据长度 = 14004, 数据块个数 = 60
文件夹 0910-7G-S5: 数据长度 = 9368, 数据块个数 = 15
文件夹 0910-7G-S2: 数据长度 = 10075, 数据块个数 = 15
文件夹 0910-7G-S3: 数据长度 = 11221, 数据块个数 = 15
文件夹 0910-7G-S4: 数据长度 = 13279, 数据块个数 = 15
文件夹 0910-7P-S1: 数据长度 = 8343, 数据块个数 = 15
文件夹 0910-7ST-S1: 数据长度 = 10883, 数据块个数 = 30
文件夹 0910-7DT-S1: 数据长度 = 14472, 数据块个数 = 60
文件夹 0910-7ST-Y1: 数据长度 = 17838, 数据块个数 = 30
文件夹 0910-7G-S1: 数据长度 = 9531, 数据块个数 = 15
2_labeled_tactile_dataset.csv 文件已生成，total length = 236656, total_block = 601


In [2]:
# 验证生成的数据 
import os
import pandas as pd
df = pd.read_csv("../DATA/2_labeled_tactile_dataset.csv")
df.head(100)
# dt.iloc[240:300]
# dt[dt['touch_type']=='DT']

Unnamed: 0,index,time,tau_J0,tau_J1,tau_J2,tau_J3,tau_J4,tau_J5,tau_J6,tau_J_d0,...,etau_J0,etau_J1,etau_J2,etau_J3,etau_J4,etau_J5,etau_J6,label,block_id,touch_type
0,1.0,0.000000,-0.148950,-33.260303,-1.629527,19.343332,0.447105,2.415051,-0.089855,0.0,...,0.148950,33.260303,1.629527,-19.343332,-0.447105,-2.415051,0.089855,0.0,-1.0,NC
1,2.0,0.004996,-0.113244,-33.220631,-1.558116,19.232248,0.447105,2.415051,-0.079784,0.0,...,0.113244,33.220631,1.558116,-19.232248,-0.447105,-2.415051,0.079784,0.0,-1.0,NC
2,3.0,0.009996,-0.113244,-33.220631,-1.558116,19.232248,0.447105,2.415051,-0.079784,0.0,...,0.113244,33.220631,1.558116,-19.232248,-0.447105,-2.415051,0.079784,0.0,-1.0,NC
3,4.0,0.015003,-0.148950,-33.260303,-1.593821,19.343332,0.465415,2.404980,-0.059643,0.0,...,0.148950,33.260303,1.593821,-19.343332,-0.465415,-2.404980,0.059643,0.0,-1.0,NC
4,5.0,0.020783,-0.148950,-33.260303,-1.593821,19.343332,0.465415,2.404980,-0.059643,0.0,...,0.148950,33.260303,1.593821,-19.343332,-0.465415,-2.404980,0.059643,0.0,-1.0,NC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,96.0,0.474981,-0.224328,-33.299976,-1.629527,19.383005,0.447105,2.415051,-0.069713,0.0,...,0.224328,33.299976,1.629527,-19.383005,-0.447105,-2.415051,0.069713,0.0,-1.0,NC
96,97.0,0.479996,-0.224328,-33.299976,-1.629527,19.383005,0.447105,2.415051,-0.069713,0.0,...,0.224328,33.299976,1.629527,-19.383005,-0.447105,-2.415051,0.069713,0.0,-1.0,NC
97,98.0,0.484995,-0.224328,-33.299976,-1.593821,19.343332,0.465415,2.415051,-0.069713,0.0,...,0.224328,33.299976,1.593821,-19.343332,-0.465415,-2.415051,0.069713,0.0,-1.0,NC
98,99.0,0.489995,-0.224328,-33.299976,-1.593821,19.343332,0.465415,2.415051,-0.069713,0.0,...,0.224328,33.299976,1.593821,-19.343332,-0.465415,-2.415051,0.069713,0.0,-1.0,NC


In [5]:
print(df['block_id'].nunique())
print("---")
print(df.groupby('touch_type')['block_id'].nunique())
print("---")
print(df['block_id'].unique())


452
---
touch_type
DT    150
G      75
NC      1
P      76
ST    150
Name: block_id, dtype: int64
---
[ -1.   0.   1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.  12.
  13.  14.  15.  16.  17.  18.  19.  20.  21.  22.  23.  24.  25.  26.
  27.  28.  29.  30.  31.  32.  33.  34.  35.  36.  37.  38.  39.  40.
  41.  42.  43.  44.  75.  76.  77.  78.  79.  80.  81.  82.  83.  84.
  85.  86.  87.  88.  89.  90.  91.  92.  93.  94.  95.  96.  97.  98.
  99. 100. 101. 102. 103. 104. 105. 106. 107. 108. 109. 110. 111. 112.
 113. 114. 115. 116. 117. 118. 119. 120. 151. 152. 153. 154. 155. 156.
 157. 158. 159. 160. 161. 162. 163. 164. 165. 166. 167. 168. 169. 170.
 171. 172. 173. 174. 175. 176. 177. 178. 179. 180. 181. 182. 183. 184.
 185. 186. 187. 188. 189. 190. 191. 192. 193. 194. 195. 196. 197. 198.
 199. 200. 201. 202. 203. 204. 205. 206. 207. 208. 209. 210. 211. 212.
 213. 214. 215. 216. 217. 218. 219. 220. 221. 222. 223. 224. 225. 226.
 227. 228. 229. 230. 231. 232. 233. 234. 235. 

In [6]:
df[df['touch_type']=='NC']
# nc['block_id'].unique()
# nc[nc['block_id']==0]
df[df['block_id']==-1]

Unnamed: 0,index,time,tau_J0,tau_J1,tau_J2,tau_J3,tau_J4,tau_J5,tau_J6,tau_J_d0,...,etau_J0,etau_J1,etau_J2,etau_J3,etau_J4,etau_J5,etau_J6,label,block_id,touch_type
0,1.0,0.000000,-0.148950,-33.260303,-1.629527,19.343332,0.447105,2.415051,-0.089855,0.0,...,0.148950,33.260303,1.629527,-19.343332,-0.447105,-2.415051,0.089855,0.0,-1.0,NC
1,2.0,0.004996,-0.113244,-33.220631,-1.558116,19.232248,0.447105,2.415051,-0.079784,0.0,...,0.113244,33.220631,1.558116,-19.232248,-0.447105,-2.415051,0.079784,0.0,-1.0,NC
2,3.0,0.009996,-0.113244,-33.220631,-1.558116,19.232248,0.447105,2.415051,-0.079784,0.0,...,0.113244,33.220631,1.558116,-19.232248,-0.447105,-2.415051,0.079784,0.0,-1.0,NC
3,4.0,0.015003,-0.148950,-33.260303,-1.593821,19.343332,0.465415,2.404980,-0.059643,0.0,...,0.148950,33.260303,1.593821,-19.343332,-0.465415,-2.404980,0.059643,0.0,-1.0,NC
4,5.0,0.020783,-0.148950,-33.260303,-1.593821,19.343332,0.465415,2.404980,-0.059643,0.0,...,0.148950,33.260303,1.593821,-19.343332,-0.465415,-2.404980,0.059643,0.0,-1.0,NC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
236651,9527.0,47.629988,-0.264001,-33.415028,-1.704905,19.267954,0.511192,2.468151,-0.079784,0.0,...,0.264001,33.415028,1.704905,-19.267954,-0.511192,-2.468151,0.079784,0.0,-1.0,NC
236652,9528.0,47.634999,-0.299707,-33.450733,-1.669200,19.343332,0.538658,2.468151,-0.069713,0.0,...,0.299707,33.450733,1.669200,-19.343332,-0.538658,-2.468151,0.069713,0.0,-1.0,NC
236653,9529.0,47.639985,-0.299707,-33.450733,-1.669200,19.343332,0.538658,2.468151,-0.069713,0.0,...,0.299707,33.450733,1.669200,-19.343332,-0.538658,-2.468151,0.069713,0.0,-1.0,NC
236654,9530.0,47.645006,-0.264001,-33.415028,-1.744578,19.267954,0.529502,2.478222,-0.099926,0.0,...,0.264001,33.415028,1.744578,-19.267954,-0.529502,-2.478222,0.099926,0.0,-1.0,NC


In [None]:
#不要运行这个代码，这是一个测试
import os
import pandas as pd

global_block_id = 0

def process_labeledDta(file_path, tactile_type):
    """处理单个labeled_data.csv文件，将连续label为1的数据块标记为指定触碰类型"""
    global global_block_id
    df = pd.read_csv(file_path)
    labeled_blocks = []
    block_info = []

    in_block = False
    block_start = 0

    for i in range(len(df)):
        if df.iloc[i, df.columns.get_loc('label')] == 1 and not in_block:
            in_block = True
            block_start = i
        elif df.iloc[i, df.columns.get_loc('label')] == 0 and in_block:
            in_block = False
            block = df.iloc[block_start:i].copy()
            block['touch_type'] = tactile_type
            block['block_id'] = global_block_id  # 添加全局唯一数据块标识符
            labeled_blocks.append(block)
            block_info.append((global_block_id, block_start, i - 1))  # 记录块的ID、开始行和结束行
            global_block_id += 1  # 增加全局唯一数据块标识符

    if in_block:
        block = df.iloc[block_start:].copy()
        block['touch_type'] = tactile_type
        block['block_id'] = global_block_id  # 添加全局唯一数据块标识符
        labeled_blocks.append(block)
        block_info.append((global_block_id, block_start, len(df) - 1))  # 记录块的ID、开始行和结束行
        global_block_id += 1  # 增加全局唯一数据块标识符
    
    return labeled_blocks, block_info

def calculate_zero_counts(df, block_info):
    """计算相邻数据块之间0的个数"""
    zero_counts = []
    for i in range(len(block_info) - 1):
        end_of_current_block = block_info[i][2]
        start_of_next_block = block_info[i + 1][1]
        zero_count = (df.iloc[end_of_current_block + 1:start_of_next_block]['label'] == 0).sum()
        zero_counts.append((block_info[i][0], block_info[i + 1][0], zero_count))
    return zero_counts

def determine_double_taps(zero_counts):
    """根据0的个数决定哪些单拍应该合并为双拍"""
    zero_counts_values = [count[2] for count in zero_counts]
    zero_counts_values.sort()
    n = len(zero_counts_values)
    max_value_index = int((n / 2) / (n + 1) * n)
    max_value = zero_counts_values[max_value_index]
    min_value = zero_counts_values[0]
    
    double_tap_blocks = []
    for start_block_id, end_block_id, zero_count in zero_counts:
        if min_value <= zero_count <= max_value:
            double_tap_blocks.append((start_block_id, end_block_id))
    return double_tap_blocks

def merge_continuous_double_taps(double_tap_blocks):
    """合并连续的双拍块对"""
    merged_double_tap_blocks = []
    current_start = None
    current_end = None
    
    for start_block_id, end_block_id in double_tap_blocks:
        if current_start is None:
            current_start = start_block_id
            current_end = end_block_id
        elif start_block_id == current_end:
            current_end = end_block_id
        else:
            merged_double_tap_blocks.append((current_start, current_end))
            current_start = start_block_id
            current_end = end_block_id
    
    if current_start is not None:
        merged_double_tap_blocks.append((current_start, current_end))
    
    return merged_double_tap_blocks

def merge_double_taps(df, block_info, double_tap_blocks, start_block_id):
    """合并确定的双拍数据块"""
    merged_double_tap_blocks = merge_continuous_double_taps(double_tap_blocks)
    merged_blocks = []
    new_block_id = start_block_id

    for start_block_id, end_block_id in merged_double_tap_blocks:
        start_row = next(bi[1] for bi in block_info if bi[0] == start_block_id)
        end_row = next(bi[2] for bi in block_info if bi[0] == end_block_id)
        
        merged_block = df.iloc[start_row:end_row + 1].copy()
        merged_block['block_id'] = new_block_id  # 使用新的连续编号
        merged_block['touch_type'] = 'DT'  # 标记为双拍
        
        merged_blocks.append(merged_block)
        new_block_id += 1

    return merged_blocks, new_block_id

def process_double_tap_folder(df, file_path, tactile_type, start_block_id):
    """处理双拍数据，将挨得非常近的两个连续块标记为双拍类型"""
    blocks, block_info = process_labeledDta(file_path, tactile_type)
    zero_counts = calculate_zero_counts(df, block_info)
    double_tap_blocks = determine_double_taps(zero_counts)
    merged_blocks, new_block_id = merge_double_taps(df, block_info, double_tap_blocks, start_block_id)
    return merged_blocks, new_block_id

def process_single_folder(data_folder, folder_name, start_block_id):
    """处理指定文件夹下的labeled_data.csv，统计数据块个数，并打印块的开始行和结束行"""
    folder_path = os.path.join(data_folder, folder_name)
    
    if os.path.isdir(folder_path):  # Check if it's a directory
        file_path = os.path.join(folder_path, 'labeled_data.csv')
        
        if os.path.isfile(file_path):
            tactile_type = folder_name.split('-')[1][1:]
            df = pd.read_csv(file_path)
            original_length = len(df)
            
            if tactile_type == 'DT':
                blocks, new_block_id = process_double_tap_folder(df, file_path, tactile_type, start_block_id)
            elif tactile_type in ['ST', 'G', 'P']:
                blocks, _ = process_labeledDta(file_path, tactile_type)
                new_block_id = start_block_id + len(blocks)
            else:
                blocks = []
                new_block_id = start_block_id
            
            print(f"文件夹: {folder_name}, 总行数: {original_length}, 数据块个数: {len(blocks)}")
            return blocks, new_block_id
        else:
            print(f"文件夹 {folder_name} 下没有找到labeled_data.csv文件")
            return [], start_block_id
    else:
        print(f"{folder_name} 不是有效的文件夹")
        return [], start_block_id

def process_all_folders(data_folder, output_file):
    """处理所有子文件夹，合并所有数据块，生成tactile_dataset.csv文件"""
    all_blocks = []
    global global_block_id
    global_block_id = 0  # Reset global_block_id at the start
    total_blocks = 0
    
    for folder_name in os.listdir(data_folder):
        folder_blocks, global_block_id = process_single_folder(data_folder, folder_name, global_block_id)
        all_blocks.extend(folder_blocks)
        total_blocks += len(folder_blocks)
    
    if all_blocks:
        result_df = pd.concat(all_blocks, ignore_index=True)
        output_path = os.path.join(output_file, 'labeled_tactile_dataset.csv')
        
        # 删除已存在的文件
        if os.path.exists(output_path):
            os.remove(output_path)
        
        # 保存新文件
        result_df.to_csv(output_path, index=False)
        print(f"labeled_tactile_dataset.csv 文件已生成，总行数: {len(result_df)}，总数据块个数: {total_blocks}")
    else:
        print("没有数据块被处理")

# 设置数据文件夹路径
data_folder = '../DATA/Labeled_data'
output_file = '../DATA/'
process_all_folders(data_folder, output_file)


