In [5]:
# Slice the labeled dataset into fixed-size sliding windows.
import os
import pandas as pd
import numpy as np

# Configuration: window length and stride, both counted in rows of the input.
# With step_size < window_size consecutive windows overlap, so rows are
# duplicated in the output.
window_size = 28
step_size = 14

# Input and output locations.
input_file = '../DATA/3_labeled_tactile_dataset_clean.csv'
output_file = '../DATA/4_labeled_window_dataset.csv'
output_file_normalization = '../DATA/4_labeled_window_dataset_Normalization.csv'

# When True, the columns below are rescaled inside every window and the result
# is written to output_file_normalization; when False the windows are written
# unchanged to output_file.
normalization = True

# Columns rescaled per window. Defined once here instead of on every iteration.
cols_to_normalize = [
    'e0', 'e1', 'e2', 'e3', 'e4', 'e5', 'e6',
    'de0', 'de1', 'de2', 'de3', 'de4', 'de5', 'de6',
    'tau_J0', 'tau_J1', 'tau_J2', 'tau_J3', 'tau_J4', 'tau_J5', 'tau_J6',
    'tau_ext0', 'tau_ext1', 'tau_ext2', 'tau_ext3', 'tau_ext4', 'tau_ext5', 'tau_ext6'
]

df = pd.read_csv(input_file)

# One DataFrame per window, concatenated after the loop.
windowed_data = []

# Running identifier assigned to each window in order of creation.
window_id = 0

# Only full windows are produced: trailing rows that do not fill a complete
# window are dropped by the range bound.
for start in range(0, len(df) - window_size + 1, step_size):
    # Copy so that the per-window edits below never touch df.
    window = df.iloc[start:start + window_size].copy()

    if normalization:
        for col in cols_to_normalize:
            std_col = np.std(window[col])
            # The window is scaled but not centered (a mean-subtracting variant
            # was previously disabled). The 1e-5 term keeps the division finite
            # when a column is constant within the window.
            window[col] = window[col] / (std_col + 1e-5)

    window['window_id'] = window_id
    window_id += 1

    # Window-level label: the first touch_type other than 'NC' in order of
    # appearance, or 'NC' when the window contains nothing else.
    # NOTE(review): if a window contains two different non-'NC' types, only the
    # first one encountered is kept - confirm this is the intended policy.
    window_touch_type = next(
        (t for t in window['touch_type'].unique() if t != 'NC'),
        'NC',
    )

    # Broadcast the window-level label to every row of the window.
    window['window_touch_type'] = window_touch_type

    windowed_data.append(window)

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that names the actual cause instead.
if not windowed_data:
    raise ValueError(
        f"{input_file} has {len(df)} rows, fewer than window_size={window_size}; "
        "no windows were produced"
    )

windowed_df = pd.concat(windowed_data, ignore_index=True)

# to_csv opens the target in write mode and replaces any existing file, so no
# explicit delete is needed beforehand.
target_file = output_file_normalization if normalization else output_file
windowed_df.to_csv(target_file, index=False)

print(f"{target_file} 文件已生成，总行数 = {len(windowed_df)}")


../DATA/4_labeled_window_dataset_Normalization.csv 文件已生成，总行数 = 244692


In [2]:
import os
import pandas as pd
import numpy as np
# Load the non-normalized windowed dataset and display the rows of window 0.
# NOTE(review): the windowing cell only writes this file when `normalization`
# is False, so with normalization = True this reads whatever an earlier run
# left on disk - confirm the file is current.
dt = pd.read_csv('../DATA/4_labeled_window_dataset.csv')
dt[dt['window_id'] == 0]

Unnamed: 0,index,time,tau_J0,tau_J1,tau_J2,tau_J3,tau_J4,tau_J5,tau_J6,tau_J_d0,...,etau_J2,etau_J3,etau_J4,etau_J5,etau_J6,label,block_id,touch_type,window_id,window_touch_type
0,755,3.770009,-0.264001,-33.450733,-1.6692,19.267954,0.492881,2.457165,-0.018444,0.0,...,1.6692,-19.267954,-0.492881,-2.457165,0.018444,1,0.0,DT,0,DT
1,756,3.775012,-0.264001,-33.415028,-1.704905,19.267954,0.483726,2.468151,-0.069713,0.0,...,1.704905,-19.267954,-0.483726,-2.468151,0.069713,1,0.0,DT,0,DT
2,757,3.78002,-0.264001,-33.415028,-1.704905,19.267954,0.483726,2.468151,-0.069713,0.0,...,1.704905,-19.267954,-0.483726,-2.468151,0.069713,1,0.0,DT,0,DT
3,758,3.785017,-0.410791,-34.216419,-1.815989,19.867014,1.125511,4.366039,-0.141125,0.0,...,1.815989,-19.867014,-1.125511,-4.366039,0.141125,1,0.0,DT,0,DT
4,759,3.790012,-0.410791,-34.216419,-1.815989,19.867014,1.125511,4.366039,-0.141125,0.0,...,1.815989,-19.867014,-1.125511,-4.366039,0.141125,1,0.0,DT,0,DT
5,760,3.795004,-2.993494,-38.913685,-4.434398,24.996714,1.088889,3.631786,-0.059643,0.0,...,4.434398,-24.996714,-1.088889,-3.631786,0.059643,1,0.0,DT,0,DT
6,761,3.800002,-2.993494,-38.913685,-4.434398,24.996714,1.088889,3.631786,-0.059643,0.0,...,4.434398,-24.996714,-1.088889,-3.631786,0.059643,1,0.0,DT,0,DT
7,762,3.80501,-4.977136,-44.717823,-6.933787,28.62678,0.822471,3.956799,-0.099926,0.0,...,6.933787,-28.62678,-0.822471,-3.956799,0.099926,1,0.0,DT,0,DT
8,763,3.810013,-4.977136,-44.717823,-6.933787,28.62678,0.822471,3.956799,-0.099926,0.0,...,6.933787,-28.62678,-0.822471,-3.956799,0.099926,1,0.0,DT,0,DT
9,764,3.815004,-4.564538,-48.343922,-6.711619,30.42396,0.859092,4.093212,-0.059643,0.0,...,6.711619,-30.42396,-0.859092,-4.093212,0.059643,1,0.0,DT,0,DT


In [4]:
# Load the normalized windowed dataset and list its columns.
# `dt` is rebound here, so later cells that use `dt` see the normalized data.
dt = pd.read_csv('../DATA/4_labeled_window_dataset_Normalization.csv')
dt.columns

Index(['index', 'time', 'tau_J0', 'tau_J1', 'tau_J2', 'tau_J3', 'tau_J4',
       'tau_J5', 'tau_J6', 'tau_J_d0', 'tau_J_d1', 'tau_J_d2', 'tau_J_d3',
       'tau_J_d4', 'tau_J_d5', 'tau_J_d6', 'tau_ext0', 'tau_ext1', 'tau_ext2',
       'tau_ext3', 'tau_ext4', 'tau_ext5', 'tau_ext6', 'q0', 'q1', 'q2', 'q3',
       'q4', 'q5', 'q6', 'q_d0', 'q_d1', 'q_d2', 'q_d3', 'q_d4', 'q_d5',
       'q_d6', 'dq0', 'dq1', 'dq2', 'dq3', 'dq4', 'dq5', 'dq6', 'dq_d0',
       'dq_d1', 'dq_d2', 'dq_d3', 'dq_d4', 'dq_d5', 'dq_d6', 'e0', 'e1', 'e2',
       'e3', 'e4', 'e5', 'e6', 'de0', 'de1', 'de2', 'de3', 'de4', 'de5', 'de6',
       'etau_J0', 'etau_J1', 'etau_J2', 'etau_J3', 'etau_J4', 'etau_J5',
       'etau_J6', 'label', 'block_id', 'touch_type', 'window_id',
       'window_touch_type'],
      dtype='object')

In [9]:
# Number of distinct windows carrying each window-level label.
windows_per_label = dt.groupby('window_touch_type')['window_id'].nunique()
print(windows_per_label)

window_touch_type
DT     839
G     1633
NC    4233
P     1571
ST     463
Name: window_id, dtype: int64
