## 生成标签文件

### CSV：标签+wav文件路径

In [1]:
import os

# Build the label file: one "<speaker> <wav path>" line per recording.

# Root folder that holds one sub-folder per speaker.
data_folder = 'data'

# Sub-folder names, one per speaker; the folder name doubles as the class label.
person_folders = ['lijunjie', 'xuzhaoqi', 'chence', 'wusiyuan']

# Recordings are assumed to be named 1.wav .. 30.wav inside every speaker folder.
# NOTE(review): file existence is not checked here - confirm the data layout.
wavs_per_person = 30

# Each entry is "<label> <path>", space-separated, with no header row.
label_file_paths = [
    f"{person} {os.path.join(data_folder, person, f'{i}.wav')}"
    for person in person_folders
    for i in range(1, wavs_per_person + 1)
]

# Write the entries out, one per line. The encoding is pinned to UTF-8 so the
# file is read back identically by tools that default to UTF-8, regardless of
# the platform's locale encoding.
output_csv = 'labels.csv'
with open(output_csv, 'w', encoding='utf-8') as f:
    f.writelines(line + '\n' for line in label_file_paths)

print(f"标签文件已生成，保存在 {output_csv} 中。")

标签文件已生成，保存在 labels.csv 中。


### CSV：[能量，均值，标准差]+标签

In [4]:
import os
import pandas as pd
import numpy as np
import pywt
from scipy.io import wavfile

# Extract [energy, mean, std] wavelet features for every labelled recording
# and store them, together with the label, in feature_vectors.csv.

# Load the label table: space-separated "<label> <wav path>" rows, no header.
labels_df = pd.read_csv('data/full/labels.csv', delimiter=' ', header=None, names=['label', 'path'])

# Wavelet settings are loop-invariant, so they are defined once up front.
wavelet_name = 'db4'
decomposition_level = 4
# pywt.wavedec returns [cA_n, cD_n, ..., cD_1]; with n = 4 that is
# [cA4, cD4, cD3, cD2, cD1], so index 2 selects the level-3 detail band (cD3).
detail_index = 2

# One [energy, mean, std, label] row per audio file.
feature_vectors = []

for label, audio_path in zip(labels_df['label'], labels_df['path']):
    # Sample rate is returned but not used by the features below.
    rate, wavsignal = wavfile.read(audio_path)

    # wavfile.read yields a (samples, channels) array for multi-channel audio,
    # while wavedec transforms along the last axis by default. Down-mix to mono
    # so the decomposition always runs along time; mono input is left untouched.
    if wavsignal.ndim > 1:
        wavsignal = wavsignal.mean(axis=1)

    # Multi-level discrete wavelet decomposition of the signal.
    coeffs = pywt.wavedec(wavsignal, wavelet_name, level=decomposition_level)
    high_coeffs = coeffs[detail_index]

    # Summary statistics of the selected detail band, followed by the label.
    feature_vector = [
        np.sum(np.square(high_coeffs)),  # energy
        np.mean(high_coeffs),
        np.std(high_coeffs),
        label,
    ]
    feature_vectors.append(feature_vector)

# Assemble the table and persist it without the index column.
feature_df = pd.DataFrame(feature_vectors, columns=['energy', 'mean', 'std', 'label'])
feature_df.to_csv('feature_vectors.csv', index=False)
print("特征向量已写入feature_vectors.csv文件。")

特征向量已写入feature_vectors.csv文件。


### CSV：[能量，均值，标准差，功率谱]+标签