## 生成标签文件

### CSV：标签+wav文件路径

In [7]:
import os

# Root directory that holds one sub-folder of recordings per speaker.
data_folder = 'data'

# Speaker sub-folders to index in this run.
# A previous run used: ['lijunjie', 'xuzhaoqi', 'chence', 'wusiyuan']
person_folders = ['xuzhaoqi_p2']

# Build one "<label> <path>" entry per recording. Every speaker folder is
# assumed to contain files named 1.wav ... 30.wav; existence is not checked.
label_file_paths = [
    speaker + ' ' + os.path.join(data_folder, speaker, f'{number}.wav')
    for speaker in person_folders
    for number in range(1, 31)
]

# Persist the entries, one per line (label and path separated by a space).
output_csv = '240402_xzq_p2.csv'
with open(output_csv, 'w') as out_file:
    out_file.writelines(entry + '\n' for entry in label_file_paths)

print(f"标签文件已生成，保存在 {output_csv} 中。")

标签文件已生成，保存在 240402_xzq_p2.csv 中。


In [3]:
import os
import csv

# Directory that contains the audio files to index.
directory = 'data/1/chence'

# Name/path of the CSV file to create.
output_csv_file = 'chence_labelpath.csv'

# Write a header row followed by one "label,path" row per WAV file.
# newline='' lets the csv module control line endings. The encoding is pinned
# to UTF-8 so the file decodes with pandas.read_csv's default regardless of
# the platform locale.
with open(output_csv_file, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Label', 'File Path'])

    # os.listdir returns entries in arbitrary order; sort them so the CSV
    # rows come out in the same order on every run and every machine.
    for filename in sorted(os.listdir(directory)):
        if filename.endswith('.wav'):  # only WAV files are indexed
            # The label is the part of the file name before the first '_'.
            label = filename.split('_')[0]
            # Path of the file, prefixed with the scanned directory.
            file_path = os.path.join(directory, filename)
            writer.writerow([label, file_path])

print(f"CSV file '{output_csv_file}' has been created successfully.")

CSV file 'xuzhaoqi_labelpath.csv' has been created successfully.


### CSV：[能量，均值，标准差]+标签

In [6]:
import os
import pandas as pd
import numpy as np
import pywt
from scipy.io import wavfile

# Load the label/path table. The delimiter must match the one used when the
# CSV was written. Columns are named explicitly, so the file is read without
# treating any row as a header.
labels_df = pd.read_csv('lijunjie_labelpath.csv', delimiter=',', header=None, names=['label', 'path'])

# The cell that generates "*_labelpath.csv" files writes a
# ['Label', 'File Path'] header row. Read with header=None, that row shows up
# as data and would make wavfile.read fail on the path "File Path". Drop it
# when present; header-less files are left untouched.
if not labels_df.empty:
    first_row = labels_df.iloc[0]
    if (str(first_row['label']).strip().lower() == 'label'
            and str(first_row['path']).strip().lower() == 'file path'):
        labels_df = labels_df.iloc[1:].reset_index(drop=True)

# Wavelet used for the decomposition (same for every file).
wavelet_name = 'db4'

# Collected rows of [energy, mean, std, label].
feature_vectors = []

# Process each audio file listed in the table.
for label, audio_path in zip(labels_df['label'], labels_df['path']):
    # Read the audio file (sample rate is returned but not used below).
    rate, wavsignal = wavfile.read(audio_path)

    # Multi-channel audio: keep only the first channel.
    if wavsignal.ndim > 1:
        wavsignal = wavsignal[:, 0]

    # 4-level discrete wavelet decomposition; files that pywt rejects are
    # reported and skipped.
    try:
        coeffs = pywt.wavedec(wavsignal, wavelet_name, level=4)
    except ValueError as e:
        print(f"Error processing file {audio_path}: {e}")
        continue

    # wavedec returns [cA4, cD4, cD3, cD2, cD1] for level=4, so index 2 is
    # the level-3 detail band.
    high_coeffs = coeffs[2]

    # Features of that band: energy (sum of squares), mean, standard deviation.
    feature_vector = [np.sum(np.square(high_coeffs)), np.mean(high_coeffs), np.std(high_coeffs)]

    # Append the label as the last column.
    feature_vector.append(label)

    feature_vectors.append(feature_vector)

# Assemble all rows into a DataFrame.
feature_df = pd.DataFrame(feature_vectors, columns=['energy', 'mean', 'std', 'label'])

# Write the feature table to CSV.
feature_df.to_csv('lijunjie_xiaobo_vectors.csv', index=False)
print("特征向量已写入lijunjie_xiaobo_vectors.csv文件。")


特征向量已写入lijunjie_xiaobo_vectors.csv文件。


### CSV：[能量，均值，标准差，功率谱]+标签