In [23]:
import pandas as pd
import os

# Every data location is resolved relative to the directory the notebook runs from.
current_path = os.getcwd()
(
    audio_scale_path,      # questionnaire spreadsheet
    audio_file_path,       # directory referenced for the .amr recordings
    audio_wav_path,        # directory for the converted wav files
    audio_wav_vad_path,    # directory for the wav files after VAD
    feature_file,          # directory where extracted features are stored
) = (
    os.path.join(current_path, relative_part)
    for relative_part in (
        "data/audio_scale.xlsx",
        "data/audio_filelist",
        "data/audio_wav",
        "data/audio_wav_vad",
        "features",
    )
)

In [24]:
# Load the questionnaire spreadsheet.
df = pd.read_excel(audio_scale_path)

# Pick the three columns used below, in this order:
# '量表完成时间', '联系电话', '抑郁得分'.
column1, column2, column3 = (
    df[column_label] for column_label in ('量表完成时间', '联系电话', '抑郁得分')
)

# Place the three columns side by side in one frame.
concatenated_columns = pd.concat([column1, column2, column3], axis=1)

# Walk the rows once, deriving the .amr file name
# ("<联系电话>_<量表完成时间 with '-' replaced by '_'>.amr") and collecting the label.
audio_name, labels = [], []
for index, row in concatenated_columns.iterrows():
    phone_part = str(row['联系电话'])
    date_part = row['量表完成时间'].replace('-', '_')
    audio_name.append('_'.join((phone_part, date_part)) + '.amr')
    labels.append(row['抑郁得分'])

# Full path of every expected audio file, in the same order as audio_name.
audio_path = []
for audio in audio_name:
    audio_path.append(os.path.join(audio_file_path, audio))

In [27]:
from pydub import AudioSegment
import subprocess
import librosa
import opensmile
from acoustic_feature import *

# # 遍历amr文件，转换为wav文件
# files = os.listdir(audio_file_path)
# for file in files:
#     # 构建完整的文件路径
#     file_path = os.path.join(audio_file_path, file)
#     print(file_path)
#     command = f"ffmpeg -i {file_path} -ar 16000 -ac 1 {os.path.join(audio_wav_path, file.replace('amr', 'wav'))}"
#     subprocess.call(command, shell=True)

# # 读取wav文件，并完成vad处理
# wav_files = os.listdir(audio_wav_path)

# for wav in wav_files:
#     wav_path = os.path.join(audio_wav_path, wav)
#     wave_file_vad = wav_path.split(".")[0] + "_vad.wav"

#     # 端点检测
#     vad = VAD(wav_path, min_interval=15, pt=False)
#     sf.write(wave_file_vad.replace('audio_wav', 'audio_wav_vad'), vad.wav_dat_utterance, 16000, "PCM_16")

#     # print(wav_path)
#     # print(wave_file_vad.replace('audio_wav', 'audio_wav_vad'))
#     # break

# Extract acoustic features from every VAD-processed wav file.
# The listing is sorted so the CSV row order is deterministic across runs and
# platforms, and entries that are not .wav files are skipped.
wav_vad_files = sorted(
    entry for entry in os.listdir(audio_wav_vad_path) if entry.lower().endswith('.wav')
)

smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.ComParE_2016,
    feature_level=opensmile.FeatureLevel.Functionals,
)

# Create the output directory if needed so the first write cannot fail on it.
os.makedirs(feature_file, exist_ok=True)
ComParE_2016_Functionals_feature_path = os.path.join(feature_file, "ComParE_2016_Functionals_features.csv")

# NOTE(review): index=False drops whatever index process_file() returns
# (presumably the file name / segment -- confirm in the opensmile docs), so CSV
# rows can only be matched to recordings by position in the sorted listing.
count = 0  # number of files already written; 0 means the CSV has not been started
for wav_vad in wav_vad_files:
    wav_vad_path = os.path.join(audio_wav_vad_path, wav_vad)
    print(wav_vad_path)
    feature = smile.process_file(wav_vad_path)

    # The first file truncates the CSV and writes the header; later files append.
    # Previously every write appended, so re-running this cell duplicated the
    # header and all feature rows inside the same file.
    is_first_file = count == 0
    feature.to_csv(
        ComParE_2016_Functionals_feature_path,
        mode='w' if is_first_file else 'a',
        header=is_first_file,
        index=False,
    )
    count += 1


d:\PycharmProjects\Speech_Depression\data/audio_wav_vad\13193950052_2020_10_08_vad.wav
d:\PycharmProjects\Speech_Depression\data/audio_wav_vad\13193950052_2020_11_15_vad.wav
d:\PycharmProjects\Speech_Depression\data/audio_wav_vad\13295080228_2020_10_06_vad.wav
d:\PycharmProjects\Speech_Depression\data/audio_wav_vad\13295080228_2020_12_19_vad.wav
d:\PycharmProjects\Speech_Depression\data/audio_wav_vad\13295080228_2021_01_14_vad.wav
d:\PycharmProjects\Speech_Depression\data/audio_wav_vad\13461169518_2020_10_05_vad.wav
d:\PycharmProjects\Speech_Depression\data/audio_wav_vad\13523238095_2020_10_08_vad.wav
d:\PycharmProjects\Speech_Depression\data/audio_wav_vad\13523238095_2020_11_11_vad.wav
d:\PycharmProjects\Speech_Depression\data/audio_wav_vad\13598622617_2020_10_07_vad.wav
d:\PycharmProjects\Speech_Depression\data/audio_wav_vad\13598622617_2020_12_13_vad.wav
d:\PycharmProjects\Speech_Depression\data/audio_wav_vad\13837160646_2020_10_08_vad.wav
d:\PycharmProjects\Speech_Depression\data/a

In [30]:
from torch.utils.data import Dataset, DataLoader

class AudioDataset(Dataset):
    """Pairs acoustic feature rows with depression-score labels by position.

    Features are read from a CSV file (one list of values per row) and labels
    from the '抑郁得分' column of an Excel sheet. Item ``idx`` is feature row
    ``idx`` together with label ``idx``.

    NOTE(review): nothing here verifies that the CSV rows and the spreadsheet
    rows describe the same recordings in the same order -- confirm upstream.

    Args:
        feature_file: Path of the CSV file holding the feature rows.
        label_file: Path of the Excel file holding the labels.
        transform: Optional callable applied to each feature row when an item
            is fetched.
    """

    def __init__(self, feature_file, label_file, transform=None):
        self.feature_file = feature_file
        self.label_file = label_file
        self.labels = self.load_annotations()
        self.features = self.load_features()
        self.transform = transform

        # Surface a size mismatch immediately instead of as an IndexError in
        # the middle of an epoch.
        if len(self.features) != len(self.labels):
            import warnings  # local import: only needed on this path

            warnings.warn(
                f"AudioDataset: {len(self.features)} feature rows but "
                f"{len(self.labels)} labels; only the first {len(self)} "
                f"positions can be paired.",
                RuntimeWarning,
            )

    def __len__(self):
        """Return the number of positions that have both a feature row and a label."""
        return min(len(self.features), len(self.labels))

    def __getitem__(self, idx):
        """Return ``(feature, label)`` for position ``idx``.

        When a transform was supplied it is applied to the feature row; the
        label is returned unchanged.
        """
        feature = self.features[idx]
        label = self.labels[idx]
        if self.transform:
            # The original passed an undefined name here and discarded the result.
            feature = self.transform(feature)
        return feature, label

    def load_annotations(self):
        """Read the label spreadsheet and return the '抑郁得分' column as a list."""
        df = pd.read_excel(self.label_file)
        return df['抑郁得分'].tolist()

    def load_features(self):
        """Read the feature CSV and return its rows as a list of lists."""
        df = pd.read_csv(self.feature_file)
        return df.values.tolist()

In [31]:
# Build the dataset from the ComParE 2016 functionals CSV and the questionnaire spreadsheet.
compare_functionals_csv = os.path.join(feature_file, 'ComParE_2016_Functionals_features.csv')
dataset = AudioDataset(
    feature_file=compare_functionals_csv,
    label_file=audio_scale_path,
)

In [32]:
# Wrap the dataset in a shuffling loader that yields batches of 61 items.
dataloader = DataLoader(
    dataset=dataset,
    batch_size=61,
    shuffle=True,
)

In [34]:
# Display how many samples the dataset reports.
len(dataset)

122