In [1]:
import parselmouth
from pydub import AudioSegment
import glob
import numpy as np
import pandas as pd
from parselmouth.praat import call
import os
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

In [2]:
# 按照文件路径转化声音文件格式
# path = r"C:\Users\yx.fan\Desktop\语音筛查\voice_data\customer_service"   #客服录音和电话录音差异 句子的不同内容
# path = r"C:\Users\yx.fan\Desktop\语音筛查\voice_data\special_content"  #持续元音和连续句子
path = r"C:\Users\yx.fan\Desktop\语音筛查\voice_data\distance"         #声源距离远近
filter=[".m4a",".aac"] 
def other_format(dirname):
    result = []
    for maindir,subdir,file_name_list in os.walk(dirname):
        for filename in file_name_list:
            apath = os.path.join(maindir, filename)
            ext = os.path.splitext(apath)[1] 
            if ext in filter:
                result.append(apath)
    return result
other_format_files = other_format(path)
for filename in other_format_files:
    temp = AudioSegment.from_file(filename,format= filename[-3:])
    temp.export(filename[:-3] + 'wav',format = 'wav')
    os.remove(filename)

#在这一步部分预实验利用praat包手动分割音频片段

In [3]:
#不同声音参数
def measurePitch(voiceID, f0min, f0max, unit):
    sound = parselmouth.Sound(voiceID) # read the sound
    pitch = call(sound, "To Pitch", 0.0, f0min, f0max) #create a praat pitch object
    meanF0 = call(pitch, "Get mean", 0, 0, unit) # get mean pitch
    stdevF0 = call(pitch, "Get standard deviation", 0 ,0, unit) # get standard deviation
    intensity = call(sound, 'To Intensity', 75.0, 0.0, True)
    meanInten = call(intensity, 'Get mean', 2, 1, 'energy')
    sdInten = call(intensity, 'Get standard deviation', 2, 1)
    
    harmonicity = call(sound, "To Harmonicity (cc)", 0.01, 75, 0.1, 1.0)
    hnr = call(harmonicity, "Get mean", 0, 0)
    pointProcess = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)
    localJitter = call(pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
    localabsoluteJitter = call(pointProcess, "Get jitter (local, absolute)", 0, 0, 0.0001, 0.02, 1.3)
    rapJitter = call(pointProcess, "Get jitter (rap)", 0, 0, 0.0001, 0.02, 1.3)
    ppq5Jitter = call(pointProcess, "Get jitter (ppq5)", 0, 0, 0.0001, 0.02, 1.3)
    ddpJitter = call(pointProcess, "Get jitter (ddp)", 0, 0, 0.0001, 0.02, 1.3)
    localShimmer =  call([sound, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    localdbShimmer = call([sound, pointProcess], "Get shimmer (local_dB)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    apq3Shimmer = call([sound, pointProcess], "Get shimmer (apq3)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    aqpq5Shimmer = call([sound, pointProcess], "Get shimmer (apq5)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    apq11Shimmer =  call([sound, pointProcess], "Get shimmer (apq11)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    ddaShimmer = call([sound, pointProcess], "Get shimmer (dda)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    
    return meanF0, stdevF0, hnr, localJitter, localabsoluteJitter, rapJitter, ppq5Jitter, localShimmer, localdbShimmer, apq3Shimmer, aqpq5Shimmer, apq11Shimmer,ddpJitter,ddaShimmer,meanInten,sdInten

In [4]:
#不同声音参数整合成数据框
file_list = []
mean_F0_list = []
sd_F0_list = []
hnr_list = []
localJitter_list = []
localabsoluteJitter_list = []
rapJitter_list = []
ppq5Jitter_list = []
ddpJitter_list = []
localShimmer_list = []
localdbShimmer_list = []
apq3Shimmer_list = []
aqpq5Shimmer_list = []
apq11Shimmer_list = []
ddaShimmer_list = []
meanInten_list = []
sdInten_list = []

# Go through all the wave files in the folder and measure pitch
for wave_file in glob.glob( path + "\\*.wav" ):
    sound = parselmouth.Sound(wave_file)
    (meanF0, stdevF0, hnr, localJitter, localabsoluteJitter, rapJitter, ppq5Jitter, ddpJitter, localShimmer, localdbShimmer, apq3Shimmer, aqpq5Shimmer, apq11Shimmer, ddaShimmer,meanInten,sdInten) = measurePitch(sound, 50, 800, "Hertz")
    file_list.append(wave_file) # make an ID list
    mean_F0_list.append(meanF0) # make a mean F0 list
    sd_F0_list.append(stdevF0) # make a sd F0 list
    hnr_list.append(hnr)
    localJitter_list.append(localJitter)
    localabsoluteJitter_list.append(localabsoluteJitter)
    rapJitter_list.append(rapJitter)
    ppq5Jitter_list.append(ppq5Jitter)
    ddpJitter_list.append(ddpJitter)
    localShimmer_list.append(localShimmer)
    localdbShimmer_list.append(localdbShimmer)
    apq3Shimmer_list.append(apq3Shimmer)
    aqpq5Shimmer_list.append(aqpq5Shimmer)
    apq11Shimmer_list.append(apq11Shimmer)
    ddaShimmer_list.append(ddaShimmer)
    meanInten_list.append(meanInten)
    sdInten_list.append(sdInten)
df = pd.DataFrame(np.column_stack([file_list, mean_F0_list, sd_F0_list, hnr_list, localJitter_list, localabsoluteJitter_list, rapJitter_list, ppq5Jitter_list, ddpJitter_list, localShimmer_list, localdbShimmer_list, apq3Shimmer_list, aqpq5Shimmer_list, apq11Shimmer_list, ddaShimmer_list,meanInten_list,sdInten_list]),
                               columns=['voiceID', 'meanF0Hz', 'stdevF0Hz', 'HNR', 'localJitter', 'localabsoluteJitter', 'rapJitter', 
                                        'ppq5Jitter', 'ddpJitter', 'localShimmer', 'localdbShimmer', 'apq3Shimmer', 'apq5Shimmer', 
                                        'apq11Shimmer', 'ddaShimmer','meanInten','sdInten'])  


In [6]:
df
df.to_csv("C:\\Users\\yx.fan\\Desktop\\语音筛查\\voice_data\\data_distance.csv")

In [5]:
#转化数据类型
for i in range(0,df.shape[0]):
    for j in range(1,df.shape[1]):
        df.iloc[i,j] = pd.to_numeric(df.iloc[i,j])

In [38]:
#句子的不同内容做出的影响 变异系数判断不同参数的离散程度
df.iloc[53:,]
pd.DataFrame(df.var()/df.mean())

  pd.DataFrame(df.var()/df.mean())
  pd.DataFrame(df.var()/df.mean())


Unnamed: 0,0
meanF0Hz,1.13349
stdevF0Hz,21.446027
HNR,0.083493
localJitter,0.000201
localabsoluteJitter,2e-06
rapJitter,0.000136
ppq5Jitter,0.000148
ddpJitter,0.001242
localShimmer,0.009139
localdbShimmer,0.000476


In [50]:
#持续元音和自然语句的声音参数对比分析
df_test = pd.DataFrame(columns=['meanF0Hz', 'stdevF0Hz', 'HNR', 'localJitter', 'localabsoluteJitter', 'rapJitter', 
                                        'ppq5Jitter', 'ddpJitter', 'localShimmer', 'localdbShimmer', 'apq3Shimmer', 'apq5Shimmer', 
                                        'apq11Shimmer', 'ddaShimmer','meanInten','sdInten'])

new_index = df_test.index.tolist() + list(range(0,3)) #男 女 共同
df_test = df_test.reindex(new_index)
n = int(df.shape[0]/2)
p = int(df_test.shape[1])
n,p
for i in range(0,p):
    df_test.iloc[0,i] = stats.ttest_rel(df.iloc[0:n,i+1].tolist(),df.iloc[n:df.shape[0],i+1].tolist()).pvalue
    df_test.iloc[1,i] = stats.ttest_rel(df.iloc[[0,2,4,5],i+1].tolist(),df.iloc[[6,8,10,11],i+1].tolist()).pvalue
    df_test.iloc[2,i] = stats.ttest_rel(df.iloc[[1,3],i+1].tolist(),df.iloc[[7,9],i+1].tolist()).pvalue
df_test



Unnamed: 0,meanF0Hz,stdevF0Hz,HNR,localJitter,localabsoluteJitter,rapJitter,ppq5Jitter,ddpJitter,localShimmer,localdbShimmer,apq3Shimmer,apq5Shimmer,apq11Shimmer,ddaShimmer,meanInten,sdInten
0,0.394247,0.010965,0.00085,3.5e-05,0.001362,0.000153,5.4e-05,0.000938,0.000504,0.005491,0.003625,0.001387,0.000153,0.005491,0.952313,0.380235
1,0.134489,0.061906,0.006696,0.001247,0.002556,0.005281,0.001951,0.006883,0.00268,0.012483,0.00502,0.004985,0.005281,0.012483,0.198633,0.286251
2,0.717413,0.230962,0.248455,0.099258,0.212316,0.111425,0.11665,0.095363,0.131437,0.233991,0.246079,0.197883,0.111425,0.233991,0.537704,0.072778


In [26]:
#客服录音和电话录音
df_test = pd.DataFrame(columns=['meanF0Hz', 'stdevF0Hz', 'HNR', 'localJitter', 'localabsoluteJitter', 'rapJitter', 
                                        'ppq5Jitter', 'ddpJitter', 'localShimmer', 'localdbShimmer', 'apq3Shimmer', 'apq5Shimmer', 
                                        'apq11Shimmer', 'ddaShimmer','meanInten','sdInten'])

new_index = df_test.index.tolist() + list(range(0,1)) 
df_test = df_test.reindex(new_index)
n = int(df.shape[0]/2)
p = int(df_test.shape[1])
# n,p
for i in range(0,p):
    df_test.iloc[0,i] = stats.ttest_rel(df.iloc[0:n,i+1].tolist(),df.iloc[n:df.shape[0],i+1].tolist()).pvalue

df_test.to_csv("C:\\Users\\yx.fan\\Desktop\\语音筛查\\voice_data\\result_record.csv", index=False)

In [13]:
df_test

Unnamed: 0,meanF0Hz,stdevF0Hz,HNR,localJitter,localabsoluteJitter,rapJitter,ppq5Jitter,ddpJitter,localShimmer,localdbShimmer,apq3Shimmer,apq5Shimmer,apq11Shimmer,ddaShimmer,meanInten,sdInten
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6e-06,0.070841,0.042411,0.0,6e-06,0.0,0.0


In [11]:
#声源距离远近对声音参数的影响
df_test = pd.DataFrame(columns=['meanF0Hz', 'stdevF0Hz', 'HNR', 'localJitter', 'localabsoluteJitter', 'rapJitter', 
                                        'ppq5Jitter', 'ddpJitter', 'localShimmer', 'localdbShimmer', 'apq3Shimmer', 'apq5Shimmer', 
                                        'apq11Shimmer', 'ddaShimmer','meanInten','sdInten'])
new_index = df_test.index.tolist() + list(range(0,1))
df_test = df_test.reindex(new_index)
n = int(df.shape[0]/2)
p = int(df.shape[1])
for i in range(0,p-1):
    df_test.iloc[0,i] = stats.ttest_rel(df.iloc[0:n,i+1].tolist(),df.iloc[n:df.shape[0],i+1].tolist()).pvalue
df_test
df_test.to_csv("C:\\Users\\yx.fan\\Desktop\\语音筛查\\voice_data\\result_distance.csv", index=False)