In [1]:
import parselmouth

import numpy as np
import glob
import speech_recognition as sr
import pandas as pd
from parselmouth.praat import call

In [2]:
def extractPitch(sound, pitchFloor, pitchCeiling, unit, interpolation):
    """Summarise the pitch contour of ``sound``.

    A Praat Pitch object is built with "To Pitch" and then queried for four
    statistics.

    Parameters
    ----------
    sound : object accepted by Praat's "To Pitch" (callers in this notebook
        pass a ``parselmouth.Sound``).
    pitchFloor, pitchCeiling : bounds forwarded to "To Pitch".
    unit : unit name forwarded to every query (e.g. "Hertz").
    interpolation : interpolation name forwarded to the minimum/maximum
        queries only (e.g. "Parabolic").

    Returns
    -------
    tuple
        ``(minimum, maximum, mean, standard deviation)`` as returned by Praat.
    """
    # First positional argument 0.0 is the time step handed to "To Pitch".
    pitch_track = call(sound, "To Pitch", 0.0, pitchFloor, pitchCeiling)

    # (0, 0) is the time range given to each query; presumably Praat's
    # "whole signal" convention -- confirm against the Praat manual.
    time_range = (0, 0)

    lowest = call(pitch_track, "Get minimum", *time_range, unit, interpolation)
    highest = call(pitch_track, "Get maximum", *time_range, unit, interpolation)
    average = call(pitch_track, "Get mean", *time_range, unit)
    spread = call(pitch_track, "Get standard deviation", *time_range, unit)
    return lowest, highest, average, spread

In [3]:
def extractIntensity(sound, minPitch, timeStep, interpolation):
    """Summarise the intensity contour of ``sound``.

    A Praat Intensity object is built with "To Intensity" and then queried
    for four statistics.

    Parameters
    ----------
    sound : object accepted by Praat's "To Intensity" (callers in this
        notebook pass a ``parselmouth.Sound``).
    minPitch, timeStep : forwarded, in that order, to "To Intensity".
    interpolation : interpolation name forwarded to the minimum/maximum
        queries only (e.g. "Parabolic").

    Returns
    -------
    tuple
        ``(minimum, maximum, mean, standard deviation)`` as returned by Praat.
    """
    contour = call(sound, "To Intensity", minPitch, timeStep)

    # (0, 0) is the time range given to each query; presumably Praat's
    # "whole signal" convention -- confirm against the Praat manual.
    time_range = (0, 0)

    quietest = call(contour, "Get minimum", *time_range, interpolation)
    loudest = call(contour, "Get maximum", *time_range, interpolation)
    average = call(contour, "Get mean", *time_range)
    spread = call(contour, "Get standard deviation", *time_range)
    return quietest, loudest, average, spread

In [4]:
def extractJitterAndShimmer(sound, pitchFloor, pitchCeiling):
    """Return ``(local jitter, local shimmer)`` for ``sound``.

    The sound is converted to a Pitch object, the Pitch object to a
    PointProcess, and the PointProcess is then queried for local jitter
    (on its own) and local shimmer (together with the sound).

    Parameters
    ----------
    sound : object accepted by Praat's "To Pitch" (callers in this notebook
        pass a ``parselmouth.Sound``).
    pitchFloor, pitchCeiling : bounds forwarded to "To Pitch".
    """
    # Arguments shared by both queries: a (0, 0) time range followed by
    # 0.0001, 0.02 and 1.3 -- in Praat's documented order these are the period
    # floor (s), period ceiling (s) and maximum period factor.
    shared_args = (0, 0, 0.0001, 0.02, 1.3)
    # Extra trailing argument of the shimmer query (maximum amplitude factor
    # in Praat's documented order).
    max_amplitude_factor = 1.6

    # NOTE(review): the PointProcess is derived from the Pitch object alone;
    # confirm this is the intended pulse-detection method for shimmer.
    pulses = call(call(sound, "To Pitch", 0.0, pitchFloor, pitchCeiling), "To PointProcess")

    jitter_local = call(pulses, "Get jitter (local)", *shared_args)
    shimmer_local = call([sound, pulses], "Get shimmer (local)", *shared_args, max_amplitude_factor)
    return jitter_local, shimmer_local

In [5]:
def extractHNR(sound, timeStep=0.01, minPitch=75, silenceThreshold=0.1, periodsPerWindow=1.0):
    """Return the mean harmonics-to-noise ratio of ``sound``.

    A Praat Harmonicity object is built with "To Harmonicity (cc)" and its
    mean is queried over the (0, 0) time range.

    The analysis settings used to be hard-coded; they are now keyword
    parameters whose defaults reproduce the previous behaviour, so
    ``extractHNR(sound)`` is unchanged while callers can align ``minPitch``
    with the pitch floor they pass to the other extractors.

    Parameters
    ----------
    sound : object accepted by Praat's "To Harmonicity (cc)" (callers in this
        notebook pass a ``parselmouth.Sound``).
    timeStep, minPitch, silenceThreshold, periodsPerWindow :
        forwarded, in this order, to "To Harmonicity (cc)".

    Returns
    -------
    The value returned by Praat's "Get mean" on the Harmonicity object.
    """
    harmonicity = call(
        sound,
        "To Harmonicity (cc)",
        timeStep,
        minPitch,
        silenceThreshold,
        periodsPerWindow,
    )
    return call(harmonicity, "Get mean", 0, 0)

In [6]:
def extractSpeakingRate(sound, transcriptionLength):
    """Return ``transcriptionLength`` divided by the total duration of ``sound``.

    The duration comes from Praat's "Get total duration" query. Callers in
    this notebook pass the number of whitespace-separated words in the
    transcript, so the result is a words-per-duration-unit rate.
    """
    return transcriptionLength / call(sound, "Get total duration")

In [7]:
# Load the recordings overview table.
#
# NOTE(review): `names=` is supplied without `header=0`, so the CSV's own
# header line is read as an ordinary data row (row 0 of the preview holds the
# column names). Every column is therefore loaded as text, and downstream code
# has to account for that extra first row. Passing `header=0` would remove it,
# but any code that relies on the row offset must be adjusted at the same time.
overview_csv_path = '/Users/jifeng/Desktop/Medical Speech, Transcription, and Intent/overview-of-recordings-processed.csv'
overview_column_names = [
    'audio_clipping',
    'audio_clipping:confidence',
    'background_noise_audible',
    'background_noise_audible:confidence',
    'overall_quality_of_the_audio',
    'quiet_speaker',
    'quiet_speaker:confidence',
    'speaker_id',
    'file_download',
    'file_name',
    'phrase',
    'prompt',
    'writer_id',
]
pd_read = pd.read_csv(
    overview_csv_path,
    delimiter=',',
    names=overview_column_names,
    low_memory=False,
)
pd_read.head()

Unnamed: 0,audio_clipping,audio_clipping:confidence,background_noise_audible,background_noise_audible:confidence,overall_quality_of_the_audio,quiet_speaker,quiet_speaker:confidence,speaker_id,file_download,file_name,phrase,prompt,writer_id
0,audio_clipping,audio_clipping:confidence,background_noise_audible,background_noise_audible:confidence,overall_quality_of_the_audio,quiet_speaker,quiet_speaker:confidence,speaker_id,file_download,file_name,phrase,prompt,writer_id
1,no_clipping,1,light_noise,1,3.33,audible_speaker,1,43453425,https://ml.sandbox.cf3.us/cgi-bin/index.cgi?do...,1249120_43453425_58166571.wav,When I remember her I feel down,Mental health,21665495
2,light_clipping,0.6803,no_noise,0.6803,3.33,audible_speaker,1,43719934,https://ml.sandbox.cf3.us/cgi-bin/index.cgi?do...,1249120_43719934_43347848.wav,When I carry heavy things I feel like breaking...,Skin issue,44088126
3,no_clipping,1,no_noise,0.6655,3.33,audible_speaker,1,43719934,https://ml.sandbox.cf3.us/cgi-bin/index.cgi?do...,1249120_43719934_53187202.wav,there is too much pain when i move my arm,Internal medicine,44292353
4,no_clipping,1,light_noise,1,3.33,audible_speaker,1,31349958,https://ml.sandbox.cf3.us/cgi-bin/index.cgi?do...,1249120_31349958_55816195.wav,My son had his lip pierced and it is swollen a...,Surgery department,43755034


In [8]:
# Narrow the overview table to the columns used downstream: speaker/file
# identifiers, the transcript and prompt text, the writer id, and the three
# annotation-confidence columns.
audio_info_columns = [
    'speaker_id',
    'file_name',
    'phrase',
    'prompt',
    'writer_id',
    'audio_clipping:confidence',
    'background_noise_audible:confidence',
    'quiet_speaker:confidence',
]
audioInfo = pd_read[audio_info_columns]
audioInfo.head()

Unnamed: 0,speaker_id,file_name,phrase,prompt,writer_id,audio_clipping:confidence,background_noise_audible:confidence,quiet_speaker:confidence
0,speaker_id,file_name,phrase,prompt,writer_id,audio_clipping:confidence,background_noise_audible:confidence,quiet_speaker:confidence
1,43453425,1249120_43453425_58166571.wav,When I remember her I feel down,Mental health,21665495,1,1,1
2,43719934,1249120_43719934_43347848.wav,When I carry heavy things I feel like breaking...,Skin issue,44088126,0.6803,0.6803,1
3,43719934,1249120_43719934_53187202.wav,there is too much pain when i move my arm,Internal medicine,44292353,1,0.6655,1
4,31349958,1249120_31349958_55816195.wav,My son had his lip pierced and it is swollen a...,Surgery department,43755034,1,1,1


In [9]:
# One accumulator list per output column; each name is bound to its own,
# independent empty list.

# Recording identity and label.
file_list, prompt_list = [], []

# Pitch statistics.
minPitchList, maxPitchList, meanPitchList, sdPitchList = ([] for _ in range(4))

# Intensity statistics.
minIntensityList, maxIntensityList, meanIntensityList, sdIntensityList = ([] for _ in range(4))

# Rate and voice-quality measures.
speakingRateList, JitterList, ShimmerList, hnrList = ([] for _ in range(4))

In [10]:
# Extract acoustic features for every recording listed in `audioInfo` that is
# present in the recordings folder, then write them out as one CSV table.

# Input folder (trailing slash required: file names are appended directly)
# and output file.
recordings_dir = "/Users/jifeng/Desktop/Medical Speech, Transcription, and Intent/recordings/test/"
features_csv_path = "/Users/jifeng/Desktop/Medical Speech, Transcription, and Intent/speech_based_features_test.csv"

# Pitch bounds passed to the pitch, intensity and jitter/shimmer extractors.
pitch_floor = 75
pitch_ceiling = 600

# Output column -> accumulator list, in the column order of the CSV.
feature_columns = {
    'voice_ID': file_list,
    'prompt': prompt_list,
    'min_pitch': minPitchList,
    'max_pitch': maxPitchList,
    'mean_pitch': meanPitchList,
    'sd_pitch': sdPitchList,
    'min_intensity': minIntensityList,
    'max_intensity': maxIntensityList,
    'mean_intensity': meanIntensityList,
    'sd_intensity': sdIntensityList,
    'jitter': JitterList,
    'shimmer': ShimmerList,
    'hnr': hnrList,
    'speakingRate': speakingRateList,
}

# Empty the accumulators in place so that re-running this cell rebuilds the
# table instead of appending a second copy of every row.
for column_values in feature_columns.values():
    column_values.clear()

# Every row is visited. A row whose file_name does not exist in the recordings
# folder (including a header line that was loaded as data) is simply skipped
# by the existence check below, so no positional offset is needed.
for wav_file_name, phrase, prompt in zip(audioInfo['file_name'], audioInfo['phrase'], audioInfo['prompt']):
    if not isinstance(wav_file_name, str):
        continue  # missing file name: nothing to look up

    path = recordings_dir + wav_file_name
    # glob returns an empty list when the file is absent.
    if not glob.glob(path):
        continue

    # Word count of the transcript; a missing (non-string) phrase counts as 0
    # words instead of raising.
    transcription_length = len(phrase.split()) if isinstance(phrase, str) else 0

    sound = parselmouth.Sound(path)
    (minPitch, maxPitch, meanPitch, sdPitch) = extractPitch(sound, pitch_floor, pitch_ceiling, "Hertz", "Parabolic")
    (minIntensity, maxIntensity, meanIntensity, sdIntensity) = extractIntensity(sound, pitch_floor, 0.0, "Parabolic")
    (localJitter, localShimmer) = extractJitterAndShimmer(sound, pitch_floor, pitch_ceiling)
    hnr = extractHNR(sound)
    speakingRate = extractSpeakingRate(sound, transcription_length)

    # Append only after every extractor has succeeded, so all accumulator
    # lists always stay the same length even if one recording raises.
    feature_row = {
        'voice_ID': wav_file_name,
        'prompt': prompt,
        'min_pitch': minPitch,
        'max_pitch': maxPitch,
        'mean_pitch': meanPitch,
        'sd_pitch': sdPitch,
        'min_intensity': minIntensity,
        'max_intensity': maxIntensity,
        'mean_intensity': meanIntensity,
        'sd_intensity': sdIntensity,
        'jitter': localJitter,
        'shimmer': localShimmer,
        'hnr': hnr,
        'speakingRate': speakingRate,
    }
    for column_name, value in feature_row.items():
        feature_columns[column_name].append(value)

# Build the frame column by column so numeric features keep a numeric dtype;
# stacking text and numbers into one array would coerce every value to a
# single common (string) dtype.
df = pd.DataFrame(feature_columns)

df.to_csv(features_csv_path, index=False)