In [10]:
import numpy as np
import os
import librosa
import glob
import matplotlib.pyplot as plt
import tensorflow as tf
from matplotlib.pyplot import specgram
import librosa.display
%matplotlib inline

# Location of the recordings to scan, relative to the notebook's working directory.
parent_dir = '.'
sub_dir = 'set_a'
file_ext = '*.wav'

# Number of samples in every matching recording, as returned by librosa.load.
audio_len = []
for f in glob.glob(os.path.join(parent_dir,sub_dir,file_ext)):
    s,_ = librosa.load(f)
    audio_len.append(len(s))

# min()/max() raise ValueError on an empty list, so report a missing dataset
# explicitly instead of failing with an unrelated-looking error.
if audio_len:
    print("The minimum audio length is {0}: \nThe maximum audio length is: {1}".format(min(audio_len),max(audio_len)))
else:
    print("No audio files found matching {0}".format(os.path.join(parent_dir,sub_dir,file_ext)))

In [12]:
def load_sounds(file_path):
    """Load every audio file named in ``file_path``.

    Parameters
    ----------
    file_path : iterable
        Paths handed one by one to ``librosa.load``.

    Returns
    -------
    list
        The decoded sample array of each file, in input order. The sampling
        rate reported by ``librosa.load`` is discarded.
    """
    def _samples_of(path):
        # librosa.load yields a (samples, sampling_rate) pair; keep the samples.
        samples, _rate = librosa.load(path)
        return samples

    return [_samples_of(path) for path in file_path]

def plot_waves(sound_names,raw_sounds):
    """Plot the waveform of each sound in its own row of a single figure.

    Parameters
    ----------
    sound_names : iterable of str
        Titles for the subplots (title-cased before display).
    raw_sounds : iterable
        Sample sequences, paired positionally with ``sound_names``.

    The grid keeps the original five rows for up to five sounds and grows
    beyond that, so more than five sounds no longer raise from ``plt.subplot``.
    """
    # Materialise the pairs so the grid can be sized before plotting starts.
    pairs = list(zip(sound_names,raw_sounds))
    n_rows = max(5,len(pairs))
    # Newer librosa releases provide waveshow and dropped waveplot; older
    # releases only have waveplot. Use whichever this installation offers.
    draw_wave = getattr(librosa.display,'waveshow',None) or librosa.display.waveplot
    # NOTE(review): 25x60 inches at 900 dpi is a very large canvas and may
    # need a lot of memory to render; lower dpi if rendering is slow.
    plt.figure(figsize=(25,60),dpi=900)
    for row,(name,samples) in enumerate(pairs,start=1):
        plt.subplot(n_rows,1,row)
        draw_wave(np.array(samples))
        plt.title(name.title())
    plt.suptitle('Figure 1:Waveplot',x=0.5,y=0.915,fontsize=18)
    plt.show()

def plot_specgram(sound_names,raw_sounds,sr=22050):
    """Plot a matplotlib spectrogram of each sound in its own row.

    Parameters
    ----------
    sound_names : iterable of str
        Titles for the subplots (title-cased before display).
    raw_sounds : iterable
        Sample sequences, paired positionally with ``sound_names``.
    sr : int, optional
        Sampling frequency passed to ``specgram`` as ``Fs``. Defaults to
        22050, the value that was previously hard-coded.

    The grid keeps the original five rows for up to five sounds and grows
    beyond that, so more than five sounds no longer raise from ``plt.subplot``.
    """
    # Materialise the pairs so the grid can be sized before plotting starts.
    pairs = list(zip(sound_names,raw_sounds))
    n_rows = max(5,len(pairs))
    # NOTE(review): 25x60 inches at 900 dpi is a very large canvas and may
    # need a lot of memory to render; lower dpi if rendering is slow.
    plt.figure(figsize=(25,60), dpi = 900)
    for row,(name,samples) in enumerate(pairs,start=1):
        plt.subplot(n_rows,1,row)
        specgram(np.array(samples), Fs=sr)
        plt.title(name.title())
    plt.suptitle('Figure 2: Spectrogram',x=0.5, y=0.915,fontsize=18)
    plt.show()

def plot_log_power_specgram(sound_names,raw_sounds):
    """Plot the log-scaled power spectrogram of each sound in its own row.

    Parameters
    ----------
    sound_names : iterable of str
        Titles for the subplots (title-cased before display).
    raw_sounds : iterable
        Sample arrays accepted by ``librosa.stft``, paired positionally with
        ``sound_names``.

    The grid keeps the original five rows for up to five sounds and grows
    beyond that, so more than five sounds no longer raise from ``plt.subplot``.
    """
    # Materialise the pairs so the grid can be sized before plotting starts.
    pairs = list(zip(sound_names,raw_sounds))
    n_rows = max(5,len(pairs))
    # NOTE(review): 25x60 inches at 900 dpi is a very large canvas and may
    # need a lot of memory to render; lower dpi if rendering is slow.
    plt.figure(figsize=(25,60), dpi = 900)
    for row,(name,samples) in enumerate(pairs,start=1):
        plt.subplot(n_rows,1,row)
        power = np.abs(librosa.stft(samples))**2
        # logamplitude(ref_power=...) only exists in old librosa releases;
        # power_to_db(ref=...) is its replacement. Support both.
        if hasattr(librosa,'power_to_db'):
            D = librosa.power_to_db(power, ref=np.max)
        else:
            D = librosa.logamplitude(power, ref_power=np.max)
        librosa.display.specshow(D,x_axis='time' ,y_axis='log')
        plt.title(name.title())
    plt.suptitle('Figure 3: Log power spectrogram',x=0.5, y=0.915,fontsize=18)
    plt.show()

In [None]:
# Each recording is written next to the label used as its subplot title;
# zip() then splits the pairs back into the two parallel lists.
sound_files, sound_lables = (list(column) for column in zip(
    ('artifact__201012172012.wav', 'artifact'),
    ('Aunlabelledtest__2011040239.wav', 'UNlabeled'),
    ('extrahls__201101070953.wav', 'extrahls'),
    ('murmur__201102051443.wav', 'murmur'),
    ('normal__201106210943.wav', 'normal'),
))

raw_sounds = load_sounds(sound_files)
# NOTE(review): the file names carry no directory, so they are resolved
# against the current working directory — confirm that is where they live.
plot_waves(sound_lables,raw_sounds)
# The other plotting helpers take the same two arguments:
#plot_specgram(sound_lables,raw_sounds)
#plot_log_power_specgram(sound_lables,raw_sounds)

In [None]:
def repeat_to_length(arr, length):
    """Tile ``arr`` end to end until exactly ``length`` values are produced.

    Parameters
    ----------
    arr : sequence
        One-dimensional values to repeat. If ``length`` is shorter than
        ``arr`` the result is simply the first ``length`` values.
    length : int
        Number of values in the result.

    Returns
    -------
    numpy.ndarray
        ``float32`` array of shape ``(length,)``.

    Raises
    ------
    ValueError
        If ``arr`` is empty while ``length`` is positive (there is nothing to
        repeat), or if ``length`` is negative (raised by ``np.empty``).
    """
    results = np.empty((length,),dtype='float32')
    if length == 0:
        return results
    period = len(arr)
    if period == 0:
        # Without this guard the fill position could never advance and the
        # function would loop forever.
        raise ValueError("cannot repeat an empty array to a non-zero length")
    whole_copies, remainder = divmod(length, period)
    for copy_index in range(whole_copies):
        start = copy_index * period
        results[start:start+period] = arr
    if remainder:
        # Fill what is left with the leading part of one more copy.
        results[whole_copies*period:] = arr[:remainder]
    return results

In [None]:
def extract_features(file_name, max_len=None):
    """Compute time-averaged spectral features for one audio file.

    Parameters
    ----------
    file_name : str
        Path of the audio file, loaded with ``librosa.load``.
    max_len : int, optional
        When given, the signal is repeated (or cut) to exactly this many
        samples with ``repeat_to_length`` before features are computed, so
        clips of different duration can be brought to a common length.
        When ``None`` (default) the clip is used as loaded.

    Returns
    -------
    tuple of numpy.ndarray
        ``(mfccs, chroma, mel, contrast, tonnetz)``, each averaged over the
        time axis. ``mfccs`` has 40 entries (``n_mfcc=40``); the other sizes
        follow librosa's defaults for the respective feature.

    Raises
    ------
    ValueError
        If the file decodes to zero samples.
    """
    X,sr = librosa.load(file_name)
    if len(X) == 0:
        raise ValueError("audio file contains no samples: {0}".format(file_name))
    if max_len is not None:
        X = repeat_to_length(X,max_len)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X,sr=sr,n_mfcc=40).T,axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft,sr=sr).T,axis=0)
    # Pass the signal by keyword: newer librosa releases reject it positionally.
    mel = np.mean(librosa.feature.melspectrogram(y=X,sr=sr).T,axis=0)
    # Spectral contrast is derived from the magnitude spectrogram; previously
    # this slot duplicated the tonnetz feature by mistake.
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft,sr=sr).T,axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sr).T,axis=0)
    return mfccs,chroma,mel,contrast,tonnetz

In [None]:
def parse_audio_fies(parent_dir, sub_dirs, file_ext = '*.wav' ):
    """Build a feature matrix and label vector from folders of audio files.

    Parameters
    ----------
    parent_dir : str
        Directory containing the sub-directories to scan.
    sub_dirs : iterable of str
        Sub-directory names under ``parent_dir``.
    file_ext : str, optional
        Glob pattern selecting the files inside each sub-directory.

    Returns
    -------
    features : numpy.ndarray
        One row per file: the five arrays returned by ``extract_features``
        concatenated. Shape ``(0, 0)`` when no file matches.
    labels : numpy.ndarray
        One entry per file: the part of the file's base name before the first
        ``'__'`` (the whole base name if it contains no ``'__'``).
    """
    rows, labels = [], []
    for sub_dir in sub_dirs:
        # Sorted so the row order does not depend on filesystem listing order.
        for fn in sorted(glob.glob(os.path.join(parent_dir,sub_dir,file_ext))):
            mfccs,chroma,mel,contrast,tonnetz = extract_features(fn)
            rows.append(np.hstack([mfccs,chroma,mel,contrast,tonnetz]))
            # Use only the base name so directories in the path (which may
            # themselves contain '__') never leak into the label, and take
            # just the prefix so there is exactly one label per feature row.
            labels.append(os.path.basename(fn).split('__')[0])
    # Stack once at the end: the row width is whatever extract_features
    # produced, and repeated vstack calls would copy the matrix every time.
    features = np.vstack(rows) if rows else np.empty((0,0))
    return features,np.array(labels)