In [1]:
from time import sleep

import pickle
import shutil
import numpy as np
import librosa
import librosa.display
import os
import matplotlib.pyplot as plt

from matplotlib import cm # for color map
from python_speech_features import logfbank, fbank
from scipy.fftpack import dct

import speech_recognition as speech_r
import scipy.signal as signal
from pydub import AudioSegment

from silence_remover.silence_detector import SilenceDetector
from silence_remover.filter_generator import FilterGenerator
from silence_remover.media_editor import MediaEditor


Import requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit
Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit


Checking librosa version and speech_recognition version

In [2]:
# Report the versions of the two audio libraries this notebook depends on.
for label, module in (
    ("librosa version : ", librosa),
    ("speech_recognition version : ", speech_r),
):
    print(label, module.__version__)

# Root folder of the training recordings; it is listed per user folder when
# the MFCC features are extracted.
save_path = "../test_train_data/"

librosa version :  0.7.2
speech_recognition version :  3.8.1


#### Extract MFCC features from every training WAV file

In [3]:
# Extract MFCC features for every recording under `save_path` (one sub-folder
# per user) and store one .npy array of shape (frames, num_ceps) per recording
# in ../feature/feature_mfcc/<user>/<user>_<index>.npy.
train_feature_root = "../feature/feature_mfcc/"

# Analysis settings; identical for every file, so they are set once up front.
hop_length = 512   # STFT hop, in samples
n_mels = 40        # number of mel filterbank channels
n_fft = 512        # FFT size, in samples
sr = 16000         # sample rate librosa.load resamples every file to
num_ceps = 12      # cepstral coefficients kept (DCT bins 1..12, bin 0 dropped)
cep_lifter = 22    # parameter of the sinusoidal lifter

# The mel filterbank and the lifter depend only on the settings above, so they
# are built once instead of once per file.
mel = librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=n_mels)
mel /= np.max(mel, axis=-1)[:, None]   # scale every filter to a peak of 1
n = np.arange(num_ceps)
lift = 1 + (cep_lifter / 2) * np.sin(np.pi * n / cep_lifter)

# Only directories are users; a stray file in the data root would otherwise
# make os.listdir() fail further down.
user_list = sorted(
    name for name in os.listdir(save_path)
    if os.path.isdir(os.path.join(save_path, name))
)
r = speech_r.Recognizer()

print("User List :")
testInt = 0   # number of recordings visited across all users
for i, user_name in enumerate(user_list):
    print('[{:d}] '.format(i)+user_name)

    user_dir = os.path.join(save_path, user_name)
    # Hidden files (e.g. .DS_Store) are not recordings.
    file_list = sorted(f for f in os.listdir(user_dir) if not f.startswith("."))

    # Drop features from earlier runs so the numbering below restarts at 0.
    feature_save_path = os.path.join(train_feature_root, user_name)
    if os.path.exists(feature_save_path):
        shutil.rmtree(feature_save_path)

    for file_name in file_list:
        testInt = testInt+1
        final_file_name = os.path.join(user_dir, file_name)
        print(final_file_name)

        # Split once; `path` is reused for the converted file and the scratch
        # file so that only the real extension is ever swapped (a plain
        # str.replace would also rewrite matching text in folder names).
        path, file_extension = os.path.splitext(final_file_name)

        if file_extension.lower() != ".wav":
            # Convert to WAV next to the original, then delete the original so
            # later runs do not convert it again.
            wav_path = path + ".wav"
            # An empty extension is passed as None so that no format is named
            # explicitly.
            track = AudioSegment.from_file(final_file_name, file_extension[1:] or None)
            print('CONVERTING: ' + str(final_file_name))
            # export() hands back the open output file; close it right away.
            track.export(wav_path, format='wav').close()
            os.remove(final_file_name)
        else:
            wav_path = final_file_name

        with speech_r.AudioFile(wav_path) as source:
            # NOTE(review): adjust_for_ambient_noise() reads from `source`
            # (default duration: 1 s) before record() runs, so the start of
            # each file is presumably missing from `audio` - confirm that this
            # trimming is intended.
            r.adjust_for_ambient_noise(source)
            audio = r.record(source)

        # Round-trip through a scratch WAV so librosa can load the recorded
        # audio. The finally-clause removes the scratch file even when the run
        # fails or is interrupted; a leftover *Temp.wav would otherwise be
        # picked up as a recording by the next run.
        temp_file = path + "Temp.wav"
        try:
            with open(temp_file, "wb") as fw:
                fw.write(audio.get_wav_data())
            y, sr = librosa.load(temp_file, sr=sr, mono=True, duration=15)
        finally:
            if os.path.exists(temp_file):
                os.remove(temp_file)

        y = y[0:int(15.0 * sr)]   # cap at 15 s
        stft = librosa.stft(y=y, n_fft=n_fft, hop_length=hop_length, win_length=None,
                            window='hamm', center=True, dtype=np.complex64,
                            pad_mode='reflect')
        abs_STFT = np.abs(stft)

        # Mel-weighted magnitude spectrum in dB, floored at 1e-5 to avoid log(0).
        mel_S = np.dot(mel, abs_STFT)
        mel_spec_test = 20 * np.log10(np.maximum(mel_S, 1e-5))

        # DCT over the mel axis; keep coefficients 1..num_ceps of every frame.
        mfcc = dct(mel_spec_test.T, type=2, axis=1, norm='ortho')[:, 1 : (num_ceps + 1)]

        # Sinusoidal liftering, then subtract each coefficient's mean over time
        # (plus a tiny constant offset).
        mfcc_lifted = mfcc * lift
        mfcc_normalized = mfcc_lifted - (np.mean(mfcc_lifted, axis=0) + 1e-8)

        # Name the result <user>_<k>, where k is the number of features already
        # stored for this user.
        os.makedirs(feature_save_path, exist_ok=True)
        feature_files = [f for f in os.listdir(feature_save_path) if f.endswith(".npy")]
        feature_result_name = user_name + "_" + str(len(feature_files))

        # Save first, report afterwards, so the message is only printed once
        # the file really exists.
        np.save(os.path.join(feature_save_path, feature_result_name), mfcc_normalized)
        print(feature_result_name + "' File Write Finish !")
        print(" Saved File Name : "+feature_result_name)
print(testInt)

User List :
[0] 2017019740001_kwakjuheon
../test_train_data/2017019740001_kwakjuheon/1.wav
2017019740001_kwakjuheon_0' File Write Finish !
 Saved File Name : 2017019740001_kwakjuheon_0
../test_train_data/2017019740001_kwakjuheon/2.wav
2017019740001_kwakjuheon_1' File Write Finish !
 Saved File Name : 2017019740001_kwakjuheon_1
../test_train_data/2017019740001_kwakjuheon/3.wav
2017019740001_kwakjuheon_2' File Write Finish !
 Saved File Name : 2017019740001_kwakjuheon_2
../test_train_data/2017019740001_kwakjuheon/4.wav
2017019740001_kwakjuheon_3' File Write Finish !
 Saved File Name : 2017019740001_kwakjuheon_3
../test_train_data/2017019740001_kwakjuheon/5.wav
2017019740001_kwakjuheon_4' File Write Finish !
 Saved File Name : 2017019740001_kwakjuheon_4
[1] 2017019740002_kimjiho
../test_train_data/2017019740002_kimjiho/1.wav
2017019740002_kimjiho_0' File Write Finish !
 Saved File Name : 2017019740002_kimjiho_0
../test_train_data/2017019740002_kimjiho/2.wav
2017019740002_kimjiho_1' File W

[11] 2017019740012_chaeminjoon
../test_train_data/2017019740012_chaeminjoon/1.wav
2017019740012_chaeminjoon_0' File Write Finish !
 Saved File Name : 2017019740012_chaeminjoon_0
../test_train_data/2017019740012_chaeminjoon/2.wav
2017019740012_chaeminjoon_1' File Write Finish !
 Saved File Name : 2017019740012_chaeminjoon_1
../test_train_data/2017019740012_chaeminjoon/3.wav
2017019740012_chaeminjoon_2' File Write Finish !
 Saved File Name : 2017019740012_chaeminjoon_2
../test_train_data/2017019740012_chaeminjoon/4.wav
2017019740012_chaeminjoon_3' File Write Finish !
 Saved File Name : 2017019740012_chaeminjoon_3
../test_train_data/2017019740012_chaeminjoon/5.wav
2017019740012_chaeminjoon_4' File Write Finish !
 Saved File Name : 2017019740012_chaeminjoon_4
[12] 2017019740013_kwakyiheon
../test_train_data/2017019740013_kwakyiheon/1.wav
2017019740013_kwakyiheon_0' File Write Finish !
 Saved File Name : 2017019740013_kwakyiheon_0
../test_train_data/2017019740013_kwakyiheon/2.wav
2017019740

KeyboardInterrupt: 

In [15]:
# Extract MFCC features for every validation recording (one sub-folder per
# user) and store one .npy array of shape (frames, num_ceps) per recording in
# ../feature/feature_mfcc_validation/<user>/<user>_<index>.npy.
#
# The validation root gets its own name: rebinding the global `save_path` here
# would make a later re-run of the training cell wipe the training features and
# refill them from validation data.
val_data_path = "../test_val_data/"
val_feature_root = "../feature/feature_mfcc_validation/"

# Analysis settings; identical for every file, so they are set once up front.
hop_length = 512   # STFT hop, in samples
n_mels = 40        # number of mel filterbank channels
n_fft = 512        # FFT size, in samples
sr = 16000         # sample rate librosa.load resamples every file to
num_ceps = 12      # cepstral coefficients kept (DCT bins 1..12, bin 0 dropped)
cep_lifter = 22    # parameter of the sinusoidal lifter

# The mel filterbank and the lifter depend only on the settings above, so they
# are built once instead of once per file.
mel = librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=n_mels)
mel /= np.max(mel, axis=-1)[:, None]   # scale every filter to a peak of 1
n = np.arange(num_ceps)
lift = 1 + (cep_lifter / 2) * np.sin(np.pi * n / cep_lifter)

# Only directories are users; a stray file in the data root would otherwise
# make os.listdir() fail further down.
user_list = sorted(
    name for name in os.listdir(val_data_path)
    if os.path.isdir(os.path.join(val_data_path, name))
)
r = speech_r.Recognizer()

print("User List :")
testInt = 0   # number of recordings visited across all users
for i, user_name in enumerate(user_list):
    print('[{:d}] '.format(i)+user_name)

    user_dir = os.path.join(val_data_path, user_name)
    # Hidden files (e.g. .DS_Store) are not recordings.
    file_list = sorted(f for f in os.listdir(user_dir) if not f.startswith("."))

    # Drop features from earlier runs so the numbering below restarts at 0.
    feature_save_path = os.path.join(val_feature_root, user_name)
    if os.path.exists(feature_save_path):
        shutil.rmtree(feature_save_path)

    for file_name in file_list:
        testInt = testInt+1
        final_file_name = os.path.join(user_dir, file_name)
        print(final_file_name)

        # Split once; `path` is reused for the converted file and the scratch
        # file so that only the real extension is ever swapped (a plain
        # str.replace would also rewrite matching text in folder names).
        path, file_extension = os.path.splitext(final_file_name)

        if file_extension.lower() != ".wav":
            # Convert to WAV next to the original, then delete the original so
            # later runs do not convert it again.
            wav_path = path + ".wav"
            # An empty extension is passed as None so that no format is named
            # explicitly.
            track = AudioSegment.from_file(final_file_name, file_extension[1:] or None)
            print('CONVERTING: ' + str(final_file_name))
            # export() hands back the open output file; close it right away.
            track.export(wav_path, format='wav').close()
            os.remove(final_file_name)
        else:
            wav_path = final_file_name

        with speech_r.AudioFile(wav_path) as source:
            # NOTE(review): adjust_for_ambient_noise() reads from `source`
            # (default duration: 1 s) before record() runs, so the start of
            # each file is presumably missing from `audio` - confirm that this
            # trimming is intended.
            r.adjust_for_ambient_noise(source)
            audio = r.record(source)

        # Round-trip through a scratch WAV so librosa can load the recorded
        # audio. The finally-clause removes the scratch file even when the run
        # fails or is interrupted; a leftover *Temp.wav would otherwise be
        # picked up as a recording by the next run.
        temp_file = path + "Temp.wav"
        try:
            with open(temp_file, "wb") as fw:
                fw.write(audio.get_wav_data())
            y, sr = librosa.load(temp_file, sr=sr, mono=True, duration=15)
        finally:
            if os.path.exists(temp_file):
                os.remove(temp_file)

        y = y[0:int(15.0 * sr)]   # cap at 15 s
        stft = librosa.stft(y=y, n_fft=n_fft, hop_length=hop_length, win_length=None,
                            window='hamm', center=True, dtype=np.complex64,
                            pad_mode='reflect')
        abs_STFT = np.abs(stft)

        # Mel-weighted magnitude spectrum in dB, floored at 1e-5 to avoid log(0).
        mel_S = np.dot(mel, abs_STFT)
        mel_spec_test = 20 * np.log10(np.maximum(mel_S, 1e-5))

        # DCT over the mel axis; keep coefficients 1..num_ceps of every frame.
        mfcc = dct(mel_spec_test.T, type=2, axis=1, norm='ortho')[:, 1 : (num_ceps + 1)]

        # Sinusoidal liftering, then subtract each coefficient's mean over time
        # (plus a tiny constant offset).
        mfcc_lifted = mfcc * lift
        mfcc_normalized = mfcc_lifted - (np.mean(mfcc_lifted, axis=0) + 1e-8)

        # Name the result <user>_<k>, where k is the number of features already
        # stored for this user.
        os.makedirs(feature_save_path, exist_ok=True)
        feature_files = [f for f in os.listdir(feature_save_path) if f.endswith(".npy")]
        feature_result_name = user_name + "_" + str(len(feature_files))

        # Save first, report afterwards, so the message is only printed once
        # the file really exists.
        np.save(os.path.join(feature_save_path, feature_result_name), mfcc_normalized)
        print(feature_result_name + "' File Write Finish !")
        print(" Saved File Name : "+feature_result_name)
print(testInt)

User List :
[0] 2017019740001_kwakjuheon
../test_val_data/2017019740001_kwakjuheon/1.wav
2017019740001_kwakjuheon_0' File Write Finish !
 Saved File Name : 2017019740001_kwakjuheon_0
../test_val_data/2017019740001_kwakjuheon/2.wav
2017019740001_kwakjuheon_1' File Write Finish !
 Saved File Name : 2017019740001_kwakjuheon_1
../test_val_data/2017019740001_kwakjuheon/3.wav
2017019740001_kwakjuheon_2' File Write Finish !
 Saved File Name : 2017019740001_kwakjuheon_2
../test_val_data/2017019740001_kwakjuheon/4.wav
2017019740001_kwakjuheon_3' File Write Finish !
 Saved File Name : 2017019740001_kwakjuheon_3
../test_val_data/2017019740001_kwakjuheon/5.wav
2017019740001_kwakjuheon_4' File Write Finish !
 Saved File Name : 2017019740001_kwakjuheon_4
[1] 2017019880001_kimsubin
../test_val_data/2017019880001_kimsubin/1.wav
2017019880001_kimsubin_0' File Write Finish !
 Saved File Name : 2017019880001_kimsubin_0
../test_val_data/2017019880001_kimsubin/2.wav
2017019880001_kimsubin_1' File Write Fin

In [None]:


def _choose_index(prompt, count):
    """Ask the user for an index into a list of `count` entries.

    Keeps asking until the answer is an integer in ``range(count)``, so a typo
    neither crashes the cell nor silently selects from the end of the list via
    a negative index.

    Args:
        prompt: Text passed to ``input()``.
        count: Number of selectable entries.

    Returns:
        The chosen index, ``0 <= index < count``.

    Raises:
        ValueError: If ``count`` is not positive, i.e. there is nothing to pick.
    """
    if count <= 0:
        raise ValueError("nothing to choose from")
    while True:
        answer = input(prompt)
        try:
            index = int(answer)
        except ValueError:
            print("Please enter a whole number.")
            continue
        if 0 <= index < count:
            return index
        print("Please enter a number between 0 and {:d}.".format(count - 1))


# Interactively pick one stored training feature (.npy) and load it into
# `feature_load`.
mfcc_feature_dir = "../feature/feature_mfcc/"

user_feature_list = sorted(os.listdir(mfcc_feature_dir))

print("User List :")
for i, item in enumerate(user_feature_list):
    print('[{:d}] '.format(i)+item)

# Prompt text: "Select a user number".
selUserNum_forFeature = _choose_index("\n유저 번호를 선택하세요 : ", len(user_feature_list))

selected_user_dir = os.path.join(mfcc_feature_dir, user_feature_list[selUserNum_forFeature])
feature_to_file_list = os.listdir(selected_user_dir)
feature_npy_file_list = sorted(f for f in feature_to_file_list if f.endswith(".npy"))

print("\nFile List : ")
for i, item in enumerate(feature_npy_file_list):
    print('[{:d}] '.format(i)+item)

# Prompt text: "Select a file number".
selFileNum_forFeature = _choose_index("\n파일 번호를 선택하세요 : ", len(feature_npy_file_list))

final_feat_name = os.path.join(selected_user_dir, feature_npy_file_list[selFileNum_forFeature])
feature_load = np.load(final_feat_name)


### Visualize Loaded Feature

In [None]:
# Plot the loaded MFCC matrix (`feature_load`, shape (frames, coefficients)) as
# an image with time in seconds on the x axis.

# Time-axis parameters. They must equal the settings the features were
# extracted with (16 kHz audio, STFT hop of 512 samples); they are defined here
# so the cell also runs on a fresh kernel.
loaded_sr = 16000
loaded_hop_length = 512
# Whole seconds covered by the feature; kept an int because the tick labels
# below are formatted with "{:d}".
loaded_time = int(feature_load.shape[0] * loaded_hop_length / loaded_sr)

plt.figure(figsize=(14, 6))
plt.rc('font', size=18)

plt.title("Loaded Feature Spectogram")
# specshow expects (features, frames), hence the transpose.
librosa.display.specshow(feature_load.T, sr=loaded_sr, hop_length=loaded_hop_length,
                         x_axis='s', cmap=cm.jet)
# plt.colorbar()
plt.grid(True)
plt.ylabel('MFCC Coefficients')

# One labelled tick per second.
second_ticks = np.arange(0, loaded_time+1, step=1)
plt.xticks(second_ticks, ["{:d}".format(x) for x in second_ticks], fontsize=18)

plt.show()