In [None]:
from os import listdir
from os.path import isdir, join
import librosa
import random
import numpy as np
import matplotlib.pyplot as plt
import python_speech_features
import scipy
#C:\Users\HOUBI\Desktop\topic\SpeechRecognition\data_speech_commands_v0.02

In [None]:
# Point at the dataset root and print each sub-directory found there
dataset_path = './recording9'
for entry in listdir(dataset_path):
    # Plain files at the top level are ignored
    if not isdir(join(dataset_path, entry)):
        continue
    print(entry)

In [None]:
# Create an all targets list.
# os.listdir returns entries in arbitrary order, so the names are sorted:
# the position of a name in this list is later used as its numeric class
# label, and that mapping should not change between runs or machines.
all_targets = sorted(
    name for name in listdir(dataset_path) if isdir(join(dataset_path, name))
)
print(all_targets)

In [None]:
# Count the entries in every target directory and accumulate the grand total
num_samples = 0
for target in all_targets:
    files_in_target = len(listdir(join(dataset_path, target)))
    print(files_in_target)
    num_samples = num_samples + files_in_target
print('Total samples:', num_samples)

In [None]:
# Settings
target_list = all_targets
#feature_sets_file = 'recording1.npz'
perc_keep_samples = 1 # fraction of the whole dataset to use (1 = 100%)
val_ratio = 0.2 # fraction of the kept files held out for validation
test_ratio = 0.2 # fraction of the kept files held out for testing
sample_rate = 16000 # sampling rate the audio is loaded at

In [None]:
# For every target, collect its file names and a matching ground truth vector (y)
filenames = []
y = []
for index, target in enumerate(target_list):
    target_dir = join(dataset_path, target)
    print(target_dir)
    target_files = listdir(target_dir)
    filenames.append(target_files)
    # One float label per file; the label is the target's position in target_list
    y.append(np.full(len(target_files), float(index)))

In [None]:
# Sanity check: show the label vectors, then the number of labels in each one
print(y)
for label_vector in y:
    print(len(label_vector))

In [None]:
# Flatten the per-target filename and label collections into flat 1-D lists
flat_filenames = []
for files_of_target in filenames:
    flat_filenames.extend(files_of_target)
filenames = flat_filenames

flat_labels = []
for labels_of_target in y:
    flat_labels.extend(labels_of_target)
y = flat_labels

In [None]:
# Associate filenames with true output and shuffle
# Fixed seed: the same input ordering always produces the same shuffle, so the
# train/validation/test split derived from it can be repeated.
shuffle_seed = 42
filenames_y = list(zip(filenames, y))  # pair every filename with its label
random.Random(shuffle_seed).shuffle(filenames_y)  # shuffle the pairs together
if filenames_y:
    filenames, y = zip(*filenames_y)  # unzip back into two aligned tuples
else:
    # zip(*[]) yields nothing to unpack; keep two empty tuples instead of raising
    filenames, y = (), ()

In [None]:
# Only keep the specified fraction of samples (shorter extraction/training)
print(len(filenames))
num_keep = int(len(filenames) * perc_keep_samples)
filenames = filenames[:num_keep]
# Trim the labels too, so filenames and y keep the same length when
# perc_keep_samples is below 1
y = y[:num_keep]
print(len(filenames))

In [None]:
# Validation and test set sizes: each is its ratio of the kept files,
# truncated to an integer
num_files = len(filenames)
val_set_size = int(num_files * val_ratio)
test_set_size = int(num_files * test_ratio)
print(val_set_size, test_set_size, sep='\n')

In [None]:
# Split the filenames: validation first, then test, and the remainder is train
holdout_end = val_set_size + test_set_size
filenames_val = filenames[0:val_set_size]
filenames_test = filenames[val_set_size:holdout_end]
filenames_train = filenames[holdout_end:]

In [None]:
# Split the labels with the same boundaries: validation, then test, then train
label_holdout_end = val_set_size + test_set_size
y_orig_val = y[0:val_set_size]
y_orig_test = y[val_set_size:label_holdout_end]
y_orig_train = y[label_holdout_end:]

In [None]:

# Function: Create log Mel-filterbank features from given path
def calc_fbank(path):
    """Load an audio file and return its log Mel-filterbank energies.

    Parameters
    ----------
    path : str
        Location of the audio file; passed to ``librosa.load`` together with
        the module-level ``sample_rate``.

    Returns
    -------
    numpy.ndarray
        Whatever ``python_speech_features.base.logfbank`` returns for the
        normalized signal; per that library's documentation, one row per
        frame and one column per filter (26 here).
    """
    # Load the waveform; the returned sampling rate is not needed afterwards
    signal, _ = librosa.load(path, sr=sample_rate)
    # np.float was removed in NumPy 1.24; np.float64 is the type it aliased
    signal = signal.astype(np.float64)

    # Normalize: remove the mean, then scale by the peak-to-peak range.
    value_range = signal.max() - signal.min()
    centered = signal - signal.mean()
    if value_range > 0:
        signal = centered / value_range
    else:
        # Constant clip: dividing by a zero range would give NaN everywhere,
        # so only the mean removal is applied
        signal = centered

    # Log filterbank energies: 25 ms windows, 10 ms step, 26 filters
    fbank = python_speech_features.base.logfbank(signal,
                                                 samplerate=sample_rate,
                                                 winlen=0.025,
                                                 winstep=0.01,
                                                 nfilt=26,
                                                 nfft=512,
                                                 lowfreq=0,
                                                 highfreq=None,
                                                 preemph=0.97)
    return fbank

In [None]:
# Number of files assigned to the training split
print(len(filenames_train))

In [None]:
# Function: Create filterbank features, keeping only ones of desired length
def extract_features(in_files, in_y, num_frames=49):
    """Compute filterbank features for each .wav file, keeping full-length ones.

    Parameters
    ----------
    in_files : sequence of str
        File names (not full paths); each one is looked up under
        ``dataset_path/<target name>/``.
    in_y : sequence of numbers
        Label for each entry of ``in_files``, at the same index. The label is
        converted with ``int()`` and used as an index into ``target_list``.
    num_frames : int, optional
        Number of rows a feature matrix must have to be kept. Defaults to 49,
        the value that used to be hard-coded here.

    Returns
    -------
    out_x : list
        Feature matrices returned by ``calc_fbank`` for the kept files.
    out_y : list
        Labels of the kept files, aligned with ``out_x``.
    prob_cnt : int
        Number of .wav files dropped because their row count differed
        from ``num_frames``.
    """
    prob_cnt = 0
    out_x = []
    out_y = []

    for index, filename in enumerate(in_files):
        label = in_y[index]

        # Create path from given filename and target item
        path = join(dataset_path, target_list[int(label)], filename)

        # Skip anything that is not a .wav file; these are not counted as dropped
        if not path.endswith('.wav'):
            continue

        fbank = calc_fbank(path)

        # Only keep feature matrices with the expected number of rows
        if fbank.shape[0] == num_frames:
            out_x.append(fbank)
            out_y.append(label)
        else:
            print(filename)
            print('Dropped:', index, fbank.shape)
            prob_cnt += 1

    return out_x, out_y, prob_cnt

In [None]:
# Extract features for the train, validation and test splits (in that order)
# and add up how many files each call reported as dropped
dropped_counts = []
x_train, y_train, prob = extract_features(filenames_train, y_orig_train)
dropped_counts.append(prob)
x_val, y_val, prob = extract_features(filenames_val, y_orig_val)
dropped_counts.append(prob)
x_test, y_test, prob = extract_features(filenames_test, y_orig_test)
dropped_counts.append(prob)
total = sum(dropped_counts)
print("total:" + str(total))

In [None]:
# Save all three splits (features and labels) into a single .npz archive
feature_arrays = {
    'x_train': x_train,
    'y_train': y_train,
    'x_val': x_val,
    'y_val': y_val,
    'x_test': x_test,
    'y_test': y_test,
}
np.savez('./npz/recording9_fbank.npz', **feature_arrays)

In [None]:
# Reload the saved archive and show the names of the arrays it contains
feature_sets = np.load('./npz/recording9_fbank.npz')
feature_sets.files

In [None]:
# Number of examples stored for each split
for split_key in ('x_train', 'x_test', 'x_val'):
    print(len(feature_sets[split_key]))