In [1]:
from os import listdir
from os.path import isdir, join
import librosa
import random
import numpy as np
import matplotlib.pyplot as plt
import python_speech_features

In [2]:
# Dataset path and view possible targets
dataset_path = 'recording'
for name in listdir(dataset_path):
    if isdir(join(dataset_path, name)):
        print(name)

的一個
啦
的那個
那那個
ㄟ
著
吼
嗯
阿
ㄏㄧㄡ
那
的這個


In [3]:
# Create an all targets list
all_targets = [name for name in listdir(dataset_path) if isdir(join(dataset_path, name))]
print(all_targets)

['的一個', '啦', '的那個', '那那個', 'ㄟ', '著', '吼', '嗯', '阿', 'ㄏㄧㄡ', '那', '的這個']


In [4]:
# See how many files are in each
num_samples = 0
for target in all_targets:
    print(len(listdir(join(dataset_path, target))))
    num_samples += len(listdir(join(dataset_path, target)))
print('Total samples:', num_samples)

30
30
30
30
30
30
30
30
30
30
30
30
Total samples: 360


In [5]:
# Settings
target_list = all_targets
feature_sets_file = 'all_targets_mfcc_sets.npz'
perc_keep_samples = 1 #取整個資料集當中的100%
val_ratio = 0.1 
test_ratio = 0.1
sample_rate = 8000 #取樣率
num_mfcc = 16 #回傳mfcc的量
len_mfcc = 16 #mfcc長度

In [6]:
# Create list of filenames along with ground truth vector (y)
filenames = []
y = []
for index, target in enumerate(target_list):
    print(join(dataset_path, target))
    filenames.append(listdir(join(dataset_path, target)))
    y.append(np.ones(len(filenames[index])) * index)

recording/的一個
recording/啦
recording/的那個
recording/那那個
recording/ㄟ
recording/著
recording/吼
recording/嗯
recording/阿
recording/ㄏㄧㄡ
recording/那
recording/的這個


In [7]:
# Check ground truth Y vector
print(y)
for item in y:
    print(len(item))

[array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]), array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]), array([2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.]), array([3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3.,
       3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3.]), array([4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
       4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.]), array([5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.,
       5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.]), array([6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6.,
       6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6.]), array([7., 7., 7., 7., 7., 7., 7., 7., 7., 7., 

In [8]:
# Flatten filename and y vectors(轉成1D張量)
filenames = [item for sublist in filenames for item in sublist]
y = [item for sublist in y for item in sublist]

In [9]:
# Associate filenames with true output and shuffle
filenames_y = list(zip(filenames, y))#filename跟y壓縮在 起
random.shuffle(filenames_y)#打散
filenames, y = zip(*filenames_y)#解壓縮

In [10]:
# Only keep the specified number of samples (shorter extraction/training)
#只用整個資料集的100%
print(len(filenames))
filenames = filenames[:int(len(filenames) * perc_keep_samples)]
print(len(filenames))

360
360


In [11]:
# Calculate validation and test set sizes(驗證集、測試集使用整個資料集10%當中的各10%)
val_set_size = int(len(filenames) * val_ratio)
test_set_size = int(len(filenames) * test_ratio)
print(val_set_size)
print(test_set_size)

36
36


In [12]:
# Break dataset apart into train, validation, and test sets(分割資料集，filename部分)
filenames_val = filenames[:val_set_size]
filenames_test = filenames[val_set_size:(val_set_size + test_set_size)]
filenames_train = filenames[(val_set_size + test_set_size):]

In [13]:
# Break y apart into train, validation, and test sets(分割資料集，target部分)
y_orig_val = y[:val_set_size]
y_orig_test = y[val_set_size:(val_set_size + test_set_size)]
y_orig_train = y[(val_set_size + test_set_size):]

In [14]:
# Function: Create MFCC from given path
def calc_mfcc(path):
    
    # Load wavefile(取得時序、取樣率)
    signal, fs = librosa.load(path, sr=sample_rate)
    
    # Create MFCCs from sound clip
    mfccs = python_speech_features.base.mfcc(signal, #輸入訊號
                                            samplerate=fs, #取樣率
                                            winlen=0.256, #音框涵蓋時間(s)
                                            winstep=0.050, #音框間的時間(s)
                                            numcep=num_mfcc, #返回係數的量
                                            nfilt=26, #過濾器數量
                                            nfft=2048, #FFT大小
                                            preemph=0.0, #不用預強化濾波器
                                            ceplifter=0, #robust(不用)
                                            appendEnergy=False, #係數為0的話會被替代成總音框能量的對數
                                            winfunc=np.hanning) #漢明窗，增加音框左右端連續性(在進行 FFT 時，都是假設一個音框內的訊號是代表一個週期性訊號，如果這個週期性不存在，FFT 會為了要符合左右端不連續的變化，而產生一些不存在原訊號的能量分佈，造成分析上的誤差。)
    return mfccs.transpose()

In [15]:
print(len(filenames_train))

288


In [16]:
# Function: Create MFCCs, keeping only ones of desired length
#提取完好的音檔
def extract_features(in_files, in_y):
    prob_cnt = 0
    out_x = []
    out_y = []
        
    for index, filename in enumerate(in_files):
    
        # Create path from given filename and target item
        path = join(dataset_path, target_list[int(in_y[index])], 
                    filename)
        
        # Check to make sure we're reading a .wav file
        if not path.endswith('.wav'):
            continue

        # Create MFCCs
        mfccs = calc_mfcc(path)
        
        #if mfccs.shape[1] == len_mfcc:
        #    print("Good_filename:" + str(filename))
        #    prob_cnt += 1
        #else:
        #    print('Dropped:', index, mfccs.shape)
        #    print("Bad_filename:" + str(filename))
            
        # Only keep MFCCs with given length
        if mfccs.shape[1] == len_mfcc:
            out_x.append(mfccs)
            out_y.append(in_y[index])
        else:
            print(filename)
            print('Dropped:', index, mfccs.shape)
            prob_cnt += 1
            
    return out_x, out_y, prob_cnt

In [17]:
# Create train, validation, and test sets
total = 0
x_train, y_train, prob = extract_features(filenames_train, 
                                          y_orig_train)
total += prob
#print('Removed percentage:', prob / len(y_orig_train))
x_val, y_val, prob = extract_features(filenames_val, y_orig_val)
total += prob
#print('Removed percentage:', prob / len(y_orig_val))
x_test, y_test, prob = extract_features(filenames_test, y_orig_test)
total += prob
#print('Removed percentage:', prob / len(y_orig_test))
print("total:" + str(total))

total:0


In [18]:
#儲存資料
np.savez('/Users/Willie/Part1.npz', 
         x_train=x_train, 
         y_train=y_train, 
         x_val=x_val, 
         y_val=y_val, 
         x_test=x_test, 
         y_test=y_test)

In [19]:
feature_sets = np.load('/Users/Willie/Part1.npz')
feature_sets.files

['x_train', 'y_train', 'x_val', 'y_val', 'x_test', 'y_test']

In [20]:
print(len(feature_sets['x_train']))
print(len(feature_sets['x_test']))
print(len(feature_sets['x_val']))

288
36
36
