In [1]:
import numpy as np
import scipy.signal as ssgn
from matplotlib import pyplot as plt
%matplotlib inline
import pandas as pd

# собственные утилиты с функциями предобработки и выделения параметров
import aif_preprocessor as aifp
from aif_preprocessor import aif_preprocessor
import feature_extractor as fex
from feature_extractor import feature_extractor

# собственные утилиты с функциями визуализации 
import graphics_utility as gru

from pathlib import Path
import os

In [2]:
import importlib
importlib.reload(aifp)
importlib.reload(gru)
importlib.reload(fex)

<module 'feature_extractor' from '/media/user/DATA/Python_Projects/rb_tech/Python/feature_extractor.py'>

#### Путь к папке, содержащей исходные звуковые файлы

In [3]:
data_folder_name = '../data/commands/!select'

In [4]:
data_path = Path(data_folder_name)
command_path_dictionary = {}

for sub_path in data_path.iterdir():
    if False==sub_path.is_dir() or '_' in sub_path.name:
        continue
    command_path_dictionary[sub_path.name] = []
    for item in sub_path.iterdir():
        if False== item.is_file() or item.suffix!='.aif':
            continue
        command_path_dictionary[sub_path.name].append(item.absolute())

#### Словарь путей к файлам, содержащим голосовые команды.

In [5]:
command_path_dictionary.keys()

dict_keys(['bird', 'bed', 'seven', 'four', 'zero', 'marvin', 'dog', 'cat', 'happy', 'eight'])

In [6]:
sample_path = command_path_dictionary['marvin'][9]
sample_path.name

'c2e08f08_nohash_0.aif'

#### Размер окна кратковременного анализа и шаг между соседними окнами.

In [7]:
FRAME_SIZE = 512
HOP_SIZE = 256

#### Пути к папкам с файлами параметров размерностей 16 и 32.

In [8]:
feature16_path = Path('../data/features_16')
feature32_path = Path('../data/features_32')

#### Чтение и обработка звуковых файлов, создание набора данных соответствующих параметров.

In [9]:
OVERWRITE = False # owerwrite existing files?

audio_description_dict = {
    'file_name' : [],
    'command' : [],
    'duration' : [],
    'signal_rms' : [],
    'noise_rms' : []
}

for command, paths in command_path_dictionary.items():
    for path in paths:
        preprocessor = aif_preprocessor(str(path), FRAME_SIZE, HOP_SIZE)
        preprocessor.process()
        if not preprocessor.data_ready:
            print(str(path))
            continue
        # gathering audio info
        audio_description_dict['command'].append(command)
        audio_description_dict['file_name'].append(path.name)
        audio_description_dict['duration'].append(preprocessor.signal_duration)
        audio_description_dict['signal_rms'].append(preprocessor.signal_rms)
        audio_description_dict['noise_rms'].append(preprocessor.noise_rms)
        # audio features extraction and saving
        name_base = path.stem
        extractor = feature_extractor(preprocessor.signal_frames, preprocessor.audio_info.framerate)
        
        extractor.process(16)
        try:
            mfcc_path = feature16_path.joinpath(f'{command}/{name_base}' + '_mfcc.npy')
            if (OVERWRITE==True and mfcc_path.exists()==True) or (mfcc_path.exists()==False):
                np.save(mfcc_path, extractor.mfcc)
            cq_path = feature16_path.joinpath(f'{command}/{name_base}' + '_cq.npy')
            if (OVERWRITE==True and cq_path.exists()==True) or (cq_path.exists()==False):
                np.save(cq_path, extractor.cq)
            #lpc_path = feature16_path.joinpath(f'{command}/{name_base}' + '_lpc.npy')        
            #if (OVERWRITE==True and lpc_path.exists()==True) or (lpc_path.exists()==False):
            #    np.save(lpc_path, extractor.lpc)
            #spe_path = feature16_path.joinpath(f'{command}/{name_base}' + '_spe.npy')        
            #if (OVERWRITE==True and spe_path.exists()==True) or (spe_path.exists()==False):
            #    np.save(spe_path, extractor.spe)            
            wp_path = feature16_path.joinpath(f'{command}/{name_base}' + '_wp.npy')        
            if (OVERWRITE==True and wp_path.exists()==True) or (wp_path.exists()==False):
                np.save(wp_path, extractor.wp_envelopes)
        except Exception as ex:
            print(f'Saving 16 features for {command}/{name_base} failed : {ex}')
        
        extractor.process(32)        
        try:
            mfcc_path = feature32_path.joinpath(f'{command}/{name_base}' + '_mfcc.npy')
            if (OVERWRITE==True and mfcc_path.exists()==True) or (mfcc_path.exists()==False):
                np.save(mfcc_path, extractor.mfcc)
            cq_path = feature32_path.joinpath(f'{command}/{name_base}' + '_cq.npy')
            if (OVERWRITE==True and cq_path.exists()==True) or (cq_path.exists()==False):
                np.save(cq_path, extractor.cq)                
            #lpc_path = feature32_path.joinpath(f'{command}/{name_base}' + '_lpc.npy')        
            #if (OVERWRITE==True and lpc_path.exists()==True) or (lpc_path.exists()==False):
            #    np.save(lpc_path, extractor.lpc)
            #spe_path = feature32_path.joinpath(f'{command}/{name_base}' + '_spe.npy')        
            #if (OVERWRITE==True and spe_path.exists()==True) or (spe_path.exists()==False):
            #    np.save(spe_path, extractor.spe)                            
            wp_path = feature32_path.joinpath(f'{command}/{name_base}' + '_wp.npy')        
            if (OVERWRITE==True and wp_path.exists()==True) or (wp_path.exists()==False):
                np.save(wp_path, extractor.wp_envelopes)
        except Exception as ex:
            print(f'Saving 32 features for {command}/{name_base} failed : {ex}')
        

audio_description_df = pd.DataFrame.from_dict(audio_description_dict)

  npp_polyval(zm1, a, tensor=False))
  npp_polyval(zm1, a, tensor=False))


#### Сводная таблица сведений об обработанных файлах:
#### длительность полезного сигнала, мощность сигнала, мощность фонового шума.

In [12]:
audio_description_df

Unnamed: 0,file_name,command,duration,signal_rms,noise_rms
0,3ec05c3d_nohash_0.aif,bird,0.491312,0.036258,0.004585
1,1aed7c6d_nohash_0.aif,bird,0.406938,0.040497,0.000244
2,1a073312_nohash_0.aif,bird,0.425812,0.044931,0.000180
3,3d6bee47_nohash_0.aif,bird,0.437500,0.033978,0.000049
4,1ecfb537_nohash_0.aif,bird,0.423750,0.026736,0.000246
...,...,...,...,...,...
195,3d53244b_nohash_0.aif,eight,0.501563,0.026925,0.000055
196,02e85b60_nohash_0.aif,eight,0.500000,0.019100,0.000567
197,3c257192_nohash_0.aif,eight,0.562500,0.023937,0.000063
198,1b755c65_nohash_0.aif,eight,0.484812,0.024067,0.000078


In [13]:
audio_description_df.describe()

Unnamed: 0,duration,signal_rms,noise_rms
count,200.0,200.0,200.0
mean,0.54357,0.031605,0.000683
std,0.107505,0.006006,0.001048
min,0.317937,0.014431,1.6e-05
25%,0.473594,0.027826,0.000117
50%,0.531438,0.031306,0.000301
75%,0.599,0.036095,0.00072
max,0.919125,0.04702,0.005818
