# Features based on Representation learning strategies using Autoencoders

Compute features based on representation learning strategies using convolutional and recurrent autoencoders.

Two types of features are computed:

1. 256 features extracted from the bottleneck layer of the autoencoders
2. 128 features based on the mean squared error (MSE) between the input spectrogram and the spectrogram decoded by the autoencoder, computed in different frequency regions


Additionally, the features can be computed in static form (one vector for the whole utterance) or in dynamic form (one vector for each 500 ms speech segment):

- The static feature vector has 1536 features: (384 descriptors = 256 bottleneck + 128 error features) x (4 functionals: mean, std, skewness, kurtosis)

- The dynamic feature matrix contains the 384 descriptors computed for each speech segment, using segments of 500 ms length and a 250 ms time shift



In [1]:
import logging
import os
from tempfile import TemporaryDirectory

from disvoice import RepLearning
import matplotlib.font_manager as fm

################################################################################
###          (please add 'export KALDI_ROOT=<your_path>' in your $HOME/.profile)
###          (or run as: KALDI_ROOT=<your_path> python <your_script>.py)
################################################################################



In [2]:
# Sample recording used throughout this notebook. PROJECT_DIR must be set in the
# environment (a KeyError is raised otherwise) and must contain the 'audios' folder.
# os.path.join avoids doubled separators when PROJECT_DIR ends with a slash.
audio_path = os.path.join(os.environ['PROJECT_DIR'], 'audios', 'OSR_us_000_0030_8k.wav')

In [3]:
# Only let ERROR-level (and above) records from matplotlib's font manager through,
# so font warnings do not clutter the cell output.
logging.getLogger('matplotlib.font_manager').setLevel(logging.ERROR)

# Build the extractor once and reuse it for both calls below.
# NOTE(review): 'CAE' presumably selects the convolutional autoencoder -- confirm in the disvoice docs.
replearning = RepLearning('CAE')

# static=True  -> one feature vector for the whole utterance (functionals of the descriptors).
# static=False -> one feature vector per speech segment.
# fmt="dataframe" returns pandas DataFrames; plots=False presumably disables plotting.
features_static = replearning.extract_features_file(audio_path, static=True, plots=False, fmt="dataframe")
features_dynamic = replearning.extract_features_file(audio_path, static=False, plots=False, fmt="dataframe")

  self.AE.load_state_dict(torch.load(PATH+"/"+str(units)+'_CAE.pt', map_location='cpu'))


In [4]:
# Static features: a single row with the functionals (mean, std, skewness, kurtosis)
# of every bottleneck and error descriptor over the whole utterance.
features_static

Unnamed: 0,mean_bottleneck_0,mean_bottleneck_1,mean_bottleneck_2,mean_bottleneck_3,mean_bottleneck_4,mean_bottleneck_5,mean_bottleneck_6,mean_bottleneck_7,mean_bottleneck_8,mean_bottleneck_9,...,kurtosis_error_118,kurtosis_error_119,kurtosis_error_120,kurtosis_error_121,kurtosis_error_122,kurtosis_error_123,kurtosis_error_124,kurtosis_error_125,kurtosis_error_126,kurtosis_error_127
0,-0.73981,0.35187,0.811061,1.566601,0.271215,-2.484548,0.179768,-1.944128,0.526664,-2.142602,...,-1.29693,-1.297037,-1.302423,-1.321554,-1.326523,-1.319162,-1.338517,-1.362602,-1.394343,-1.43672


In [5]:
# Dynamic features: one row per speech segment, one column per bottleneck/error descriptor.
features_dynamic

Unnamed: 0,bottleneck_0,bottleneck_1,bottleneck_2,bottleneck_3,bottleneck_4,bottleneck_5,bottleneck_6,bottleneck_7,bottleneck_8,bottleneck_9,...,error_118,error_119,error_120,error_121,error_122,error_123,error_124,error_125,error_126,error_127
0,-0.369256,0.270143,1.042802,1.286064,0.152346,-2.128943,0.079210,-1.767680,0.466306,-2.254430,...,0.136254,0.140187,0.144276,0.153807,0.164736,0.176371,0.185838,0.201479,0.206286,0.199580
1,-0.571109,0.268809,1.087888,1.282011,0.190486,-2.245750,-0.042741,-1.840584,0.461271,-2.230600,...,0.123636,0.127201,0.131212,0.140421,0.150800,0.162155,0.171843,0.187823,0.193658,0.189468
2,-0.373683,0.367692,0.980976,1.474157,0.193229,-2.262034,0.052319,-1.780345,0.453727,-2.185068,...,0.117109,0.120120,0.123705,0.133047,0.142840,0.154083,0.163747,0.180151,0.188045,0.186285
3,-0.397616,0.212431,1.087547,1.413247,0.343646,-2.277966,0.093174,-1.756428,0.374241,-2.094728,...,0.122998,0.126303,0.129976,0.139388,0.149462,0.160653,0.170453,0.186379,0.192899,0.189096
4,-0.332414,0.071022,1.067888,1.301613,0.213364,-2.218960,0.142771,-1.749578,0.465004,-2.257886,...,0.128763,0.132924,0.137035,0.146744,0.157314,0.168976,0.178664,0.194619,0.200294,0.193770
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
181,-0.534517,0.339326,1.034141,1.182174,0.239245,-2.207869,0.053993,-1.880664,0.436413,-2.117141,...,0.120245,0.123784,0.127567,0.136862,0.147031,0.158417,0.167930,0.183589,0.190003,0.186498
182,-0.173150,0.234122,1.029255,1.433155,0.253811,-2.240433,0.229339,-1.712549,0.560794,-2.290974,...,0.122119,0.125473,0.129003,0.138234,0.148037,0.159174,0.168756,0.184547,0.191551,0.187528
183,-0.399415,0.294402,1.039773,1.317094,0.155743,-2.291083,0.020405,-1.770658,0.467809,-2.270263,...,0.120027,0.123602,0.127388,0.136679,0.146825,0.158219,0.167702,0.183208,0.190087,0.187428
184,-0.176845,0.276888,0.848233,1.404263,0.224300,-2.271604,0.003751,-1.681036,0.368569,-2.154999,...,0.114550,0.117418,0.120612,0.129965,0.139579,0.151046,0.161356,0.177523,0.186541,0.185729
