/
env_dce_mfa.m
219 lines (191 loc) · 7.38 KB
/
env_dce_mfa.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
% Multi-Frame Analysis based on Discrete Cepstral Envelope (DCE-MFA)
%
% Input
% af : Array of structures with matrices containing sinusoidal
% parameters (as the output of sin_analysis.m).
% Each matrix is made of:
% The 1st line for the frequency of each sinusoid [Hz]
% The 2nd line for their amplitude (linear scale)
% The DC has to be included (in the first column)
% fs : [Hz] Signal's sampling frequency
% order : Cepstral order
% [extrap_dcny] : If true (default), alleviate stability problems of the DCE
% by replacing the DC value and extrapolating the sinusoidal
% components up to Nyquist (see env_extrap_sins_dcny).
% If false, use the sinusoidal components as they are.
% [scale] : empty : Frequency linear scale (default)
% 'mel' : see frq2mel (for MFCC computation)
% 'bark' : see frq2bark
% 'erb' : see frq2erb
% [Bw] : [Hz] Standard-deviation of the Gaussian used as weighting
% function (for emphasizing the importance of the low
% frequencies in the solution).
% Bw has to be large enough for the weights to remain
% significant up to Nyquist.
% [lr] : Regularization parameter (as in [2]) (def. 0)
% [dftlen] : DFT length used to compute E, i.e. only needed if the 4th
% output argument is requested (def. 4096).
%
% Output
% cc : Cepstral coefficients
% Dk : [log] Log energy corrections
% af : As in input, plus the aligned amplitudes values in the
% 'a' field.
% E : The amplitude cepstral envelope
%
% References
% [1] Y. Shiga and S. King, "Estimation of voice source and vocal tract
% characteristics based on multi-frame analysis," EUROSPEECH, 2003.
% [2] M. Campedel-Oudot, O. Cappe and E. Moulines, "Estimation of the Spectral
% Envelope of Voiced Sounds Using a Penalized Likelihood Approach"
%
% Copyright (c) Yannis Stylianou, 2011, Bilbao
%
% License
% This file is part of libphoni. libphoni is free software: you can
% redistribute it and/or modify it under the terms of the GNU Lesser General
% Public License as published by the Free Software Foundation, either version 3
% of the License, or (at your option) any later version. libphoni is
% distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
% without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
% PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
% details.
%
% This function is part of the COVAREP project: http://covarep.github.io/covarep
%
% Authors
% Yannis Stylianou <yannis@csd.uoc.gr>
% Gilles Degottex <degottex@csd.uoc.gr> (regularization term)
%
function [cc Dk af E] = env_dce_mfa(af, fs, order, extrap_dcny, scale, Bw, lr, dftlen)
    % Multi-frame discrete cepstral envelope estimation.
    %
    % Alternates between (a) one log-gain correction per frame and (b) a
    % weighted least-squares cepstrum shared by all frames, until the weighted
    % squared error changes by less than 0.1% between two iterations.
    %
    % Inputs
    %  af          : struct array; af(k).sins(1,:) holds frequencies [Hz] and
    %                af(k).sins(2,:) linear amplitudes of frame k.
    %  fs          : sampling frequency [Hz].
    %  order       : cepstral order (cc has order+1 elements).
    %  extrap_dcny : if true (default when omitted), af is first passed through
    %                env_extrap_sins_dcny.
    %  scale       : [] for a linear frequency axis, otherwise the suffix of a
    %                frq2<scale> warping function (e.g. 'mel', 'bark').
    %                When given, Bw and lr are overridden (see below).
    %  Bw          : std-dev [Hz] of the Gaussian frequency weighting
    %                (default 2000*(fs/16000)).
    %  lr          : regularization weight added on the diagonal (default 0).
    %  dftlen      : DFT length used for E (default 4096).
    %
    % Outputs
    %  cc : (order+1)x1 cepstral coefficients; cc(1) carries the log gain of
    %       the central frame and cc(2:end) are doubled so that
    %       real(fft(cc)) is the log-amplitude envelope.
    %  Dk : Mx1 log-gain corrections relative to the central frame.
    %  af : input struct array with fields 'f' (possibly warped frequencies),
    %       'a' (gain-aligned log amplitudes), 'Bk' and 'Wk' added.
    %  E  : envelope on dftlen/2+1 bins; only computed if requested.
    %
    % Errors
    %  env_dce_mfa:emptyInput        af is empty.
    %  env_dce_mfa:nonFiniteError    the fitting error became NaN/Inf
    %                                (e.g. a zero amplitude gives log(0)).
    %  env_dce_mfa:unsupportedScale  E requested for a scale with no
    %                                envelope synthesis implemented here.

    debug = 0; % 0:Do nothing; 1:Plot iterations info; 2:Plot results

    % Input parameters
    if nargin<4; extrap_dcny = true; end
    if nargin<5; scale = []; end
    if nargin<6 || isempty(Bw); Bw = 2000*(fs/16000); end % 2kHz with 16kHz [1]
    if nargin<7 || isempty(lr); lr = 0; end
    if nargin<8; dftlen = 4096; end

    % Without any frame the iteration below can never satisfy its stopping
    % criterion (0/0), so fail early instead of looping forever.
    if isempty(af)
        error('env_dce_mfa:emptyInput', ...
              'env_dce_mfa: ''af'' must contain at least one frame.');
    end

    % If asked, extrapolate components at DC and up to Nyquist
    if extrap_dcny; af = env_extrap_sins_dcny(af, fs); end

    % Extract frequencies and log amplitudes from the sinusoidal parameters
    for n=1:numel(af)
        af(n).f = af(n).sins(1,:);
        af(n).a = log(af(n).sins(2,:));
    end

    % Frequency warping. This has to come AFTER the extraction above,
    % otherwise the warped frequencies are overwritten by the linear ones.
    if ~isempty(scale)
        Bw = 100*fs;   % To make it as similar as possible to the DCE-SFA
        lr = 3.5e-2;   % To make it as similar as possible to the DCE-SFA [2]
        fnscale = str2func(['frq2' scale]);
        nyq_warped = fnscale(fs/2);
        for n=1:numel(af)
            % Map [0, fs/2] onto itself through the warping function
            af(n).f = 0.5*fs*fnscale(af(n).f)/nyq_warped;
        end
    end

    if debug>1; subplot(211); hold off; end

    M    = numel(af);
    BWB  = 0;   % Left-hand matrix of [1](9)
    erI  = 0;   % Error of previous step (initialization error)
    iter = 0;   % Iteration number
    rt   = 0;   % Right-hand term of [1](9)

    for k=1:M
        fk = af(k).f(:);
        ak = af(k).a(:);
        Nk = length(fk);

        Bk = [ones(Nk,1) 2*cos(2*pi*fk/fs*(1:order))];
        wk = exp(-fk.^2 / (2 * Bw * Bw))';  % Bottom-right paragraph p.3
        Wk = diag(wk)/Nk;                   % (8)

        % Accumulate and keep the per-frame matrices for the iterations
        BWB = BWB + (Bk'*Wk*Bk);
        af(k).Bk = Bk;
        af(k).Wk = Wk;

        uk  = ones(Nk,1);
        dk  = uk'*Wk*(ak)/(uk'*Wk*uk);              % (10) with c=0
        erI = erI + ((ak-dk*uk)'*Wk*(ak-dk*uk));    % (7) with c=0
        rt  = rt + (Bk'*Wk*(ak-dk*uk));             % Right-hand term of (9)
    end
    if debug>0; disp(['iter:' num2str(iter) ' error=' num2str(erI) 'log']); end

    % System matrix of (9), with the regularization term if any.
    % It does not change across iterations, so build it once.
    A = BWB;
    if lr~=0
        A = BWB + lr*eye(size(BWB,1));
    end

    % First estimation of the cepstrum
    cc = A\rt;      % Solution of (9)
    cc(1) = 0;      % The constant term is carried by the per-frame gains

    iter = 1;
    while(1)
        er = 0;
        rt = 0;             % Right-hand term of (9)
        Dk = zeros(M,1);    % log energy corrections
        for k=1:M
            ak = af(k).a(:);
            Bk = af(k).Bk;
            Wk = af(k).Wk;
            uk = ones(length(ak),1);

            dk = uk'*Wk*(ak-Bk*cc)/(uk'*Wk*uk);     % (10)
            h  = (ak-dk*uk-Bk*cc);
            er = er + (h'*Wk*h);                    % (7)
            rt = rt + (Bk'*Wk*(ak-dk*uk));          % Right-hand term of (9)
            Dk(k) = dk;
        end

        cc = A\rt;  % Solution of (9)
        cc(1) = 0;

        if debug>0; disp(['iter:' num2str(iter) ' error=' num2str(er) 'log']); end

        if debug>1
            subplot(211);
            plot(iter-1,log(erI), 'o');
            hold on
            plot(iter,log(er), 'x');
            title(num2str(iter));
            subplot(212);
            hold off;
            for k=1:M
                plot(af(k).f,af(k).a-Dk(k),'+');
                hold on;
            end
            % Local DFT length: do not clobber the 'dftlen' argument
            dbg_dftlen = 2048;
            fv = fs*(0:dbg_dftlen/2)/dbg_dftlen;
            lF = 2*cos(2*pi*fv'/fs*(1:order))*cc(2:order+1);
            plot(fv,lF,'r', 'LineWidth', 2);
            keyboard
        end

        % A NaN/Inf error can never satisfy the stopping criterion below
        if ~isfinite(er)
            error('env_dce_mfa:nonFiniteError', ...
                  ['env_dce_mfa: non-finite fitting error; check that all ' ...
                   'amplitudes are strictly positive and all frequencies finite.']);
        end

        % Stop on a perfect fit (avoids 0/0) or if the error doesn't improve
        % more than 0.1%
        if er==0 || abs((erI-er)/er)<0.001
            break;
        end
        erI  = er;
        iter = iter+1;
    end

    % Align the gain corrections with respect to the central frame
    ci = floor((numel(af)-1)/2)+1;
    cc(1) = cc(1)+Dk(ci);
    Dk = Dk - Dk(ci);

    % Bk uses 2*cos(.), so double the coefficients to obtain a cepstrum
    % whose DFT real part is the log-amplitude envelope
    cc(2:end) = 2*cc(2:end);

    % Include the log energy corrections into the output
    for fi=1:numel(af)
        af(fi).a = af(fi).a - Dk(fi);
    end

    % Compute the envelope only if the 4th output is actually requested
    if nargout>3
        if isempty(scale)
            E = exp(fft(cc, dftlen));
            E = E(1:end/2+1);
        elseif strcmp(scale,'bark')
            % NOTE(review): barkcc2spec is defined elsewhere; assumed to
            % return a full-length spectrum - confirm.
            E = barkcc2spec(cc, fs, dftlen);
            E = E(1:end/2+1);
        elseif strcmp(scale,'mel')
            % NOTE(review): fwcep2hspec is defined elsewhere; assumed to
            % return the half spectrum directly - confirm.
            E = fwcep2hspec(cc, fs, dftlen);
        else
            error('env_dce_mfa:unsupportedScale', ...
                  ['env_dce_mfa: envelope synthesis is not implemented for ' ...
                   'scale ''%s''; request only cc, Dk and af.'], scale);
        end
    end

return