-
Notifications
You must be signed in to change notification settings - Fork 113
/
cpp.m
135 lines (116 loc) · 3.56 KB
/
cpp.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
% Cepstral Peak Prominence (CPP) for discriminating breathy from
% neutral/modal phonation.
%
% Octave compatible
%
% Description
% The cepstral peak prominence feature derived from the log magnitude
% cepstrum based on the paper by Hillenbrand & Houde (1996). The code
% facilitates time and quefrency smoothing variants, and regression
% line (as per Hillenbrand), mean and no normalisation variants
%
% Inputs
% x : [samples] [Nx1] Speech signal
% fs : [Hz] [1x1] sampling frequency
% smoothOpt : [binary] [1x1] flag for using smoothing
% normOpt : [string] [1x1] 'line', 'mean' or 'nonorm'
% for selecting normalisation type
% dBScaleOpt : [binary] [1x1] flag for using a dB CPP scale
% Outputs
% CPP : [s,samples] [Mx2] CPP with time values
%
% Example
% Please see the HOWTO_glottalsource.m example file.
%
% References
% [1] Hillenbrand, J., Houde, R. A. (1996) ``Acoustic correlates of
% breathy vocal quality: dysphonic voices and continuous
% speech'', Journal of Speech and Hearing research 39:311-321
%
% This function is part of the Covarep project:
% http://covarep.github.io/covarep
%
% Author
% John Kane kanejo@tcd.ie
%
% TODO: Use vectorised regression line fitting
% Add band-pass filtering variant
function CPP = cpp( x, fs, smoothOpt, normOpt, dBScaleOpt )
% CPP  Frame-wise cepstral peak prominence of a speech signal.
%
%   CPP = cpp( x, fs )
%   CPP = cpp( x, fs, smoothOpt, normOpt, dBScaleOpt )
%
% Inputs
%   x          : speech signal (vector of samples)
%   fs         : sampling frequency in Hz
%   smoothOpt  : non-zero enables moving-average smoothing of the cepstrum
%                across time and quefrency and switches the frame shift
%                from 10 ms to 2 ms (default 0)
%   normOpt    : 'line'   subtract the value of a first-order regression
%                         line (fitted over the searched quefrency range)
%                         at the position of the peak
%                'nonorm' subtract nothing
%                any other value (default 'mean') subtracts the mean of
%                the cepstrum over the searched quefrency range
%   dBScaleOpt : non-zero computes the cepstrum as 10*log10(.) instead of
%                the natural log (default 0)
%
% Output
%   CPP        : [N x 2] matrix; column 1 is the CPP value of each frame,
%                column 2 the centre of that frame as a sample index
%
% Empty ([]) optional arguments select the default value.

    %% Default arguments
    if nargin < 3 || isempty( smoothOpt )
        smoothOpt = 0;
    end
    if nargin < 4 || isempty( normOpt )
        normOpt = 'mean';
    end
    if nargin < 5 || isempty( dBScaleOpt )
        dBScaleOpt = 0;
    end

    %% Settings
    filterType  = 'highpass';
    % First-order pre-emphasis 1 - 0.97*z^-1. The comma matters: inside
    % brackets "1 - 0.97" is parsed as a subtraction and yields the
    % scalar 0.03, which would only scale the signal.
    HPfilt_b    = [1, -0.97];
    frameLength = round( 0.04 * fs );
    if smoothOpt
        frameShift    = round( 0.002 * fs );
        timeSmoothLen = 10;
        quefSmoothLen = 10;
    else
        frameShift = round( 0.01 * fs );
    end
    halfLen  = round( frameLength / 2 );
    xLen     = length( x );
    frameLen = halfLen * 2 + 1;      % odd length, symmetric about the centre

    F0lim = [ 500, 50 ];  % Note that this differs from Hillenbrand's
                          % settings of 60-300 Hz, to allow for a
                          % fuller range of potential F0 values
    quefLim = round( fs ./ F0lim );
    quefSeq = ( quefLim(1):quefLim(2) )';

    % Frame centres (in samples) and the sample range of each frame
    time_samples = frameLength+1:frameShift:xLen-frameLength;
    N            = length( time_samples );
    frameStart   = time_samples - halfLen;
    frameStop    = time_samples + halfLen;

    %% Apply filtering if requested
    if strcmp( filterType, 'highpass' )
        x = filter( HPfilt_b, 1, x );
    end

    %% Create frame matrix (one frame per column)
    frameMat = zeros( frameLen, N );
    for n = 1:N
        frameMat(:,n) = x( frameStart(n):frameStop(n) );
    end

    %% Apply Hann window function
    win      = hanning( frameLen );
    frameMat = frameMat .* repmat( win, 1, N );

    %% Compute magnitude spectrum
    SpecMat = abs( fft( frameMat ) );
    SpecdB  = 10 * log10( SpecMat.^2 );

    %% Compute log Cepstrum
    cepsPower = abs( fft( SpecdB ) ).^2;
    if dBScaleOpt
        Ceps = 10 * log10( cepsPower );
    else
        Ceps = log( cepsPower );
    end

    %% If selected smooth across time and quefrency
    if smoothOpt
        timeFilter_b = ones( 1, timeSmoothLen ) / timeSmoothLen;
        quefFilter_b = ones( 1, quefSmoothLen ) / quefSmoothLen;
        % filter() works down columns, so transpose to smooth along time
        Ceps = filter( timeFilter_b, 1, Ceps' )';
        Ceps = filter( quefFilter_b, 1, Ceps );
    end

    %% Take quefrency range and compute max
    CepsLim = Ceps( quefSeq, : );
    [CepsMax, maxIdx] = max( CepsLim, [], 1 );

    %% Do normalisation
    CepsNorm = zeros( 1, N );
    if strcmp( normOpt, 'line' )
        for n = 1:N
            p = polyfit( quefSeq, CepsLim(:,n), 1 );
            CepsNorm(n) = polyval( p, quefSeq( maxIdx(n) ) );
        end
    elseif ~strcmp( normOpt, 'nonorm' )
        CepsNorm = mean( CepsLim, 1 );
    end

    CPP = CepsMax - CepsNorm;
    CPP = [CPP(:) time_samples(:)];
end