Add tests vs Matlab (#15)

* hann vs hanning * test findpeaks * floor indin * vocoder test wip * floor to round * add pvoc [skip ci]
JeffFessler · Mar 14, 2022 · 3dae645 · 3dae645 · JeffFessler · Mar 14, 2022
1 parent 5e45a01
commit 3dae645
Show file tree

Hide file tree

Showing 15 changed files with 779 additions and 6 deletions.
diff --git a/matlab/findPeaks4.m b/matlab/findPeaks4.m
@@ -0,0 +1,46 @@
+%{
+Matlab code downloaded from:
+https://sethares.engr.wisc.edu/vocoders/matlabphasevocoder.html
+%}
+
+function peaks = findPeaks4( Amp, MAX_PEAK, EPS_PEAK, SSF )
+
+%   This version modified from findPeaks.m by P. Moller-Nielson 
+%   28.3.03, pm-n. ( see http://www.daimi.au.dk/~pmn/sound/ )
+
+SPECTRUM_SIZE=length(Amp);
+peakAmp = ( Amp(3:SPECTRUM_SIZE-1) > Amp(2:SPECTRUM_SIZE-2) ) .* ...
+          ( Amp(3:SPECTRUM_SIZE-1) > Amp(4:SPECTRUM_SIZE) ) .* ...
+          Amp(3:SPECTRUM_SIZE-1);
+peakPos = zeros( MAX_PEAK, 1);
+
+maxAmp = max( peakAmp );
+nPeaks = 0;
+for p = 1 : MAX_PEAK
+  [m, b] = max( peakAmp );
+  if m <= ( EPS_PEAK * maxAmp )
+    break;
+  end;
+  peakPos(p) = b+2;
+  peakAmp(b) = 0;
+  nPeaks = p;
+end;
+
+peakPos = sort( peakPos );
+peaks = zeros( nPeaks, 3 );
+
+last_b = 1;
+for p = 1 : nPeaks
+  b = peakPos(MAX_PEAK-nPeaks+p);
+  first_b = last_b+1;
+  if p == nPeaks
+    last_b = SPECTRUM_SIZE;
+  else
+    next_b = peakPos(MAX_PEAK-nPeaks+p+1);
+    [dummy, rel_min] = min( Amp(b:next_b));
+    last_b = b+rel_min-1;
+  end;
+  peaks(p,1) = first_b;
+  peaks(p,2) = b;
+  peaks(p,3) = last_b;
+end;
diff --git a/matlab/findpeaks.jl b/matlab/findpeaks.jl
@@ -0,0 +1,22 @@
+# findpeaks.jl
+# Compare Matlab and Sound versions of findpeaks.
+
+using Sound: findpeaks
+using MATLAB
+using Test: @testset, @test
+
+ENV["MATLAB_ROOT"] = "/Applications/freeware/matlab"
+
+@testset "findpeaks" begin
+    N = 32
+    i = [7, 12, 25]
+    M = length(i)
+    x = zeros(N)
+    x[i] .= [2, 5, 3]
+    pj = findpeaks(x)
+
+    mat"pm = findPeaks4($x, 50, 0.005, 0)"
+    @mget pm
+    pm == Int.(pm)
+    @test pj == pm
+end
diff --git a/matlab/hann.jl b/matlab/hann.jl
@@ -0,0 +1,20 @@
+# hann.jl
+# Compare Matlab and Sound versions
+# Note that Matlab has "hann" and "hanning" and they differ!
+
+using Sound: hann
+using MATLAB
+using Test: @testset, @test
+
+ENV["MATLAB_ROOT"] = "/Applications/freeware/matlab"
+
+@testset "hann" begin
+    for n in [8, 9] # test odd and even
+        eval_string("h1 = hanning($n)")
+        eval_string("h2 = hann($n+2)")
+        @mget h1
+        @mget h2
+        @test [0; h1; 0] ≈ h2
+        @test h1 ≈ hann(n)
+    end
+end
diff --git a/matlab/pv.m b/matlab/pv.m
@@ -0,0 +1,110 @@
+%{
+Matlab code from
+https://sethares.engr.wisc.edu/vocoders/matlabphasevocoder.html
+modified by JF to work as a function rather than as a script.
+%}
+
+% This implements a phase vocoder for time-stretching/compression
+% Put the file name of the input file in "infile"
+%     the file to be created is "outfile"
+% The amount of time-stretch is determined by the ratio of the hopin
+% and hopout variables. For example, hopin=242 and hopout=161.3333
+% (integers are not required) increases the tempo by 
+% hopin/hopout = 1.5. To slow down a comparable amount,
+% choose hopin = 161.3333, hopout = 242.
+% 5/2005 Bill Sethares
+
+%{
+    clear; clf
+    infile='yoursong.wav';
+    outfile='yoursongchanged';
+    [y,sr]=wavread(infile);                    % read song file
+%}
+
+function output = pv(y, sr, hopin, hopout)
+    time=0;                                    % total time to process
+%   hopin=121;                                 % hop length for input 
+%   hopout=242;                                % hop length for output
+
+    all2pi=2*pi*(0:100);                       % all multiples of 2 pi (used in PV-style freq search) 
+    max_peak=50;                               % parameters for peak finding: number of peaks
+    eps_peak=0.005;                            % height of peaks
+    nfft=2^12; nfft2=nfft/2;                   % fft length 
+    win=hanning(nfft)';                        % windows and windowing variables   
+%   siz=wavread(infile,'size');                % length of song in samples
+    siz = size(y);                % length of song in samples
+    stmo=min(siz); leny=max(siz);              % stereo (stmo=2) or mono (stmo=1)
+    if siz(2)<siz(1), y=y'; end
+    if time==0, time = 100000; end
+    tt=zeros(stmo,ceil(hopout/hopin)*min(leny,time*sr)); % place for output 
+    lenseg=floor((min(leny,time*sr)-nfft)/hopin); % number of nfft segments to process
+
+    ssf=sr*(0:nfft2)/nfft;                     % frequency vector
+    phold=zeros(stmo,nfft2+1);
+    phadvance=zeros(stmo,nfft2+1);
+    outbeat=zeros(stmo,nfft);
+    dtin=hopin/sr;                             % time advances dt per hop for input
+    dtout=hopout/sr;                           % time advances dt per hop for output
+
+    for k=1:lenseg-1                           % main loop - process each beat separately
+        if k/1000==round(k/1000), disp(k), end % for display so I know where we are
+        indin=round(((k-1)*hopin+1):((k-1)*hopin+nfft));
+
+        for sk=1:stmo                          % do L/R channels separately
+            s=win.*y(sk,indin);                % get this frame and take FFT
+            ffts=fft(s);
+            mag=abs(ffts(1:nfft2+1)); 
+            ph=angle(ffts(1:nfft2+1));
+
+            % find peaks to define spectral mapping
+
+            peaks=findPeaks4(mag, max_peak, eps_peak, ssf);
+            [dummy,inds]=sort(mag(peaks(:,2)));
+            peaksort=peaks(inds,:);
+            pc=peaksort(:,2);
+
+            bestf=zeros(size(pc));
+            for tk=1:length(pc)                % estimate frequency using PV strategy  
+                dtheta=(ph(pc(tk))-phold(sk,pc(tk)))+all2pi;
+                fest=dtheta./(2*pi*dtin);      % see pvanalysis.m for same idea
+                [er,indf]=min(abs(ssf(pc(tk))-fest));
+                bestf(tk)=fest(indf);          % find best freq estimate for each row
+            end
+
+            % generate output mag and phase
+
+            magout=mag; phout=ph;
+            for tk=1:length(pc)
+                fdes=bestf(tk);                           % reconstruct with original frequency
+                freqind=(peaksort(tk,1):peaksort(tk,3));  % indices of the surrounding bins
+
+                % specify magnitude and phase of each partial
+                magout(freqind)=mag(freqind);
+                phadvance(sk,peaksort(tk,2))=phadvance(sk,peaksort(tk,2))+2*pi*fdes*dtout;
+                pizero=pi*ones(1,length(freqind));
+                pcent=peaksort(tk,2)-peaksort(tk,1)+1;
+                indpc=(2-mod(pcent,2)):2:length(freqind);
+                pizero(indpc)=zeros(1,length(indpc));
+                phout(freqind)=phadvance(sk,peaksort(tk,2))+pizero;
+            end
+
+            % reconstruct time signal (stretched or compressed)
+
+            compl=magout.*exp(sqrt(-1)*phout);
+            compl(nfft2+1)=ffts(nfft2+1);
+            compl=[compl,fliplr(conj(compl(2:(nfft2))))];
+            wave=real(ifft(compl));
+            outbeat(sk,:)=wave;
+            phold(sk,:)=ph; 
+
+        end % end stereo
+        indout=round(((k-1)*hopout+1):((k-1)*hopout+nfft));
+        tt(:,indout)=tt(:,indout)+outbeat;
+    end
+
+%   tt=0.8*tt/max(max(abs(tt)));
+    [rtt,ctt] = size(tt); if rtt==2, tt=tt'; end
+%   wavwrite(tt,sr,16,outfile);
+%   fclose('all');
+    output = tt;
+end
diff --git a/matlab/pvoc/istft.m b/matlab/pvoc/istft.m
@@ -0,0 +1,61 @@
+function x = istft(d, ftsize, w, h)
+% X = istft(D, F, W, H)                   Inverse short-time Fourier transform.
+%	Performs overlap-add resynthesis from the short-time Fourier transform 
+%	data in D.  Each column of D is taken as the result of an F-point 
+%	fft; each successive frame was offset by H points (default
+%	W/2, or F/2 if W==0). Data is hann-windowed at W pts, or 
+%       W = 0 gives a rectangular window (default); 
+%       W as a vector uses that as window.
+%       This version scales the output so the loop gain is 1.0 for
+%       either hann-win an-syn with 25% overlap, or hann-win on
+%       analysis and rect-win (W=0) on synthesis with 50% overlap.
+% dpwe 1994may24.  Uses built-in 'ifft' etc.
+% $Header: /home/empire6/dpwe/public_html/resources/matlab/pvoc/RCS/istft.m,v 1.5 2010/08/12 20:39:42 dpwe Exp $
+
+if nargin < 2; ftsize = 2*(size(d,1)-1); end
+if nargin < 3; w = 0; end
+if nargin < 4; h = 0; end  % will become winlen/2 later
+
+s = size(d);
+if s(1) ~= (ftsize/2)+1
+  error('number of rows should be fftsize/2+1')
+end
+cols = s(2);
+
+if length(w) == 1
+  if w == 0
+    % special case: rectangular window
+    win = ones(1,ftsize);
+  else
+    if rem(w, 2) == 0   % force window to be odd-len
+      w = w + 1;
+    end
+    halflen = (w-1)/2;
+    halff = ftsize/2;
+    halfwin = 0.5 * ( 1 + cos( pi * (0:halflen)/halflen));
+    win = zeros(1, ftsize);
+    acthalflen = min(halff, halflen);
+    win((halff+1):(halff+acthalflen)) = halfwin(1:acthalflen);
+    win((halff+1):-1:(halff-acthalflen+2)) = halfwin(1:acthalflen);
+    % 2009-01-06: Make stft-istft loop be identity for 25% hop
+    win = 2/3*win;
+  end
+else
+  win = w;
+end
+
+w = length(win);
+% now can set default hop
+if h == 0 
+  h = floor(w/2);
+end
+
+xlen = ftsize + (cols-1)*h;
+x = zeros(1,xlen);
+
+for b = 0:h:(h*(cols-1))
+  ft = d(:,1+b/h)';
+  ft = [ft, conj(ft([((ftsize/2)):-1:2]))];
+  px = real(ifft(ft));
+  x((b+1):(b+ftsize)) = x((b+1):(b+ftsize))+px.*win;
+end;
diff --git a/matlab/pvoc/pvoc.m b/matlab/pvoc/pvoc.m
@@ -0,0 +1,36 @@
+function y = pvoc(x, r, n)
+% y = pvoc(x, r, n)  Time-scale a signal to r times faster with phase vocoder
+%      x is an input sound. n is the FFT size, defaults to 1024.  
+%      Calculate the 25%-overlapped STFT, squeeze it by a factor of r, 
+%      inverse spegram.
+% 2000-12-05, 2002-02-13 dpwe@ee.columbia.edu.  Uses pvsample, stft, istft
+% $Header: /home/empire6/dpwe/public_html/resources/matlab/pvoc/RCS/pvoc.m,v 1.3 2011/02/08 21:08:39 dpwe Exp $
+
+if nargin < 3
+  n = 1024;
+end
+
+% With hann windowing on both input and output, 
+% we need 25% window overlap for smooth reconstruction
+hop = n/4;
+% Effect of hanns at both ends is a cumulated cos^2 window (for
+% r = 1 anyway); need to scale magnitudes by 2/3 for
+% identity input/output
+%scf = 2/3;
+% 2011-02-07: this factor is now included in istft.m
+scf = 1.0;
+
+% Calculate the basic STFT, magnitude scaled
+X = scf * stft(x', n, n, hop);
+
+% Calculate the new timebase samples
+[rows, cols] = size(X);
+t = 0:r:(cols-2);
+% Have to stay two cols off end because (a) counting from zero, and 
+% (b) need col n AND col n+1 to interpolate
+
+% Generate the new spectrogram
+X2 = pvsample(X, t, hop);
+
+% Invert to a waveform
+y = istft(X2, n, n, hop)';
diff --git a/matlab/pvoc/pvsample.m b/matlab/pvoc/pvsample.m
@@ -0,0 +1,62 @@
+function c = pvsample(b, t, hop)
+% c = pvsample(b, t, hop)   Interpolate an STFT array according to the 'phase vocoder'
+%     b is an STFT array, of the form generated by 'specgram'.
+%     t is a vector of (real) time-samples, which specifies a path through 
+%     the time-base defined by the columns of b.  For each value of t, 
+%     the spectral magnitudes in the columns of b are interpolated, and 
+%     the phase difference between the successive columns of b is 
+%     calculated; a new column is created in the output array c that 
+%     preserves this per-step phase advance in each bin.
+%     hop is the STFT hop size, defaults to N/2, where N is the FFT size
+%     and b has N/2+1 rows.  hop is needed to calculate the 'null' phase 
+%     advance expected in each bin.
+%     Note: t is defined relative to a zero origin, so 0.1 is 90% of 
+%     the first column of b, plus 10% of the second.
+% 2000-12-05 dpwe@ee.columbia.edu
+% $Header: /homes/dpwe/public_html/resources/matlab/dtw/../RCS/pvsample.m,v 1.3 2003/04/09 03:17:10 dpwe Exp $
+
+if nargin < 3
+  hop = 0;
+end
+
+[rows,cols] = size(b);
+
+N = 2*(rows-1);
+
+if hop == 0
+  % default value
+  hop = N/2;
+end
+
+% Empty output array
+c = zeros(rows, length(t));
+
+% Expected phase advance in each bin
+dphi = zeros(1,N/2+1);
+dphi(2:(1 + N/2)) = (2*pi*hop)./(N./(1:(N/2)));
+
+% Phase accumulator
+% Preset to phase of first frame for perfect reconstruction
+% in case of 1:1 time scaling
+ph = angle(b(:,1));
+
+% Append a 'safety' column on to the end of b to avoid problems 
+% taking *exactly* the last frame (i.e. 1*b(:,cols)+0*b(:,cols+1))
+b = [b,zeros(rows,1)];
+
+ocol = 1;
+for tt = t
+  % Grab the two columns of b
+  bcols = b(:,floor(tt)+[1 2]);
+  tf = tt - floor(tt);
+  bmag = (1-tf)*abs(bcols(:,1)) + tf*(abs(bcols(:,2)));
+  % calculate phase advance
+  dp = angle(bcols(:,2)) - angle(bcols(:,1)) - dphi';
+  % Reduce to -pi:pi range
+  dp = dp - 2 * pi * round(dp/(2*pi));
+  % Save the column
+  c(:,ocol) = bmag .* exp(j*ph);
+  % Cumulate phase, ready for next frame
+  ph = ph + dphi' + dp;
+  ocol = ocol+1;
+end