Skip to content

Commit

Permalink
Merge pull request #53 from SuperKogito/docs-cleanups
Browse files Browse the repository at this point in the history
clean up docs and fix formatting
  • Loading branch information
SuperKogito committed Dec 30, 2022
2 parents 378d194 + 84eab23 commit 3b9b51d
Show file tree
Hide file tree
Showing 16 changed files with 101 additions and 68 deletions.
12 changes: 9 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,16 @@ cd spafe
python setup.py install
```

## Why use Spafe?
## Why use Spafe?

Unlike most existing audio feature extraction libraries ([python_speech_features](https://github.com/jameslyons/python_speech_features), [SpeechPy](https://github.com/astorfi/speechpy), [surfboard](https://github.com/novoic/surfboard) and [Bob](https://gitlab.idiap.ch/bob)), Spafe provides more options for spectral features, notably, Bark Frequency Cepstral Coefficients (BFCCs), Constant Q-transform Cepstral Coefficients (CQCCs), Gammatone Frequency Cepstral Coefficients (GFCCs), Power-Normalized Cepstral Coefficients (PNCCs), Phase based Spectral Root Cepstral Coefficients (PSRCCs) extraction algorithms.
Most existing libraries, to their credit, provide great implementations for feature extraction but are unfortunately limited to the Mel Frequency Features (e.g. MFCC) and at best additionally offer Bark frequency and linear predictive coefficients. [Librosa](https://github.com/librosa/librosa), for example, includes great implementations of various algorithms (only MFCC and LPC are included), based on the Short Time Fourier Transform (STFT), which is theoretically more accurate but slower than the Discrete Fourier Transform used in Spafe's implementation.
Unlike most existing audio feature extraction libraries ([python_speech_features](https://github.com/jameslyons/python_speech_features), [SpeechPy](https://github.com/astorfi/speechpy), [surfboard](https://github.com/novoic/surfboard) and [Bob](https://gitlab.idiap.ch/bob)), Spafe provides more options for spectral features extraction algorithms, notably:
- Bark Frequency Cepstral Coefficients (BFCCs)
- Constant Q-transform Cepstral Coefficients (CQCCs)
- Gammatone Frequency Cepstral Coefficients (GFCCs)
- Power-Normalized Cepstral Coefficients (PNCCs)
- Phase based Spectral Root Cepstral Coefficients (PSRCCs)

Most existing libraries, to their credit, provide great implementations for feature extraction but are unfortunately limited to the Mel Frequency Features (MFCC) and at best additionally offer Bark frequency and linear predictive coefficients. [Librosa](https://github.com/librosa/librosa), for example, includes great implementations of various algorithms (only MFCC and LPC are included), based on the **Short Time Fourier Transform (STFT)**, which is theoretically more accurate but slower than the **Discrete Fourier Transform (DFT)** used in Spafe's implementation.


## How to use
Expand Down
2 changes: 1 addition & 1 deletion spafe/fbanks/gammatone_fbanks.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def gammatone_filter_banks(
Returns:
(tuple) :
- (numpy.ndarray) : array of size nfilts * (nfft/2 + 1) containing filter bank. Each row holds 1 filter.
- (numpy.ndarray) : array of center frequencies
- (numpy.ndarray) : array of center frequencies in Erb.
Tip:
- :code:`scale` : can take the following options ["constant", "ascendant", "descendant"].
Expand Down
11 changes: 6 additions & 5 deletions spafe/features/bfcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def bark_spectrogram(
fs: int = 16000,
pre_emph: float = 0,
pre_emph_coeff: float = 0.97,
window : Optional[SlidingWindow] = None,
window: Optional[SlidingWindow] = None,
nfilts: int = 24,
nfft: int = 512,
low_freq: float = 0,
Expand Down Expand Up @@ -160,10 +160,12 @@ def bark_spectrogram(

# init window
if window is None:
window = SlidingWindow()
window = SlidingWindow()

# -> framing
frames, frame_length = framing(sig=sig, fs=fs, win_len=window.win_len, win_hop=window.win_hop)
frames, frame_length = framing(
sig=sig, fs=fs, win_len=window.win_len, win_hop=window.win_hop
)

# -> windowing
windows = windowing(frames=frames, frame_len=frame_length, win_type=window.win_type)
Expand All @@ -186,7 +188,7 @@ def bfcc(
num_ceps: int = 13,
pre_emph: bool = True,
pre_emph_coeff: float = 0.97,
window : Optional[SlidingWindow] = None,
window: Optional[SlidingWindow] = None,
nfilts: int = 26,
nfft: int = 512,
low_freq: float = 0,
Expand Down Expand Up @@ -292,7 +294,6 @@ def bfcc(
if nfilts < num_ceps:
raise ParameterError(ErrorMsgs["nfilts"])


# compute features
features, fourrier_transform = bark_spectrogram(
sig=sig,
Expand Down
10 changes: 6 additions & 4 deletions spafe/features/cqcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def cqt_spectrogram(
fs: int = 16000,
pre_emph: bool = True,
pre_emph_coeff: float = 0.97,
window : Optional[SlidingWindow] = None,
window: Optional[SlidingWindow] = None,
nfft: int = 512,
low_freq: float = 0,
high_freq: Optional[float] = None,
Expand Down Expand Up @@ -123,10 +123,12 @@ def cqt_spectrogram(

# init window
if window is None:
window = SlidingWindow()
window = SlidingWindow()

# -> framing
frames, frame_length = framing(sig=sig, fs=fs, win_len=window.win_len, win_hop=window.win_hop)
frames, frame_length = framing(
sig=sig, fs=fs, win_len=window.win_len, win_hop=window.win_hop
)

# -> windowing
windows = windowing(frames=frames, frame_len=frame_length, win_type=window.win_type)
Expand Down Expand Up @@ -155,7 +157,7 @@ def cqcc(
num_ceps: int = 13,
pre_emph: bool = True,
pre_emph_coeff: float = 0.97,
window : Optional[SlidingWindow] = None,
window: Optional[SlidingWindow] = None,
nfft: int = 512,
low_freq: float = 0,
high_freq: Optional[float] = None,
Expand Down
10 changes: 6 additions & 4 deletions spafe/features/gfcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def erb_spectrogram(
fs: int = 16000,
pre_emph: bool = True,
pre_emph_coeff: float = 0.97,
window : Optional[SlidingWindow] = None,
window: Optional[SlidingWindow] = None,
nfilts: int = 24,
nfft: int = 512,
low_freq: float = 0,
Expand Down Expand Up @@ -138,10 +138,12 @@ def erb_spectrogram(

# init window
if window is None:
window = SlidingWindow()
window = SlidingWindow()

# -> framing
frames, frame_length = framing(sig=sig, fs=fs, win_len=window.win_len, win_hop=window.win_hop)
frames, frame_length = framing(
sig=sig, fs=fs, win_len=window.win_len, win_hop=window.win_hop
)

# -> windowing
windows = windowing(frames=frames, frame_len=frame_length, win_type=window.win_type)
Expand All @@ -165,7 +167,7 @@ def gfcc(
num_ceps: int = 13,
pre_emph: bool = True,
pre_emph_coeff: float = 0.97,
window : Optional[SlidingWindow] = None,
window: Optional[SlidingWindow] = None,
nfilts: int = 24,
nfft: int = 512,
low_freq: float = 0,
Expand Down
10 changes: 6 additions & 4 deletions spafe/features/lfcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def linear_spectrogram(
fs: int = 16000,
pre_emph: bool = True,
pre_emph_coeff: float = 0.97,
window : Optional[SlidingWindow] = None,
window: Optional[SlidingWindow] = None,
nfilts: int = 24,
nfft: int = 512,
low_freq: float = 0,
Expand Down Expand Up @@ -130,10 +130,12 @@ def linear_spectrogram(

# init window
if window is None:
window = SlidingWindow()
window = SlidingWindow()

# -> framing
frames, frame_length = framing(sig=sig, fs=fs, win_len=window.win_len, win_hop=window.win_hop)
frames, frame_length = framing(
sig=sig, fs=fs, win_len=window.win_len, win_hop=window.win_hop
)

# -> windowing
windows = windowing(frames=frames, frame_len=frame_length, win_type=window.win_type)
Expand All @@ -157,7 +159,7 @@ def lfcc(
num_ceps=13,
pre_emph: bool = True,
pre_emph_coeff: float = 0.97,
window : Optional[SlidingWindow] = None,
window: Optional[SlidingWindow] = None,
nfilts: int = 24,
nfft: int = 512,
low_freq: float = 0,
Expand Down
16 changes: 10 additions & 6 deletions spafe/features/lpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def lpc(
order=13,
pre_emph: bool = True,
pre_emph_coeff: float = 0.97,
window : Optional[SlidingWindow] = None,
window: Optional[SlidingWindow] = None,
):
"""
Compute the Linear prediction coefficients (LPC) from an audio signal.
Expand Down Expand Up @@ -156,10 +156,12 @@ def lpc(

# init window
if window is None:
window = SlidingWindow()
window = SlidingWindow()

# -> framing
frames, frame_length = framing(sig=sig, fs=fs, win_len=window.win_len, win_hop=window.win_hop)
frames, frame_length = framing(
sig=sig, fs=fs, win_len=window.win_len, win_hop=window.win_hop
)

# -> windowing
windows = windowing(frames=frames, frame_len=frame_length, win_type=window.win_type)
Expand Down Expand Up @@ -227,7 +229,7 @@ def lpcc(
order=13,
pre_emph: bool = True,
pre_emph_coeff: float = 0.97,
window : Optional[SlidingWindow] = None,
window: Optional[SlidingWindow] = None,
lifter: Optional[int] = None,
normalize: Optional[NormalizationType] = None,
) -> np.ndarray:
Expand Down Expand Up @@ -294,10 +296,12 @@ def lpcc(

# init window
if window is None:
window = SlidingWindow()
window = SlidingWindow()

# -> framing
frames, frame_length = framing(sig=sig, fs=fs, win_len=window.win_len, win_hop=window.win_hop)
frames, frame_length = framing(
sig=sig, fs=fs, win_len=window.win_len, win_hop=window.win_hop
)

# -> windowing
windows = windowing(frames=frames, frame_len=frame_length, win_type=window.win_type)
Expand Down
18 changes: 11 additions & 7 deletions spafe/features/mfcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def mel_spectrogram(
fs: int = 16000,
pre_emph: bool = True,
pre_emph_coeff: float = 0.97,
window : Optional[SlidingWindow] = None,
window: Optional[SlidingWindow] = None,
nfilts: int = 24,
nfft: int = 512,
low_freq: float = 0,
Expand Down Expand Up @@ -139,10 +139,12 @@ def mel_spectrogram(

# init window
if window is None:
window = SlidingWindow()
window = SlidingWindow()

# -> framing
frames, frame_length = framing(sig=sig, fs=fs, win_len=window.win_len, win_hop=window.win_hop)
frames, frame_length = framing(
sig=sig, fs=fs, win_len=window.win_len, win_hop=window.win_hop
)

# -> windowing
windows = windowing(frames=frames, frame_len=frame_length, win_type=window.win_type)
Expand All @@ -166,7 +168,7 @@ def mfcc(
num_ceps: int = 13,
pre_emph: bool = True,
pre_emph_coeff: float = 0.97,
window : Optional[SlidingWindow] = None,
window: Optional[SlidingWindow] = None,
nfilts: int = 24,
nfft: int = 512,
low_freq: float = 0,
Expand All @@ -182,9 +184,11 @@ def mfcc(
"""
Compute MFCC features (Mel-frequency cepstral coefficients) from an audio
signal. This function offers multiple approaches to features extraction
depending on the input parameters. Implementation is using FFT and is based on
http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.63.8029&rep=rep1&type=pdf
depending on the input parameters. This MFCC implementation uses the FFT and
can be summarized as follows:
- pre-emphasis
- framing + Windowing
- take the absolute value of the FFT
- warp to a Mel frequency scale
- take the DCT of the log-Mel-spectrum
Expand Down Expand Up @@ -324,7 +328,7 @@ def imfcc(
num_ceps=13,
pre_emph: bool = True,
pre_emph_coeff: float = 0.97,
window : Optional[SlidingWindow] = None,
window: Optional[SlidingWindow] = None,
nfilts: int = 24,
nfft: int = 512,
low_freq: float = 0,
Expand Down
2 changes: 1 addition & 1 deletion spafe/features/msrcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def msrcc(
num_ceps=13,
pre_emph: bool = True,
pre_emph_coeff: float = 0.97,
window : Optional[SlidingWindow] = None,
window: Optional[SlidingWindow] = None,
nfilts: int = 24,
nfft: int = 512,
low_freq: float = 0,
Expand Down
8 changes: 5 additions & 3 deletions spafe/features/ngcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def ngcc(
num_ceps=13,
pre_emph: bool = True,
pre_emph_coeff: float = 0.97,
window : Optional[SlidingWindow] = None,
window: Optional[SlidingWindow] = None,
nfilts: int = 24,
nfft: int = 512,
low_freq: Optional[float] = None,
Expand Down Expand Up @@ -154,10 +154,12 @@ def ngcc(

# init window
if window is None:
window = SlidingWindow()
window = SlidingWindow()

# -> framing
frames, frame_length = framing(sig=sig, fs=fs, win_len=window.win_len, win_hop=window.win_hop)
frames, frame_length = framing(
sig=sig, fs=fs, win_len=window.win_len, win_hop=window.win_hop
)

# -> windowing
windows = windowing(frames=frames, frame_len=frame_length, win_type=window.win_type)
Expand Down
16 changes: 9 additions & 7 deletions spafe/features/pncc.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
from ..utils.exceptions import ParameterError, ErrorMsgs
from ..utils.filters import ScaleType
from ..utils.preprocessing import (
pre_emphasis,
framing,
windowing,
pre_emphasis,
framing,
windowing,
SlidingWindow,
)

Expand Down Expand Up @@ -287,7 +287,7 @@ def pncc(
pre_emph: bool = True,
pre_emph_coeff: float = 0.97,
power=2,
window : Optional[SlidingWindow] = None,
window: Optional[SlidingWindow] = None,
nfilts: int = 24,
nfft: int = 512,
low_freq: Optional[float] = None,
Expand Down Expand Up @@ -412,10 +412,12 @@ def pncc(

# init window
if window is None:
window = SlidingWindow()
window = SlidingWindow()

# -> framing
frames, frame_length = framing(sig=sig, fs=fs, win_len=window.win_len, win_hop=window.win_hop)
frames, frame_length = framing(
sig=sig, fs=fs, win_len=window.win_len, win_hop=window.win_hop
)

# -> windowing
windows = windowing(frames=frames, frame_len=frame_length, win_type=window.win_type)
Expand Down
8 changes: 5 additions & 3 deletions spafe/features/psrcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def psrcc(
num_ceps: int = 13,
pre_emph: bool = True,
pre_emph_coeff: float = 0.97,
window : Optional[SlidingWindow] = None,
window: Optional[SlidingWindow] = None,
nfilts: int = 26,
nfft: int = 512,
low_freq: Optional[float] = None,
Expand Down Expand Up @@ -152,10 +152,12 @@ def psrcc(

# init window
if window is None:
window = SlidingWindow()
window = SlidingWindow()

# -> framing
frames, frame_length = framing(sig=sig, fs=fs, win_len=window.win_len, win_hop=window.win_hop)
frames, frame_length = framing(
sig=sig, fs=fs, win_len=window.win_len, win_hop=window.win_hop
)

# -> windowing
windows = windowing(frames=frames, frame_len=frame_length, win_type=window.win_type)
Expand Down

0 comments on commit 3b9b51d

Please sign in to comment.