Skip to content

Commit

Permalink
Finish documentation for the soundfile module
Browse files Browse the repository at this point in the history
  • Loading branch information
JohnVinyard committed Sep 4, 2017
1 parent e216c89 commit 955254c
Show file tree
Hide file tree
Showing 8 changed files with 264 additions and 131 deletions.
4 changes: 1 addition & 3 deletions zounds/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@
from soundfile import \
MetaData, AudioMetaDataEncoder, FreesoundOrgConfig, \
OggVorbis, OggVorbisDecoder, OggVorbisEncoder, OggVorbisFeature, \
OggVorbisWrapper, \
AudioStream, \
Resampler
OggVorbisWrapper, AudioStream, Resampler, ChunkSizeBytes

from spectral import \
SlidingWindow, OggVorbisWindowingFunc, WindowingFunc, \
Expand Down
217 changes: 112 additions & 105 deletions zounds/basic/audiograph.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,24 @@
from featureflow import BaseModel, JSONFeature, ByteStream, ByteStreamFeature
from zounds.soundfile import \
MetaData, AudioMetaDataEncoder, OggVorbis, OggVorbisFeature, AudioStream, \
Resampler
Resampler, ChunkSizeBytes
from zounds.segment import \
ComplexDomain, MovingAveragePeakPicker, TimeSliceFeature
from zounds.persistence import ArrayWithUnitsFeature, AudioSamplesFeature
from zounds.timeseries import SR44100, HalfLapped, Stride
from zounds.timeseries import SR44100, HalfLapped, Stride, Seconds
from zounds.spectral import \
SlidingWindow, OggVorbisWindowingFunc, FFT, BarkBands, SpectralCentroid, \
Chroma, BFCC, DCT

# Default ByteStream chunk size for the pipeline factories in this module
# (the default value of their ``chunksize_bytes`` parameter): 30 seconds of
# 16-bit, two-channel audio at the SR44100 sample rate.
DEFAULT_CHUNK_SIZE = ChunkSizeBytes(
samplerate=SR44100(),
duration=Seconds(30),
bit_depth=16,
channels=2)


def resampled(
chunksize_bytes=2 * 44100 * 30 * 2,
chunksize_bytes=DEFAULT_CHUNK_SIZE,
resample_to=SR44100(),
store_resampled=False):
"""
Expand All @@ -26,88 +32,89 @@ def resampled(
:param resample_to: The new, normalized sampling rate
:return: A simple processing pipeline
"""

class Resampled(BaseModel):
meta = JSONFeature(
MetaData,
store=True,
encoder=AudioMetaDataEncoder)
MetaData,
store=True,
encoder=AudioMetaDataEncoder)

raw = ByteStreamFeature(
ByteStream,
chunksize=chunksize_bytes,
needs=meta,
store=False)
ByteStream,
chunksize=chunksize_bytes,
needs=meta,
store=False)

ogg = OggVorbisFeature(
OggVorbis,
needs=raw,
store=True)
OggVorbis,
needs=raw,
store=True)

pcm = AudioSamplesFeature(
AudioStream,
needs=raw,
store=False)
AudioStream,
needs=raw,
store=False)

resampled = AudioSamplesFeature(
Resampler,
needs=pcm,
samplerate=resample_to,
store=store_resampled)
Resampler,
needs=pcm,
samplerate=resample_to,
store=store_resampled)

return Resampled


def stft(
chunksize_bytes=2 * 44100 * 30 * 2,
chunksize_bytes=DEFAULT_CHUNK_SIZE,
resample_to=SR44100(),
wscheme=HalfLapped(),
store_fft=False,
store_windowed=False):
class ShortTimeFourierTransform(BaseModel):
meta = JSONFeature(
MetaData,
store=True,
encoder=AudioMetaDataEncoder)
MetaData,
store=True,
encoder=AudioMetaDataEncoder)

raw = ByteStreamFeature(
ByteStream,
chunksize=chunksize_bytes,
needs=meta,
store=False)
ByteStream,
chunksize=chunksize_bytes,
needs=meta,
store=False)

ogg = OggVorbisFeature(
OggVorbis,
needs=raw,
store=True)
OggVorbis,
needs=raw,
store=True)

pcm = AudioSamplesFeature(
AudioStream,
needs=raw,
store=False)
AudioStream,
needs=raw,
store=False)

resampled = AudioSamplesFeature(
Resampler,
needs=pcm,
samplerate=resample_to,
store=False)
Resampler,
needs=pcm,
samplerate=resample_to,
store=False)

windowed = ArrayWithUnitsFeature(
SlidingWindow,
needs=resampled,
wscheme=wscheme,
wfunc=OggVorbisWindowingFunc(),
store=store_windowed)
SlidingWindow,
needs=resampled,
wscheme=wscheme,
wfunc=OggVorbisWindowingFunc(),
store=store_windowed)

fft = ArrayWithUnitsFeature(
FFT,
needs=windowed,
store=store_fft)
FFT,
needs=windowed,
store=store_fft)

return ShortTimeFourierTransform


def audio_graph(
chunksize_bytes=2 * 44100 * 30 * 2,
chunksize_bytes=DEFAULT_CHUNK_SIZE,
resample_to=SR44100(),
freesound_api_key=None,
store_fft=False):
Expand All @@ -121,70 +128,70 @@ def audio_graph(

class AudioGraph(BaseModel):
meta = JSONFeature(
MetaData,
store=True,
encoder=AudioMetaDataEncoder)
MetaData,
store=True,
encoder=AudioMetaDataEncoder)

raw = ByteStreamFeature(
ByteStream,
chunksize=chunksize_bytes,
needs=meta,
store=False)
ByteStream,
chunksize=chunksize_bytes,
needs=meta,
store=False)

ogg = OggVorbisFeature(
OggVorbis,
needs=raw,
store=True)
OggVorbis,
needs=raw,
store=True)

pcm = AudioSamplesFeature(
AudioStream,
needs=raw,
store=False)
AudioStream,
needs=raw,
store=False)

resampled = AudioSamplesFeature(
Resampler,
needs=pcm,
samplerate=resample_to,
store=False)
Resampler,
needs=pcm,
samplerate=resample_to,
store=False)

windowed = ArrayWithUnitsFeature(
SlidingWindow,
needs=resampled,
wscheme=HalfLapped(),
wfunc=OggVorbisWindowingFunc(),
store=False)
SlidingWindow,
needs=resampled,
wscheme=HalfLapped(),
wfunc=OggVorbisWindowingFunc(),
store=False)

dct = ArrayWithUnitsFeature(
DCT,
needs=windowed,
store=True)
DCT,
needs=windowed,
store=True)

fft = ArrayWithUnitsFeature(
FFT,
needs=windowed,
store=store_fft)
FFT,
needs=windowed,
store=store_fft)

bark = ArrayWithUnitsFeature(
BarkBands,
needs=fft,
samplerate=resample_to,
store=True)
BarkBands,
needs=fft,
samplerate=resample_to,
store=True)

centroid = ArrayWithUnitsFeature(
SpectralCentroid,
needs=bark,
store=True)
SpectralCentroid,
needs=bark,
store=True)

chroma = ArrayWithUnitsFeature(
Chroma,
needs=fft,
samplerate=resample_to,
store=True)
Chroma,
needs=fft,
samplerate=resample_to,
store=True)

bfcc = ArrayWithUnitsFeature(
BFCC,
needs=fft,
store=True)
BFCC,
needs=fft,
store=True)

return AudioGraph

Expand All @@ -198,27 +205,27 @@ def with_onsets(fft_feature):

class Onsets(BaseModel):
onset_prep = ArrayWithUnitsFeature(
SlidingWindow,
needs=fft_feature,
wscheme=HalfLapped() * Stride(frequency=1, duration=3),
store=False)
SlidingWindow,
needs=fft_feature,
wscheme=HalfLapped() * Stride(frequency=1, duration=3),
store=False)

complex_domain = ArrayWithUnitsFeature(
ComplexDomain,
needs=onset_prep,
store=False)
ComplexDomain,
needs=onset_prep,
store=False)

sliding_detection = ArrayWithUnitsFeature(
SlidingWindow,
needs=complex_domain,
wscheme=HalfLapped() * Stride(frequency=1, duration=11),
padwith=5,
store=False)
SlidingWindow,
needs=complex_domain,
wscheme=HalfLapped() * Stride(frequency=1, duration=11),
padwith=5,
store=False)

slices = TimeSliceFeature(
MovingAveragePeakPicker,
needs=sliding_detection,
aggregate=np.median,
store=True)
MovingAveragePeakPicker,
needs=sliding_detection,
aggregate=np.median,
store=True)

return Onsets
2 changes: 2 additions & 0 deletions zounds/soundfile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@
from audiostream import AudioStream

from resample import Resampler

from chunksize import ChunkSizeBytes
7 changes: 6 additions & 1 deletion zounds/soundfile/audiostream.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ class AudioStream(Node):
import featureflow as ff
import zounds
chunksize = zounds.ChunkSizeBytes(
samplerate=zounds.SR44100(),
duration=zounds.Seconds(30),
bit_depth=16,
channels=2)
@zounds.simple_in_memory_settings
class Document(ff.BaseModel):
Expand All @@ -37,7 +42,7 @@ class Document(ff.BaseModel):
raw = ff.ByteStreamFeature(
ff.ByteStream,
chunksize=2 * 44100 * 30 * 2,
chunksize=chunksize,
needs=meta,
store=False)
Expand Down
41 changes: 41 additions & 0 deletions zounds/soundfile/chunksize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@

class ChunkSizeBytes(object):
    """
    A convenience class to help describe a chunksize in bytes for the
    :class:`featureflow.ByteStream` in terms of audio sample batch sizes.

    The size in bytes is obtained by converting an instance with ``int()``.
    It is computed as ``samples * bytes_per_sample * channels``, where
    ``samples`` is ``duration / samplerate.frequency`` truncated to a whole
    number and ``bytes_per_sample`` is ``bit_depth`` rounded up to a whole
    number of bytes.

    Args:
        samplerate (SampleRate): The samples-per-second factor.  Only its
            ``frequency`` attribute is read, and ``duration`` is divided by it
        duration (numpy.timedelta64): The length of desired chunks in seconds
        channels (int): The audio channels factor
        bit_depth (int): The bit depth factor

    Examples:
        >>> from zounds import ChunkSizeBytes, Seconds, SR44100
        >>> chunksize = ChunkSizeBytes(SR44100(), Seconds(30))
        >>> chunksize
        ChunkSizeBytes(samplerate=SR44100(f=2.2675736e-05, d=2.2675736e-05)...
        >>> int(chunksize)
        5292000
    """

    def __init__(self, samplerate, duration, channels=2, bit_depth=16):
        self.duration = duration
        self.bit_depth = bit_depth
        self.channels = channels
        self.samplerate = samplerate

    def __int__(self):
        """
        Return the chunk size as a whole number of bytes.
        """
        # Round up to whole bytes per sample.  Plain floor division would
        # under-count bit depths that are not a multiple of eight (e.g. 12 or
        # 20 bits) and would collapse to a zero-byte chunk for anything
        # narrower than 8 bits.
        byte_depth = (self.bit_depth + 7) // 8
        # Number of whole samples that fit into the requested duration
        total_samples = int(self.duration / self.samplerate.frequency)
        return int(total_samples * byte_depth * self.channels)

    def __repr__(self):
        msg = 'ChunkSizeBytes(samplerate={samplerate}, duration={duration}, ' \
              'channels={channels}, bit_depth={bit_depth})'

        return msg.format(
            samplerate=self.samplerate,
            duration=str(self.duration),
            channels=self.channels,
            bit_depth=self.bit_depth)

0 comments on commit 955254c

Please sign in to comment.