Finish documentation for the soundfile module

JohnVinyard · Sep 4, 2017 · 955254c · 955254c
1 parent e216c89
commit 955254c
Show file tree

Hide file tree

Showing 8 changed files with 264 additions and 131 deletions.
diff --git a/zounds/__init__.py b/zounds/__init__.py
@@ -10,9 +10,7 @@
 from soundfile import \
     MetaData, AudioMetaDataEncoder, FreesoundOrgConfig, \
     OggVorbis, OggVorbisDecoder, OggVorbisEncoder, OggVorbisFeature, \
-    OggVorbisWrapper, \
-    AudioStream, \
-    Resampler
+    OggVorbisWrapper, AudioStream, Resampler, ChunkSizeBytes
 
 from spectral import \
     SlidingWindow, OggVorbisWindowingFunc, WindowingFunc, \

diff --git a/zounds/basic/audiograph.py b/zounds/basic/audiograph.py
@@ -2,18 +2,24 @@
 from featureflow import BaseModel, JSONFeature, ByteStream, ByteStreamFeature
 from zounds.soundfile import \
     MetaData, AudioMetaDataEncoder, OggVorbis, OggVorbisFeature, AudioStream, \
-    Resampler
+    Resampler, ChunkSizeBytes
 from zounds.segment import \
     ComplexDomain, MovingAveragePeakPicker, TimeSliceFeature
 from zounds.persistence import ArrayWithUnitsFeature, AudioSamplesFeature
-from zounds.timeseries import SR44100, HalfLapped, Stride
+from zounds.timeseries import SR44100, HalfLapped, Stride, Seconds
 from zounds.spectral import \
     SlidingWindow, OggVorbisWindowingFunc, FFT, BarkBands, SpectralCentroid, \
     Chroma, BFCC, DCT
 
+DEFAULT_CHUNK_SIZE = ChunkSizeBytes(
+    samplerate=SR44100(),
+    duration=Seconds(30),
+    bit_depth=16,
+    channels=2)
+
 
 def resampled(
-        chunksize_bytes=2 * 44100 * 30 * 2,
+        chunksize_bytes=DEFAULT_CHUNK_SIZE,
         resample_to=SR44100(),
         store_resampled=False):
     """
@@ -26,88 +32,89 @@ def resampled(
     :param resample_to: The new, normalized sampling rate
     :return: A simple processing pipeline
     """
+
     class Resampled(BaseModel):
         meta = JSONFeature(
-                MetaData,
-                store=True,
-                encoder=AudioMetaDataEncoder)
+            MetaData,
+            store=True,
+            encoder=AudioMetaDataEncoder)
 
         raw = ByteStreamFeature(
-                ByteStream,
-                chunksize=chunksize_bytes,
-                needs=meta,
-                store=False)
+            ByteStream,
+            chunksize=chunksize_bytes,
+            needs=meta,
+            store=False)
 
         ogg = OggVorbisFeature(
-                OggVorbis,
-                needs=raw,
-                store=True)
+            OggVorbis,
+            needs=raw,
+            store=True)
 
         pcm = AudioSamplesFeature(
-                AudioStream,
-                needs=raw,
-                store=False)
+            AudioStream,
+            needs=raw,
+            store=False)
 
         resampled = AudioSamplesFeature(
-                Resampler,
-                needs=pcm,
-                samplerate=resample_to,
-                store=store_resampled)
+            Resampler,
+            needs=pcm,
+            samplerate=resample_to,
+            store=store_resampled)
 
     return Resampled
 
 
 def stft(
-        chunksize_bytes=2 * 44100 * 30 * 2,
+        chunksize_bytes=DEFAULT_CHUNK_SIZE,
         resample_to=SR44100(),
         wscheme=HalfLapped(),
         store_fft=False,
         store_windowed=False):
     class ShortTimeFourierTransform(BaseModel):
         meta = JSONFeature(
-                MetaData,
-                store=True,
-                encoder=AudioMetaDataEncoder)
+            MetaData,
+            store=True,
+            encoder=AudioMetaDataEncoder)
 
         raw = ByteStreamFeature(
-                ByteStream,
-                chunksize=chunksize_bytes,
-                needs=meta,
-                store=False)
+            ByteStream,
+            chunksize=chunksize_bytes,
+            needs=meta,
+            store=False)
 
         ogg = OggVorbisFeature(
-                OggVorbis,
-                needs=raw,
-                store=True)
+            OggVorbis,
+            needs=raw,
+            store=True)
 
         pcm = AudioSamplesFeature(
-                AudioStream,
-                needs=raw,
-                store=False)
+            AudioStream,
+            needs=raw,
+            store=False)
 
         resampled = AudioSamplesFeature(
-                Resampler,
-                needs=pcm,
-                samplerate=resample_to,
-                store=False)
+            Resampler,
+            needs=pcm,
+            samplerate=resample_to,
+            store=False)
 
         windowed = ArrayWithUnitsFeature(
-                SlidingWindow,
-                needs=resampled,
-                wscheme=wscheme,
-                wfunc=OggVorbisWindowingFunc(),
-                store=store_windowed)
+            SlidingWindow,
+            needs=resampled,
+            wscheme=wscheme,
+            wfunc=OggVorbisWindowingFunc(),
+            store=store_windowed)
 
         fft = ArrayWithUnitsFeature(
-                FFT,
-                needs=windowed,
-                store=store_fft)
+            FFT,
+            needs=windowed,
+            store=store_fft)
 
     return ShortTimeFourierTransform
 
 
 def audio_graph(
-        chunksize_bytes=2 * 44100 * 30 * 2,
+        chunksize_bytes=DEFAULT_CHUNK_SIZE,
         resample_to=SR44100(),
         freesound_api_key=None,
         store_fft=False):
@@ -121,70 +128,70 @@ def audio_graph(
 
     class AudioGraph(BaseModel):
         meta = JSONFeature(
-                MetaData,
-                store=True,
-                encoder=AudioMetaDataEncoder)
+            MetaData,
+            store=True,
+            encoder=AudioMetaDataEncoder)
 
         raw = ByteStreamFeature(
-                ByteStream,
-                chunksize=chunksize_bytes,
-                needs=meta,
-                store=False)
+            ByteStream,
+            chunksize=chunksize_bytes,
+            needs=meta,
+            store=False)
 
         ogg = OggVorbisFeature(
-                OggVorbis,
-                needs=raw,
-                store=True)
+            OggVorbis,
+            needs=raw,
+            store=True)
 
         pcm = AudioSamplesFeature(
-                AudioStream,
-                needs=raw,
-                store=False)
+            AudioStream,
+            needs=raw,
+            store=False)
 
         resampled = AudioSamplesFeature(
-                Resampler,
-                needs=pcm,
-                samplerate=resample_to,
-                store=False)
+            Resampler,
+            needs=pcm,
+            samplerate=resample_to,
+            store=False)
 
         windowed = ArrayWithUnitsFeature(
-                SlidingWindow,
-                needs=resampled,
-                wscheme=HalfLapped(),
-                wfunc=OggVorbisWindowingFunc(),
-                store=False)
+            SlidingWindow,
+            needs=resampled,
+            wscheme=HalfLapped(),
+            wfunc=OggVorbisWindowingFunc(),
+            store=False)
 
         dct = ArrayWithUnitsFeature(
-                DCT,
-                needs=windowed,
-                store=True)
+            DCT,
+            needs=windowed,
+            store=True)
 
         fft = ArrayWithUnitsFeature(
-                FFT,
-                needs=windowed,
-                store=store_fft)
+            FFT,
+            needs=windowed,
+            store=store_fft)
 
         bark = ArrayWithUnitsFeature(
-                BarkBands,
-                needs=fft,
-                samplerate=resample_to,
-                store=True)
+            BarkBands,
+            needs=fft,
+            samplerate=resample_to,
+            store=True)
 
         centroid = ArrayWithUnitsFeature(
-                SpectralCentroid,
-                needs=bark,
-                store=True)
+            SpectralCentroid,
+            needs=bark,
+            store=True)
 
         chroma = ArrayWithUnitsFeature(
-                Chroma,
-                needs=fft,
-                samplerate=resample_to,
-                store=True)
+            Chroma,
+            needs=fft,
+            samplerate=resample_to,
+            store=True)
 
         bfcc = ArrayWithUnitsFeature(
-                BFCC,
-                needs=fft,
-                store=True)
+            BFCC,
+            needs=fft,
+            store=True)
 
     return AudioGraph
 
@@ -198,27 +205,27 @@ def with_onsets(fft_feature):
 
     class Onsets(BaseModel):
         onset_prep = ArrayWithUnitsFeature(
-                SlidingWindow,
-                needs=fft_feature,
-                wscheme=HalfLapped() * Stride(frequency=1, duration=3),
-                store=False)
+            SlidingWindow,
+            needs=fft_feature,
+            wscheme=HalfLapped() * Stride(frequency=1, duration=3),
+            store=False)
 
         complex_domain = ArrayWithUnitsFeature(
-                ComplexDomain,
-                needs=onset_prep,
-                store=False)
+            ComplexDomain,
+            needs=onset_prep,
+            store=False)
 
         sliding_detection = ArrayWithUnitsFeature(
-                SlidingWindow,
-                needs=complex_domain,
-                wscheme=HalfLapped() * Stride(frequency=1, duration=11),
-                padwith=5,
-                store=False)
+            SlidingWindow,
+            needs=complex_domain,
+            wscheme=HalfLapped() * Stride(frequency=1, duration=11),
+            padwith=5,
+            store=False)
 
         slices = TimeSliceFeature(
-                MovingAveragePeakPicker,
-                needs=sliding_detection,
-                aggregate=np.median,
-                store=True)
+            MovingAveragePeakPicker,
+            needs=sliding_detection,
+            aggregate=np.median,
+            store=True)
 
     return Onsets
diff --git a/zounds/soundfile/__init__.py b/zounds/soundfile/__init__.py
@@ -12,3 +12,5 @@
 from audiostream import AudioStream
 
 from resample import Resampler
+
+from chunksize import ChunkSizeBytes
diff --git a/zounds/soundfile/audiostream.py b/zounds/soundfile/audiostream.py
@@ -27,6 +27,11 @@ class AudioStream(Node):
         import featureflow as ff
         import zounds
 
+        chunksize = zounds.ChunkSizeBytes(
+            samplerate=zounds.SR44100(),
+            duration=zounds.Seconds(30),
+            bit_depth=16,
+            channels=2)
 
         @zounds.simple_in_memory_settings
         class Document(ff.BaseModel):
@@ -37,7 +42,7 @@ class Document(ff.BaseModel):
 
             raw = ff.ByteStreamFeature(
                 ff.ByteStream,
-                chunksize=2 * 44100 * 30 * 2,
+                chunksize=chunksize,
                 needs=meta,
                 store=False)
 

diff --git a/zounds/soundfile/chunksize.py b/zounds/soundfile/chunksize.py
@@ -0,0 +1,41 @@
+
+class ChunkSizeBytes(object):
+    """
+    A convenience class to help describe a chunksize in bytes for the
+    :class:`featureflow.ByteStream` in terms of audio sample batch sizes.
+
+    Args:
+        samplerate (SampleRate): The samples-per-second factor
+        duration (numpy.timedelta64): The length of desired chunks in seconds
+        channels (int): The audio channels factor
+        bit_depth (int): The bit depth factor
+
+    Examples:
+        >>> from zounds import ChunkSizeBytes, Seconds, SR44100
+        >>> chunksize = ChunkSizeBytes(SR44100(), Seconds(30))
+        >>> chunksize
+        ChunkSizeBytes(samplerate=SR44100(f=2.2675736e-05, d=2.2675736e-05)...
+        >>> int(chunksize)
+        5292000
+
+    """
+    def __init__(self, samplerate, duration, channels=2, bit_depth=16):
+        self.duration = duration
+        self.bit_depth = bit_depth
+        self.channels = channels
+        self.samplerate = samplerate
+
+    def __int__(self):
+        byte_depth = self.bit_depth // 8
+        total_samples = int(self.duration / self.samplerate.frequency)
+        return int(total_samples * byte_depth * self.channels)
+
+    def __repr__(self):
+        msg = 'ChunkSizeBytes(samplerate={samplerate}, duration={duration}, ' \
+              'channels={channels}, bit_depth={bit_depth})'
+
+        return msg.format(
+            samplerate=self.samplerate,
+            duration=str(self.duration),
+            channels=self.channels,
+            bit_depth=self.bit_depth)