Start updating README
JohnVinyard committed Sep 4, 2017
1 parent 944b8c8 · commit 0f1a869
Showing 2 changed files with 157 additions and 73 deletions.
README.md: 151 changes (78 additions & 73 deletions)
[![PyPI](https://img.shields.io/pypi/v/zounds.svg)](https://pypi.python.org/pypi/zounds)
[![Docs](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat&maxAge=86400)](http://zounds.readthedocs.io/en/latest/?badge=latest)

Removed from README.md:

# Usage

Zounds is a dataflow library for building directed acyclic graphs that transform audio. It uses the
[featureflow](https://github.com/JohnVinyard/featureflow) library to define the processing pipelines.

For example, here's the definition of a pipeline that computes a sliding short-time fourier transform of some audio,
and then computes spectrograms on the bark and chroma scales.

```python
import featureflow as ff
import zounds

windowing = zounds.HalfLapped()
samplerate = zounds.SR44100()


class Settings(ff.PersistenceSettings):
    id_provider = ff.UuidProvider()
    key_builder = ff.StringDelimitedKeyBuilder()
    database = ff.FileSystemDatabase(path='data', key_builder=key_builder)


class AudioGraph(ff.BaseModel):

    meta = ff.JSONFeature(
        zounds.MetaData,
        encoder=zounds.AudioMetaDataEncoder,
        store=True)

    raw = ff.ByteStreamFeature(
        ff.ByteStream,
        chunksize=2 * 44100 * 30 * 2,
        needs=meta,
        store=False)

    ogg = zounds.OggVorbisFeature(
        zounds.OggVorbis,
        needs=raw,
        store=True)

    pcm = zounds.ConstantRateTimeSeriesFeature(
        zounds.AudioStream,
        needs=raw,
        store=False)

    resampled = zounds.ConstantRateTimeSeriesFeature(
        zounds.Resampler,
        needs=pcm,
        samplerate=samplerate,
        store=False)

    windowed = zounds.ConstantRateTimeSeriesFeature(
        zounds.SlidingWindow,
        needs=resampled,
        wscheme=windowing,
        wfunc=zounds.OggVorbisWindowingFunc(),
        store=False)

    fft = zounds.ConstantRateTimeSeriesFeature(
        zounds.FFT,
        needs=windowed,
        store=False)

    bark = zounds.ConstantRateTimeSeriesFeature(
        zounds.BarkBands,
        needs=fft,
        store=True)

    chroma = zounds.ConstantRateTimeSeriesFeature(
        zounds.Chroma,
        needs=fft,
        store=True)

    bfcc = zounds.ConstantRateTimeSeriesFeature(
        zounds.BFCC,
        needs=fft,
        store=True)


class Document(AudioGraph, Settings):
    pass
```

Data can be processed, and later retrieved as follows:

```python
>>> _id = Document.process(meta='https://example.com/audio.wav')
>>> doc = Document(_id)
>>> doc.chroma.shape
(321, 12)
```

Added to README.md:

# Motivation

Zounds is a python library for working with sound. Its primary goals are to:

- layer semantically meaningful audio manipulations on top of numpy arrays
- help to organize the definition and persistence of audio processing
  pipelines and machine learning experiments with sound

Audio processing graphs and machine learning pipelines are built using
[featureflow](https://github.com/JohnVinyard/featureflow).

# A Quick Example

```python
import zounds

Resampled = zounds.resampled(resample_to=zounds.SR11025())


@zounds.simple_in_memory_settings
class Sound(Resampled):
    """
    A simple pipeline that computes a perceptually weighted modified discrete
    cosine transform, and "persists" feature data in an in-memory store.
    """

    windowed = zounds.ArrayWithUnitsFeature(
        zounds.SlidingWindow,
        needs=Resampled.resampled,
        wscheme=zounds.HalfLapped(),
        wfunc=zounds.OggVorbisWindowingFunc(),
        store=True)

    mdct = zounds.ArrayWithUnitsFeature(
        zounds.MDCT,
        needs=windowed,
        store=False)

    weighted = zounds.ArrayWithUnitsFeature(
        zounds.FrequencyWeighting,
        weighting=zounds.AWeighting(),
        needs=mdct,
        store=False)


if __name__ == '__main__':

    # produce some audio to test our pipeline
    synth = zounds.SineSynthesizer(zounds.SR44100())
    samples = synth.synthesize(zounds.Seconds(5), [220., 440., 880.])

    # process the audio, and fetch features from our in-memory store
    _id = Sound.process(meta=samples.encode())
    sound = Sound(_id)

    # produce a time slice that starts half a second in, and lasts for two
    # seconds
    time_slice = zounds.TimeSlice(
        start=zounds.Milliseconds(500),
        duration=zounds.Seconds(2))
    # grab all the frequency information, for a subset of the duration
    snippet = sound.weighted[time_slice, :]

    # produce a frequency slice that spans 400hz-500hz
    freq_band = zounds.FrequencyBand(400, 500)
    # grab a subset of frequency information for the duration of the sound
    a440 = sound.mdct[:, freq_band]

    # produce a new set of coefficients where only the 440hz sine wave is
    # present
    filtered = sound.mdct.copy()
    filtered[:] = 0
    filtered[:, freq_band] = a440

    # apply a geometric scale, which more closely matches human pitch
    # perception, to the linear frequency axis
    scale = zounds.GeometricScale(50, 4000, 0.05, 100)
    bands = [sound.weighted[:, band] for band in scale]
    band_sizes = [band.shape[1] for band in bands]

    # reconstruct audio from the MDCT coefficients
    mdct_synth = zounds.MDCTSynthesizer()
    reconstructed = mdct_synth.synthesize(sound.mdct)
    filtered_reconstruction = mdct_synth.synthesize(filtered)

    # start an in-browser REPL that will allow you to listen to and visualize
    # the variables defined above (and any new ones you create in the session)
    app = zounds.ZoundsApp(
        model=Sound,
        audio_feature=Sound.ogg,
        visualization_feature=Sound.weighted,
        globals=globals(),
        locals=locals())
    app.start(8888)
```
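Once the quick example has run, you can also audition the reconstructions outside of the in-browser REPL. The sketch below is a continuation of the example above, not part of the library's documented API: it assumes `encode()` produces encoded audio as bytes or a file-like object, the same assumption the call `Sound.process(meta=samples.encode())` already relies on, and the resulting container format depends on `encode()`'s defaults.

```python
# Continuation of the quick example above: write the reconstructions to disk
# so they can be played back in any audio player. encode() is assumed to
# return either bytes or a file-like object of encoded audio, as its use with
# Sound.process(meta=samples.encode()) suggests.
def save(audio_samples, path):
    encoded = audio_samples.encode()
    data = encoded.read() if hasattr(encoded, 'read') else encoded
    with open(path, 'wb') as f:
        f.write(data)


save(reconstructed, 'reconstructed.wav')
save(filtered_reconstruction, 'filtered_reconstruction.wav')
```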
Find more inspiration in the [examples folder](https://github.com/JohnVinyard/zounds/tree/master/examples),
or on the [blog](http://johnvinyard.github.io/).

# Installation

examples/demo.py: 79 changes (79 additions & 0 deletions, new file)
```python
import zounds

Resampled = zounds.resampled(resample_to=zounds.SR11025())


@zounds.simple_in_memory_settings
class Sound(Resampled):
    """
    A simple pipeline that computes a perceptually weighted modified discrete
    cosine transform, and "persists" feature data in an in-memory store.
    """

    windowed = zounds.ArrayWithUnitsFeature(
        zounds.SlidingWindow,
        needs=Resampled.resampled,
        wscheme=zounds.HalfLapped(),
        wfunc=zounds.OggVorbisWindowingFunc(),
        store=True)

    mdct = zounds.ArrayWithUnitsFeature(
        zounds.MDCT,
        needs=windowed,
        store=False)

    weighted = zounds.ArrayWithUnitsFeature(
        zounds.FrequencyWeighting,
        weighting=zounds.AWeighting(),
        needs=mdct,
        store=False)


if __name__ == '__main__':

    # produce some audio to test our pipeline
    synth = zounds.SineSynthesizer(zounds.SR44100())
    samples = synth.synthesize(zounds.Seconds(5), [220., 440., 880.])

    # process the audio, and fetch features from our in-memory store
    _id = Sound.process(meta=samples.encode())
    sound = Sound(_id)

    # produce a time slice that starts half a second in, and lasts for two
    # seconds
    time_slice = zounds.TimeSlice(
        start=zounds.Milliseconds(500),
        duration=zounds.Seconds(2))
    # grab all the frequency information, for a subset of the duration
    snippet = sound.weighted[time_slice, :]

    # produce a frequency slice that spans 400hz-500hz
    freq_band = zounds.FrequencyBand(400, 500)
    # grab a subset of frequency information for the duration of the sound
    a440 = sound.mdct[:, freq_band]

    # produce a new set of coefficients where only the 440hz sine wave is
    # present
    filtered = sound.mdct.copy()
    filtered[:] = 0
    filtered[:, freq_band] = a440

    # apply a geometric scale, which more closely matches human pitch
    # perception, to the linear frequency axis
    scale = zounds.GeometricScale(50, 4000, 0.05, 100)
    bands = [sound.weighted[:, band] for band in scale]
    band_sizes = [band.shape[1] for band in bands]

    # reconstruct audio from the MDCT coefficients
    mdct_synth = zounds.MDCTSynthesizer()
    reconstructed = mdct_synth.synthesize(sound.mdct)
    filtered_reconstruction = mdct_synth.synthesize(filtered)

    # start an in-browser REPL that will allow you to listen to and visualize
    # the variables defined above (and any new ones you create in the session)
    app = zounds.ZoundsApp(
        model=Sound,
        audio_feature=Sound.ogg,
        visualization_feature=Sound.weighted,
        globals=globals(),
        locals=locals())
    app.start(8888)
```
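The pipeline defined in the demo isn't limited to synthesized audio. The pre-existing README example above passes a URL to `process`, so a variation along the following lines should also work; this is a hedged sketch that reuses the placeholder URL from that example and assumes the demo's `Sound` class has already been defined.

```python
# Hypothetical variation on demo.py: build features for audio fetched from a
# URL instead of synthesized samples. The URL below is the same placeholder
# used in the earlier README example, not a real resource.
_id = Sound.process(meta='https://example.com/audio.wav')
sound = Sound(_id)

# features behave like numpy arrays with meaningful time/frequency dimensions
print(sound.windowed.shape)
print(sound.mdct.shape)
```

To try the demo itself, run the script and open a browser to the port passed to `app.start` (8888 here).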
