Start updating README
JohnVinyard committed Sep 4, 2017
1 parent 944b8c8 · commit 0f1a869
Showing 2 changed files with 157 additions and 73 deletions.
README.md: 151 changes (78 additions & 73 deletions)
[![PyPI](https://img.shields.io/pypi/v/zounds.svg)](https://pypi.python.org/pypi/zounds)
[![Docs](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat&maxAge=86400)](http://zounds.readthedocs.io/en/latest/?badge=latest)

Removed from README.md:

# Usage

Zounds is a dataflow library for building directed acyclic graphs that transform audio. It uses the
[featureflow](https://github.com/JohnVinyard/featureflow) library to define the processing pipelines.

For example, here's the definition of a pipeline that computes a sliding short-time fourier transform of some audio,
and then computes spectrograms on the bark and chroma scales.

```python
import featureflow as ff
import zounds

windowing = zounds.HalfLapped()
samplerate = zounds.SR44100()


class Settings(ff.PersistenceSettings):
    id_provider = ff.UuidProvider()
    key_builder = ff.StringDelimitedKeyBuilder()
    database = ff.FileSystemDatabase(path='data', key_builder=key_builder)


class AudioGraph(ff.BaseModel):

    meta = ff.JSONFeature(
        zounds.MetaData,
        encoder=zounds.AudioMetaDataEncoder,
        store=True)

    raw = ff.ByteStreamFeature(
        ff.ByteStream,
        chunksize=2 * 44100 * 30 * 2,
        needs=meta,
        store=False)

    ogg = zounds.OggVorbisFeature(
        zounds.OggVorbis,
        needs=raw,
        store=True)

    pcm = zounds.ConstantRateTimeSeriesFeature(
        zounds.AudioStream,
        needs=raw,
        store=False)

    resampled = zounds.ConstantRateTimeSeriesFeature(
        zounds.Resampler,
        needs=pcm,
        samplerate=samplerate,
        store=False)

    windowed = zounds.ConstantRateTimeSeriesFeature(
        zounds.SlidingWindow,
        needs=resampled,
        wscheme=windowing,
        wfunc=zounds.OggVorbisWindowingFunc(),
        store=False)

    fft = zounds.ConstantRateTimeSeriesFeature(
        zounds.FFT,
        needs=windowed,
        store=False)

    bark = zounds.ConstantRateTimeSeriesFeature(
        zounds.BarkBands,
        needs=fft,
        store=True)

    chroma = zounds.ConstantRateTimeSeriesFeature(
        zounds.Chroma,
        needs=fft,
        store=True)

    bfcc = zounds.ConstantRateTimeSeriesFeature(
        zounds.BFCC,
        needs=fft,
        store=True)


class Document(AudioGraph, Settings):
    pass
```

Data can be processed, and later retrieved as follows:

```python
>>> _id = Document.process(meta='https://example.com/audio.wav')
>>> doc = Document(_id)
>>> doc.chroma.shape
(321, 12)
```

Added to README.md:

# Motivation

Zounds is a python library for working with sound. Its primary goals are to:

- layer semantically meaningful audio manipulations on top of numpy arrays
- help to organize the definition and persistence of audio processing
  pipelines and machine learning experiments with sound

Audio processing graphs and machine learning pipelines are built using
[featureflow](https://github.com/JohnVinyard/featureflow).

# A Quick Example

```python
import zounds

Resampled = zounds.resampled(resample_to=zounds.SR11025())


@zounds.simple_in_memory_settings
class Sound(Resampled):
    """
    A simple pipeline that computes a perceptually weighted modified discrete
    cosine transform, and "persists" feature data in an in-memory store.
    """

    windowed = zounds.ArrayWithUnitsFeature(
        zounds.SlidingWindow,
        needs=Resampled.resampled,
        wscheme=zounds.HalfLapped(),
        wfunc=zounds.OggVorbisWindowingFunc(),
        store=True)

    mdct = zounds.ArrayWithUnitsFeature(
        zounds.MDCT,
        needs=windowed,
        store=False)

    weighted = zounds.ArrayWithUnitsFeature(
        zounds.FrequencyWeighting,
        weighting=zounds.AWeighting(),
        needs=mdct,
        store=False)


if __name__ == '__main__':

    # produce some audio to test our pipeline
    synth = zounds.SineSynthesizer(zounds.SR44100())
    samples = synth.synthesize(zounds.Seconds(5), [220., 440., 880.])

    # process the audio, and fetch features from our in-memory store
    _id = Sound.process(meta=samples.encode())
    sound = Sound(_id)

    # produce a time slice that starts half a second in, and lasts for two
    # seconds
    time_slice = zounds.TimeSlice(
        start=zounds.Milliseconds(500),
        duration=zounds.Seconds(2))
    # grab all the frequency information, for a subset of the duration
    snippet = sound.weighted[time_slice, :]

    # produce a frequency slice that spans 400hz-500hz
    freq_band = zounds.FrequencyBand(400, 500)
    # grab a subset of frequency information for the duration of the sound
    a440 = sound.mdct[:, freq_band]

    # produce a new set of coefficients where only the 440hz sine wave is
    # present
    filtered = sound.mdct.copy()
    filtered[:] = 0
    filtered[:, freq_band] = a440

    # apply a geometric scale, which more closely matches human pitch
    # perception, to the linear frequency axis
    scale = zounds.GeometricScale(50, 4000, 0.05, 100)
    bands = [sound.weighted[:, band] for band in scale]
    band_sizes = [band.shape[1] for band in bands]

    # reconstruct audio from the MDCT coefficients
    mdct_synth = zounds.MDCTSynthesizer()
    reconstructed = mdct_synth.synthesize(sound.mdct)
    filtered_reconstruction = mdct_synth.synthesize(filtered)

    # start an in-browser REPL that will allow you to listen to and visualize
    # the variables defined above (and any new ones you create in the session)
    app = zounds.ZoundsApp(
        model=Sound,
        audio_feature=Sound.ogg,
        visualization_feature=Sound.weighted,
        globals=globals(),
        locals=locals())
    app.start(8888)
```
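Once the quick example has run, you can also audition the reconstructions outside of the in-browser REPL. The sketch below is a continuation of the example above, not part of the library's documented API: it assumes `encode()` produces encoded audio as bytes or a file-like object, the same assumption the call `Sound.process(meta=samples.encode())` already relies on, and the resulting container format depends on `encode()`'s defaults.

```python
# Continuation of the quick example above: write the reconstructions to disk
# so they can be played back in any audio player. encode() is assumed to
# return either bytes or a file-like object of encoded audio, as its use with
# Sound.process(meta=samples.encode()) suggests.
def save(audio_samples, path):
    encoded = audio_samples.encode()
    data = encoded.read() if hasattr(encoded, 'read') else encoded
    with open(path, 'wb') as f:
        f.write(data)


save(reconstructed, 'reconstructed.wav')
save(filtered_reconstruction, 'filtered_reconstruction.wav')
```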
Find more inspiration in the [examples folder](https://github.com/JohnVinyard/zounds/tree/master/examples),
or on the [blog](http://johnvinyard.github.io/).

# Installation

examples/demo.py: 79 changes (79 additions & 0 deletions, new file)
```python
import zounds

Resampled = zounds.resampled(resample_to=zounds.SR11025())


@zounds.simple_in_memory_settings
class Sound(Resampled):
    """
    A simple pipeline that computes a perceptually weighted modified discrete
    cosine transform, and "persists" feature data in an in-memory store.
    """

    windowed = zounds.ArrayWithUnitsFeature(
        zounds.SlidingWindow,
        needs=Resampled.resampled,
        wscheme=zounds.HalfLapped(),
        wfunc=zounds.OggVorbisWindowingFunc(),
        store=True)

    mdct = zounds.ArrayWithUnitsFeature(
        zounds.MDCT,
        needs=windowed,
        store=False)

    weighted = zounds.ArrayWithUnitsFeature(
        zounds.FrequencyWeighting,
        weighting=zounds.AWeighting(),
        needs=mdct,
        store=False)


if __name__ == '__main__':

    # produce some audio to test our pipeline
    synth = zounds.SineSynthesizer(zounds.SR44100())
    samples = synth.synthesize(zounds.Seconds(5), [220., 440., 880.])

    # process the audio, and fetch features from our in-memory store
    _id = Sound.process(meta=samples.encode())
    sound = Sound(_id)

    # produce a time slice that starts half a second in, and lasts for two
    # seconds
    time_slice = zounds.TimeSlice(
        start=zounds.Milliseconds(500),
        duration=zounds.Seconds(2))
    # grab all the frequency information, for a subset of the duration
    snippet = sound.weighted[time_slice, :]

    # produce a frequency slice that spans 400hz-500hz
    freq_band = zounds.FrequencyBand(400, 500)
    # grab a subset of frequency information for the duration of the sound
    a440 = sound.mdct[:, freq_band]

    # produce a new set of coefficients where only the 440hz sine wave is
    # present
    filtered = sound.mdct.copy()
    filtered[:] = 0
    filtered[:, freq_band] = a440

    # apply a geometric scale, which more closely matches human pitch
    # perception, to the linear frequency axis
    scale = zounds.GeometricScale(50, 4000, 0.05, 100)
    bands = [sound.weighted[:, band] for band in scale]
    band_sizes = [band.shape[1] for band in bands]

    # reconstruct audio from the MDCT coefficients
    mdct_synth = zounds.MDCTSynthesizer()
    reconstructed = mdct_synth.synthesize(sound.mdct)
    filtered_reconstruction = mdct_synth.synthesize(filtered)

    # start an in-browser REPL that will allow you to listen to and visualize
    # the variables defined above (and any new ones you create in the session)
    app = zounds.ZoundsApp(
        model=Sound,
        audio_feature=Sound.ogg,
        visualization_feature=Sound.weighted,
        globals=globals(),
        locals=locals())
    app.start(8888)
```
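The pipeline defined in the demo isn't limited to synthesized audio. The pre-existing README example above passes a URL to `process`, so a variation along the following lines should also work; this is a hedged sketch that reuses the placeholder URL from that example and assumes the demo's `Sound` class has already been defined.

```python
# Hypothetical variation on demo.py: build features for audio fetched from a
# URL instead of synthesized samples. The URL below is the same placeholder
# used in the earlier README example, not a real resource.
_id = Sound.process(meta='https://example.com/audio.wav')
sound = Sound(_id)

# features behave like numpy arrays with meaningful time/frequency dimensions
print(sound.windowed.shape)
print(sound.mdct.shape)
```

To try the demo itself, run the script and open a browser to the port passed to `app.start` (8888 here).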
