Skip to content

Commit

Permalink
Use the raw version of musicnet, and lose the intervaltree dependency required by the numpy format
Browse files Browse the repository at this point in the history
  • Loading branch information
JohnVinyard committed Nov 28, 2018
1 parent 1d81c44 commit 7a7d4d0
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 21 deletions.
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ requests
tornado==4.5.3
pysoundfile
matplotlib==1.5.0
intervaltree
numpy==1.15.3
scipy==1.1.0
torch==0.4.0
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@
'pysoundfile',
'matplotlib==1.5.0',
'argparse',
'intervaltree',
'ujson',
'numpy==1.15.3',
'scipy==1.1.0',
Expand Down
38 changes: 19 additions & 19 deletions zounds/datasets/musicnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,26 @@
from util import ensure_local_file
from zounds.soundfile import AudioMetaData
from zounds.timeseries import SR44100, AudioSamples
import os


class MusicNet(object):
"""
Provides access to the audio and high-level metadata from MusicNet. More
info can be found here:
https://homes.cs.washington.edu/~thickstn/musicnet.html
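This assumes you've already downloaded the archive at
https://homes.cs.washington.edu/~thickstn/media/musicnet.tar.gz and
extracted it into the directory passed as ``path``; audio files are read
from that directory's ``train_data`` subdirectory.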
"""
def __init__(self, path):
super(MusicNet, self).__init__()
self.path = path
self._url = \
'https://homes.cs.washington.edu/~thickstn/media/musicnet.npz'
self._metadata = \
'https://homes.cs.washington.edu/~thickstn/media/musicnet_metadata.csv'
self._samplerate = SR44100()

def __iter__(self):
local_data = ensure_local_file(self._url, self.path)
local_metadata = ensure_local_file(self._metadata, self.path)

metadata = dict()
Expand All @@ -33,19 +34,18 @@ def __iter__(self):
for row in reader:
metadata[row['id']] = row

with open(local_data, 'rb') as f:
data = np.load(f)
for k, v in data.iteritems():
_id = k
samples, labels = v
samples = AudioSamples(samples, self._samplerate)
meta = metadata[_id]
url = \
'https://homes.cs.washington.edu/~thickstn/media/{_id}'\
.format(**locals())
meta['web_url'] = \
'https://homes.cs.washington.edu/~thickstn/musicnet.html'
yield AudioMetaData(
uri=PreDownload(samples.encode().read(), url),
samplerate=int(SR44100()),
**meta)
train_audio_path = os.path.join(self.path, 'train_data')

for filename in os.listdir(train_audio_path):
full_path = os.path.join(train_audio_path, filename)
_id, ext = os.path.splitext(filename)
url = \
'https://homes.cs.washington.edu/~thickstn/media/{_id}'\
.format(**locals())
meta = metadata[_id]
samples = AudioSamples.from_file(full_path)
uri = PreDownload(samples.encode().read(), url)
yield AudioMetaData(
uri=uri,
samplerate=int(self._samplerate),
**meta)

0 comments on commit 7a7d4d0

Please sign in to comment.