From 27cfc7766c2b637a8474934b41ddcd8d95864651 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Thu, 10 Dec 2015 15:03:29 +0100 Subject: [PATCH 01/18] add tests for features.beats_hmm module --- madmom/features/beats_hmm.pyx | 4 +- tests/test_features_beats_hmm.py | 206 +++++++++++++++++++++++++++++++ tests/test_ml_hmm.py | 57 ++++++++- 3 files changed, 262 insertions(+), 5 deletions(-) create mode 100644 tests/test_features_beats_hmm.py diff --git a/madmom/features/beats_hmm.pyx b/madmom/features/beats_hmm.pyx index 70af97994..4fab45039 100644 --- a/madmom/features/beats_hmm.pyx +++ b/madmom/features/beats_hmm.pyx @@ -67,6 +67,8 @@ class BeatTrackingStateSpace(object): # quantize to integer tempo states states = np.unique(np.round(states)) num_log_states += 1 + # beat_states is the number of states each tempo has + # TODO: refactor this or find a better name self.beat_states = np.ascontiguousarray(states, dtype=np.uint32) # compute the position and tempo mapping self.position_mapping, self.tempo_mapping = self.compute_mapping() @@ -74,7 +76,7 @@ class BeatTrackingStateSpace(object): @property def num_states(self): """Number of states.""" - return np.sum(self.beat_states) + return int(np.sum(self.beat_states)) @property def num_tempo_states(self): diff --git a/tests/test_features_beats_hmm.py b/tests/test_features_beats_hmm.py new file mode 100644 index 000000000..c24e9a35d --- /dev/null +++ b/tests/test_features_beats_hmm.py @@ -0,0 +1,206 @@ +# encoding: utf-8 +# pylint: skip-file +""" +This file contains tests for the madmom.ml.hmm module. + +""" + +from __future__ import absolute_import, division, print_function + +import unittest +from madmom.ml.hmm import * +from madmom.features.beats_hmm import * + + +class TestBeatTrackingStateSpaceClass(unittest.TestCase): + + def setUp(self): + self.btss = BeatTrackingStateSpace(1, 4) + + def test_types(self): + self.assertIsInstance(self.btss.beat_states, np.ndarray) + self.assertIsInstance(self.btss.position_mapping, np.ndarray) + self.assertIsInstance(self.btss.tempo_mapping, np.ndarray) + self.assertIsInstance(self.btss.num_states, int) + self.assertIsInstance(self.btss.num_tempo_states, int) + self.assertIsInstance(self.btss.first_beat_positions, np.ndarray) + self.assertIsInstance(self.btss.last_beat_positions, np.ndarray) + + def test_values(self): + print(self.btss.beat_states) + self.assertTrue(np.allclose(self.btss.beat_states, + [1, 2, 3, 4])) + self.assertTrue(np.allclose(self.btss.position_mapping, + [0, 0, 0.5, 0, 1. / 3, 2. / 3, + 0, 0.25, 0.5, 0.75])) + self.assertTrue(np.allclose(self.btss.tempo_mapping, + [0, 1, 1, 2, 2, 2, 3, 3, 3, 3])) + self.assertTrue(np.allclose(self.btss.first_beat_positions, + [0, 1, 3, 6])) + self.assertTrue(np.allclose(self.btss.last_beat_positions, + [0, 2, 5, 9])) + self.assertTrue(self.btss.num_states == 10) + self.assertTrue(self.btss.num_tempo_states == 4) + self.assertTrue(np.allclose(self.btss.position(np.arange(10)), + [0, 0, 0.5, 0, 1. / 3, 2. / 3, + 0, 0.25, 0.5, 0.75])) + self.assertTrue(np.allclose(self.btss.tempo(np.arange(10)), + [0, 1, 1, 2, 2, 2, 3, 3, 3, 3])) + + +class TestBeatTrackingTransitionModelClass(unittest.TestCase): + + def setUp(self): + btss = BeatTrackingStateSpace(1, 4) + self.tm = BeatTrackingTransitionModel(btss, 100) + + def test_types(self): + self.assertIsInstance(self.tm, BeatTrackingTransitionModel) + self.assertIsInstance(self.tm, TransitionModel) + self.assertIsInstance(self.tm.state_space, BeatTrackingStateSpace) + self.assertIsInstance(self.tm.transition_lambda, np.ndarray) + self.assertIsInstance(self.tm.states, np.ndarray) + self.assertIsInstance(self.tm.pointers, np.ndarray) + self.assertIsInstance(self.tm.probabilities, np.ndarray) + self.assertIsInstance(self.tm.log_probabilities, np.ndarray) + self.assertIsInstance(self.tm.num_states, int) + self.assertIsInstance(self.tm.num_transitions, int) + self.assertTrue(self.tm.states.dtype == np.uint32) + self.assertTrue(self.tm.pointers.dtype == np.uint32) + self.assertTrue(self.tm.probabilities.dtype == np.float) + self.assertTrue(self.tm.log_probabilities.dtype == np.float) + + def test_values(self): + self.assertTrue(np.allclose(self.tm.states, + [0, 2, 5, 1, 5, 9, 3, 4, 5, 9, 6, 7, 8])) + self.assertTrue(np.allclose(self.tm.pointers, + [0, 1, 3, 4, 6, 7, 8, 10, 11, 12, 13])) + self.assertTrue(np.allclose(self.tm.probabilities, + [1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1])) + self.assertTrue(np.allclose(self.tm.log_probabilities, + [0, 0, -33.3333333, 0, 0, -25, 0, 0, + -33.3333333, 0, 0, 0, 0])) + self.assertTrue(self.tm.num_states == 10) + self.assertTrue(self.tm.num_transitions == 13) + + +class TestBeatTrackingObservationModelClass(unittest.TestCase): + + def setUp(self): + btss = BeatTrackingStateSpace(1, 4) + self.om = BeatTrackingObservationModel(btss, 4) + self.obs = np.asarray([1, 0.1, 0.01, 0], dtype=np.float32) + + def test_types(self): + self.assertIsInstance(self.om.pointers, np.ndarray) + self.assertIsInstance(self.om.densities(self.obs), np.ndarray) + self.assertIsInstance(self.om.log_densities(self.obs), np.ndarray) + self.assertTrue(self.om.pointers.dtype == np.uint32) + self.assertTrue(self.om.densities(self.obs).dtype == np.float) + self.assertTrue(self.om.log_densities(self.obs).dtype == np.float) + + def test_values(self): + self.assertTrue(np.allclose(self.om.pointers, + [0, 0, 1, 0, 1, 1, 0, 1, 1, 1])) + self.assertTrue(np.allclose(self.om.densities(self.obs), + [[1, 0], [0.1, 0.3], + [0.01, 0.33], [0, 1. / 3]])) + self.assertTrue(np.allclose(self.om.log_densities(self.obs), + [[0, -np.inf], [-2.30258508, -1.20397281], + [-4.60517021, -1.10866262], + [-np.inf, -1.09861229]])) + + +class TestPatternTrackingStateSpaceClass(unittest.TestCase): + + def setUp(self): + self.ptss = PatternTrackingStateSpace([1, 2], [4, 6]) + + def test_types(self): + self.assertIsInstance(self.ptss.beat_states, list) + self.assertIsInstance(self.ptss.position_mapping, np.ndarray) + self.assertIsInstance(self.ptss.tempo_mapping, np.ndarray) + self.assertIsInstance(self.ptss.num_states, int) + self.assertIsInstance(self.ptss.num_tempo_states, list) + self.assertIsInstance(self.ptss.num_patterns, int) + + def test_values(self): + self.assertTrue(np.allclose(self.ptss.beat_states[0], [1, 2, 3, 4])) + self.assertTrue(np.allclose(self.ptss.beat_states[1], [2, 3, 4, 5, 6])) + self.assertTrue(self.ptss.num_states == 30) + self.assertTrue(self.ptss.num_tempo_states == [4, 5]) + self.assertTrue(self.ptss.num_patterns == 2) + # first pattern + self.assertTrue(np.allclose(self.ptss.position(np.arange(10)), + [0, 0, 0.5, 0, 1. / 3, 2. / 3, + 0, 0.25, 0.5, 0.75])) + self.assertTrue(np.allclose(self.ptss.tempo(np.arange(10)), + [0, 1, 1, 2, 2, 2, 3, 3, 3, 3])) + self.assertTrue(np.allclose(self.ptss.pattern(np.arange(10)), + np.zeros(10))) + # second pattern + self.assertTrue(np.allclose(self.ptss.position(np.arange(10, 30)), + [0, 0.5, + 0, 1. / 3, 2. / 3, + 0, 0.25, 0.5, 0.75, + 0, 0.2, 0.4, 0.6, 0.8, + 0, 1. / 6, 2. / 6, 0.5, 4. / 6, 5. / 6])) + self.assertTrue(np.allclose(self.ptss.tempo(np.arange(10, 30)), + [0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4])) + self.assertTrue(np.allclose(self.ptss.pattern(np.arange(10, 30)), + np.ones(20))) + + +class TestPatternTrackingTransitionModelClass(unittest.TestCase): + + def setUp(self): + ptss = PatternTrackingStateSpace([1, 2], [4, 6]) + self.tm = PatternTrackingTransitionModel(ptss, 100) + + def test_types(self): + self.assertIsInstance(self.tm, PatternTrackingTransitionModel) + self.assertIsInstance(self.tm, TransitionModel) + # self.assertIsInstance(self.tm.state_space, PatternTrackingStateSpace) + self.assertIsInstance(self.tm.transition_lambda, list) + self.assertIsInstance(self.tm.states, np.ndarray) + self.assertIsInstance(self.tm.pointers, np.ndarray) + self.assertIsInstance(self.tm.probabilities, np.ndarray) + self.assertIsInstance(self.tm.log_probabilities, np.ndarray) + self.assertIsInstance(self.tm.num_states, int) + self.assertIsInstance(self.tm.num_transitions, int) + self.assertTrue(self.tm.states.dtype == np.uint32) + self.assertTrue(self.tm.pointers.dtype == np.uint32) + self.assertTrue(self.tm.probabilities.dtype == np.float) + self.assertTrue(self.tm.log_probabilities.dtype == np.float) + + def test_values(self): + print(self.tm.probabilities) + print(self.tm.log_probabilities) + # the first pattern has 13 transitions + self.assertTrue(np.allclose(self.tm.states[:13], + [0, 2, 5, 1, 5, 9, 3, 4, 5, 9, 6, 7, 8])) + self.assertTrue(np.allclose(self.tm.states[13:], + [11, 14, 10, 14, 18, 12, 13, 14, 18, 23, + 29, 15, 16, 17, 18, 23, 29, 19, 20, 21, + 22, 23, 29, 24, 25, 26, 27, 28])) + # the first pattern has 10 states (pointers has one more element) + self.assertTrue(np.allclose(self.tm.pointers[:11], + [0, 1, 3, 4, 6, 7, 8, 10, 11, 12, 13])) + self.assertTrue(np.allclose(self.tm.pointers[11:], + [15, 16, 18, 19, 20, 24, 25, 26, 27, 30, + 31, 32, 33, 34, 36, 37, 38, 39, 40, 41])) + self.assertTrue(np.allclose(self.tm.probabilities, + [1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 2.06e-09, 0, 1, 1, + 1, 0, 1, 5.78e-08, 1, 1, 1, 1, 2.06e-09, + 1, 1, 1, 1, 1, 1])) + self.assertTrue(np.allclose(self.tm.log_probabilities, + [0, 0, -33.3333333, 0, 0, -25, 0, 0, + -33.3333333, 0, 0, 0, 0, 0, + -33.3333333, 0, 0, -25, 0, 0, + -33.3333333, 0, -20, -33.3333334, 0, 0, + 0, -25, -4.1e-09, -16.666666, 0, 0, 0, + 0, -20, -5.78e-08, 0, 0, 0, 0, 0])) + self.assertTrue(self.tm.num_states == 30) + self.assertTrue(self.tm.num_transitions == 41) diff --git a/tests/test_ml_hmm.py b/tests/test_ml_hmm.py index fb12992cc..d046cf426 100644 --- a/tests/test_ml_hmm.py +++ b/tests/test_ml_hmm.py @@ -55,23 +55,72 @@ [0.58171672, 0.30436365, 0.11391962]]) -class TestHmmInference(unittest.TestCase): +class TestTransitionModelClass(unittest.TestCase): def setUp(self): frm, to, prob = list(zip(*TRANSITIONS)) + self.tm = TransitionModel.from_dense(to, frm, prob) + + def test_types(self): + self.assertIsInstance(self.tm.states, np.ndarray) + self.assertIsInstance(self.tm.pointers, np.ndarray) + self.assertIsInstance(self.tm.probabilities, np.ndarray) + self.assertIsInstance(self.tm.log_probabilities, np.ndarray) + self.assertIsInstance(self.tm.num_states, int) + self.assertIsInstance(self.tm.num_transitions, int) + self.assertTrue(self.tm.states.dtype == np.uint32) + self.assertTrue(self.tm.pointers.dtype == np.uint32) + self.assertTrue(self.tm.probabilities.dtype == np.float) + self.assertTrue(self.tm.log_probabilities.dtype == np.float) + + def test_values(self): + self.assertTrue(np.allclose(self.tm.states, [0, 1, 0, 1, 2, 1, 2])) + self.assertTrue(np.allclose(self.tm.pointers, [0, 2, 5, 7])) + self.assertTrue(np.allclose(self.tm.probabilities, + [0.7, 0.1, 0.3, 0.6, 0.3, 0.3, 0.7])) + log_prob = [-0.35667494, -2.30258509, -1.2039728, -0.51082562, + -1.2039728, -1.2039728, -0.35667494] + self.assertTrue(np.allclose(self.tm.log_probabilities, log_prob)) + self.assertTrue(self.tm.num_states == 3) + self.assertTrue(self.tm.num_transitions == 7) + + +class TestDiscreteObservationModelClass(unittest.TestCase): + def setUp(self): + self.om = DiscreteObservationModel(OBS_PROB) + + def test_types(self): + self.assertIsInstance(self.om.pointers, np.ndarray) + self.assertIsInstance(self.om.densities(OBS_SEQ), np.ndarray) + self.assertIsInstance(self.om.log_densities(OBS_SEQ), np.ndarray) + self.assertTrue(self.om.pointers.dtype == np.uint32) + self.assertTrue(self.om.densities(OBS_SEQ).dtype == np.float) + self.assertTrue(self.om.log_densities(OBS_SEQ).dtype == np.float) + + def test_values(self): + self.assertTrue(np.allclose(self.om.pointers, [0, 1, 2])) + self.assertTrue(np.allclose(self.om.observation_probabilities, + OBS_PROB)) + self.assertTrue(np.allclose(self.om.densities(OBS_SEQ), + OBS_PROB[:, OBS_SEQ].T)) + self.assertTrue(np.allclose(self.om.log_densities(OBS_SEQ), + np.log(OBS_PROB[:, OBS_SEQ].T))) + + +class TestHiddenMarkovModelClass(unittest.TestCase): + + def setUp(self): + frm, to, prob = list(zip(*TRANSITIONS)) tm = TransitionModel.from_dense(to, frm, prob) om = DiscreteObservationModel(OBS_PROB) - self.hmm = HiddenMarkovModel(tm, om, PRIOR) def test_viterbi(self): correct_state_seq = np.array([0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]) correct_log_p = -35.2104311327 - state_seq, log_p = self.hmm.viterbi(OBS_SEQ) - self.assertTrue((state_seq == correct_state_seq).all()) self.assertAlmostEqual(log_p, correct_log_p) From 46e4a74b93fbe30743c9e9a4c102814975b1b018 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Fri, 11 Dec 2015 16:12:35 +0100 Subject: [PATCH 02/18] Renamed the state spaces, transition and observation models. Also renamed some attributes of the classes to better reflect what they actually are, e.g. tempi to intervals. --- madmom/features/beats.py | 10 +- madmom/features/beats_hmm.pyx | 312 +++++++++++++++---------------- madmom/features/tempo.py | 13 +- tests/test_features_beats_hmm.py | 72 +++---- 4 files changed, 200 insertions(+), 207 deletions(-) diff --git a/madmom/features/beats.py b/madmom/features/beats.py index de3082f8e..21c669ba3 100755 --- a/madmom/features/beats.py +++ b/madmom/features/beats.py @@ -677,9 +677,9 @@ def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, # pylint: disable=no-name-in-module from madmom.ml.hmm import HiddenMarkovModel as Hmm - from .beats_hmm import (BeatTrackingStateSpace as St, - BeatTrackingTransitionModel as Tm, - BeatTrackingObservationModel as Om) + from .beats_hmm import (BeatStateSpace as St, + BeatTransitionModel as Tm, + RNNBeatTrackingObservationModel as Om) # convert timing information to construct state space min_interval = 60. * fps / max_bpm @@ -912,8 +912,8 @@ def __init__(self, pattern_files, min_bpm=MIN_BPM, max_bpm=MAX_BPM, import pickle from madmom.ml.hmm import HiddenMarkovModel as Hmm - from .beats_hmm import (PatternTrackingStateSpace as St, - PatternTrackingTransitionModel as Tm, + from .beats_hmm import (MultiPatternStateSpace as St, + MultiPatternTransitionModel as Tm, GMMPatternTrackingObservationModel as Om) # expand num_tempo_states and transition_lambda to lists if needed diff --git a/madmom/features/beats_hmm.pyx b/madmom/features/beats_hmm.pyx index 4fab45039..c16e1d858 100644 --- a/madmom/features/beats_hmm.pyx +++ b/madmom/features/beats_hmm.pyx @@ -27,19 +27,20 @@ from libc.math cimport log, exp from madmom.ml.hmm import TransitionModel, ObservationModel -class BeatTrackingStateSpace(object): +# state spaces +class BeatStateSpace(object): """ State space for beat tracking with a HMM. Parameters ---------- min_interval : float - Minimum tempo (i.e. inter beat interval) to model. + Minimum interval to model. max_interval : float - Maximum tempo (i.e. inter beat interval) to model. - num_tempo_states : int, optional - Number of tempo states; if set, limit the number of states and use a - log spacing, otherwise use a linear spacing. + Maximum interval to model. + num_intervals : int, optional + Number of intervals to model; if set, limit the number of intervals + and use a log spacing instead of the default linear spacing. References ---------- @@ -50,142 +51,137 @@ class BeatTrackingStateSpace(object): """ - def __init__(self, min_interval, max_interval, num_tempo_states=None): - # use a linear spacing as default - states = np.arange(np.round(min_interval), np.round(max_interval) + 1) - # if num_tempo_states is given (and smaller than the number of states - # of the linear spacing) use a log spacing and limit the number of - # states to the given value - if num_tempo_states is not None and num_tempo_states < len(states): - # we must approach num_tempo_states iteratively - num_log_states = num_tempo_states - states = [] - while len(states) < num_tempo_states: - states = np.logspace(np.log2(min_interval), + def __init__(self, min_interval, max_interval, num_intervals=None): + # per default, use a linear spacing of the tempi + intervals = np.arange(np.round(min_interval), + np.round(max_interval) + 1) + # if num_intervals is given (and smaller than the length of the linear + # spacing of the intervals) use a log spacing and limit the number of + # intervals to the given value + if num_intervals is not None and num_intervals < len(intervals): + # we must approach intervals iteratively + num_log_tempi = num_intervals + intervals = [] + while len(intervals) < num_intervals: + intervals = np.logspace(np.log2(min_interval), np.log2(max_interval), - num_log_states, base=2) + num_log_tempi, base=2) # quantize to integer tempo states - states = np.unique(np.round(states)) - num_log_states += 1 - # beat_states is the number of states each tempo has - # TODO: refactor this or find a better name - self.beat_states = np.ascontiguousarray(states, dtype=np.uint32) - # compute the position and tempo mapping - self.position_mapping, self.tempo_mapping = self.compute_mapping() + intervals = np.unique(np.round(intervals)) + num_log_tempi += 1 + # intervals to model + self.intervals = np.ascontiguousarray(intervals, dtype=np.uint32) + # compute the position and interval mapping + self.position_mapping, self.interval_mapping = self.compute_mapping() @property def num_states(self): """Number of states.""" - return int(np.sum(self.beat_states)) + return int(np.sum(self.intervals)) @property - def num_tempo_states(self): - """Number of tempo states.""" - return len(self.beat_states) + def num_intervals(self): + """Number of different intervals.""" + return len(self.intervals) @property - def first_beat_positions(self): - """First state for each tempo.""" - return np.cumsum(np.r_[0, self.beat_states[:-1]]).astype(np.uint32) + def first_states(self): + """First state for each interval.""" + return np.cumsum(np.r_[0, self.intervals[:-1]]).astype(np.uint32) @property - def last_beat_positions(self): - """Last state for each tempo.""" - return np.cumsum(self.beat_states).astype(np.uint32) - 1 + def last_states(self): + """Last state for each interval.""" + return np.cumsum(self.intervals).astype(np.uint32) - 1 @cython.cdivision(True) @cython.boundscheck(False) @cython.wraparound(False) def compute_mapping(self): """ - Compute the mapping from state numbers to position and tempo states. + Compute the mapping from state numbers to position inside the beat + and interval states. Returns ------- position_mapping : numpy array Mapping from state number to position inside beat. - tempo_mapping : numpy array - Mapping from state number to tempo (i.e. inter beat interval). + interval_mapping : numpy array + Mapping from state number to interval. """ # counters etc. - cdef unsigned int tempo_state, first_beat, last_beat - cdef unsigned int num_states = np.sum(self.beat_states) - cdef float pos, num_beat_states - - # mapping arrays from state numbers to tempo / position - cdef unsigned int [::1] tempo = \ + cdef unsigned int interval, first_state, last_state + cdef unsigned int num_states = np.sum(self.intervals) + cdef float pos + # mapping arrays from state numbers to interval / position + cdef unsigned int [::1] interval_mapping = \ np.empty(num_states, dtype=np.uint32) - cdef double [::1] position = \ + cdef double [::1] position_mapping = \ np.empty(num_states, dtype=np.float) # cache variables - cdef unsigned int [::1] beat_states = \ - self.beat_states - cdef unsigned int [::1] first_beat_positions = \ - self.first_beat_positions - cdef unsigned int [::1] last_beat_positions = \ - self.last_beat_positions - # loop over all tempi - for tempo_state in range(self.num_tempo_states): - # first and last beat (exclusive) for tempo - first_beat = first_beat_positions[tempo_state] - last_beat = last_beat_positions[tempo_state] - # number of beats for tempo - num_beat_states = float(beat_states[tempo_state]) + cdef unsigned int [::1] intervals = self.intervals + cdef unsigned int [::1] first_states = self.first_states + cdef unsigned int [::1] last_states = self.last_states + # loop over all intervals + for interval in range(self.num_intervals): + # first and last state of interval + first_state = first_states[interval] + last_state = last_states[interval] # reset position counter pos = 0 - for state in range(first_beat, last_beat + 1): - # tempo state mapping - tempo[state] = tempo_state + for state in range(first_state, last_state + 1): + # interval state mapping + interval_mapping[state] = interval # position inside beat mapping - position[state] = pos / num_beat_states + position_mapping[state] = pos / float(intervals[interval]) pos += 1 # return the mappings - return np.asarray(position), np.asarray(tempo) + return np.asarray(position_mapping), np.asarray(interval_mapping) def position(self, state): """ - Position (inside one beat) for a given state sequence. + Position (inside one beat) of the given states. Parameters ---------- state : numpy array - State (sequence). + States. Returns ------- numpy array - Corresponding beat state sequence. + Corresponding position. """ return self.position_mapping[state] - def tempo(self, state): + def interval(self, state): """ - Tempo (i.e. inter beat interval) for a given state sequence. + Intervals of the given states. Parameters ---------- state : numpy array - State (sequence). + States. Returns ------- numpy array - Corresponding tempo state sequence. + Corresponding intervals. """ - return self.tempo_mapping[state] + return self.interval_mapping[state] -class BeatTrackingTransitionModel(TransitionModel): +class BeatTransitionModel(TransitionModel): """ Transition model for beat tracking with a HMM. Parameters ---------- - state_space : :class:`BeatTrackingStateSpace` instance - BeatTrackingStateSpace instance. + state_space : :class:`BeatStateSpace` instance + BeatStateSpace instance. transition_lambda : float Lambda for the exponential tempo change distribution (higher values prefer a constant tempo from one beat to the next one). @@ -206,7 +202,7 @@ class BeatTrackingTransitionModel(TransitionModel): # compute the transitions transitions = self.make_sparse(*self.compute_transitions()) # instantiate a TransitionModel with the transitions - super(BeatTrackingTransitionModel, self).__init__(*transitions) + super(BeatTransitionModel, self).__init__(*transitions) @cython.cdivision(True) @cython.boundscheck(False) @@ -228,13 +224,16 @@ class BeatTrackingTransitionModel(TransitionModel): """ # cache variables - cdef unsigned int [::1] beat_states = self.state_space.beat_states + # Note: convert all intervals to float here + cdef float [::1] intervals = \ + self.state_space.intervals.astype(np.float32) + # cdef unsigned int [::1] tempo_states = self.state_space.tempo_states cdef double transition_lambda = self.transition_lambda # number of tempo & total states - cdef unsigned int num_tempo_states = self.state_space.num_tempo_states + cdef unsigned int num_intervals = self.state_space.num_intervals cdef unsigned int num_states = self.state_space.num_states # counters etc. - cdef unsigned int state, prev_state, old_tempo, new_tempo + cdef unsigned int state, prev_state, old_interval, new_interval cdef double ratio, u, prob, prob_sum cdef double threshold = np.spacing(1) @@ -244,29 +243,29 @@ class BeatTrackingTransitionModel(TransitionModel): # tempo changes can only occur at the beginning of a beat # transition matrix for the tempo changes - cdef double [:, ::1] trans_prob = np.zeros((num_tempo_states, - num_tempo_states), + cdef double [:, ::1] trans_prob = np.zeros((num_intervals, + num_intervals), dtype=np.float) # iterate over all tempo states - for old_tempo in range(num_tempo_states): + for old_interval in range(num_intervals): # reset probability sum prob_sum = 0 # compute transition probabilities to all other tempo states - for new_tempo in range(num_tempo_states): + for new_interval in range(num_intervals): # compute the ratio of the two tempi - ratio = beat_states[new_tempo] / float(beat_states[old_tempo]) + ratio = intervals[new_interval] / intervals[old_interval] # compute the probability for the tempo change following an # exponential distribution prob = exp(-transition_lambda * abs(ratio - 1)) # keep only transition probabilities > threshold if prob > threshold: # save the probability - trans_prob[old_tempo, new_tempo] = prob + trans_prob[old_interval, new_interval] = prob # collect normalization data prob_sum += prob # normalize the tempo transitions to other tempi - for new_tempo in range(num_tempo_states): - trans_prob[old_tempo, new_tempo] /= prob_sum + for new_interval in range(num_intervals): + trans_prob[old_interval, new_interval] /= prob_sum # number of tempo transitions (= non-zero probabilities) cdef unsigned int num_tempo_transitions = \ @@ -277,7 +276,7 @@ class BeatTrackingTransitionModel(TransitionModel): # plus the number of tempo transitions minus the number of tempo states # since these transitions are already included in the tempo transitions cdef int num_transitions = num_states + num_tempo_transitions - \ - num_tempo_states + num_intervals # arrays for transition matrix creation cdef unsigned int [::1] states = \ np.empty(num_transitions, dtype=np.uint32) @@ -290,26 +289,26 @@ class BeatTrackingTransitionModel(TransitionModel): # cache first and last positions cdef unsigned int [::1] first_beat_positions = \ - self.state_space.first_beat_positions - cdef unsigned int [::1] last_beat_positions =\ - self.state_space.last_beat_positions + self.state_space.first_states + cdef unsigned int [::1] last_beat_positions = \ + self.state_space.last_states # state counter cdef int i = 0 # loop over all tempi - for new_tempo in range(num_tempo_states): + for new_interval in range(num_intervals): # generate all transitions from other tempi - for old_tempo in range(num_tempo_states): + for old_interval in range(num_intervals): # but only if it is a probable transition - if trans_prob[old_tempo, new_tempo] != 0: + if trans_prob[old_interval, new_interval] != 0: # generate a transition - prev_states[i] = last_beat_positions[old_tempo] - states[i] = first_beat_positions[new_tempo] - probabilities[i] = trans_prob[old_tempo, new_tempo] + prev_states[i] = last_beat_positions[old_interval] + states[i] = first_beat_positions[new_interval] + probabilities[i] = trans_prob[old_interval, new_interval] # increase counter i += 1 # transitions within the same tempo - for prev_state in range(first_beat_positions[new_tempo], - last_beat_positions[new_tempo]): + for prev_state in range(first_beat_positions[new_interval], + last_beat_positions[new_interval]): # generate a transition with probability 1 prev_states[i] = prev_state states[i] = prev_state + 1 @@ -321,14 +320,14 @@ class BeatTrackingTransitionModel(TransitionModel): return states, prev_states, probabilities -class BeatTrackingObservationModel(ObservationModel): +class RNNBeatTrackingObservationModel(ObservationModel): """ Observation model for beat tracking with a HMM. Parameters ---------- - state_space : :class:`BeatTrackingStateSpace` instance - BeatTrackingStateSpace instance. + state_space : :class:`BeatStateSpace` instance + BeatStateSpace instance. observation_lambda : int Split one beat period into `observation_lambda` parts, the first representing beat states and the remaining non-beat states. @@ -358,7 +357,7 @@ class BeatTrackingObservationModel(ObservationModel): dtype=np.int)) < border pointers[beat_idx] = 0 # instantiate a ObservationModel with the pointers - super(BeatTrackingObservationModel, self).__init__(pointers) + super(RNNBeatTrackingObservationModel, self).__init__(pointers) @cython.cdivision(True) @cython.boundscheck(False) @@ -370,7 +369,7 @@ class BeatTrackingObservationModel(ObservationModel): Parameters ---------- observations : numpy array - Observations (i.e. activations of the NN). + Observations (i.e. activations of the RNN). Returns ------- @@ -397,11 +396,11 @@ class BeatTrackingObservationModel(ObservationModel): return np.asarray(log_densities) -class PatternTrackingStateSpace(object): +class MultiPatternStateSpace(object): """ State space for rythmic pattern tracking with a HMM. - A rhythmic pattern is modeled similar to :class:`BeatTrackingStateSpace`, + A rhythmic pattern is modeled similar to :class:`BeatStateSpace`, but models multiple rhythmic patterns instead of a single beat. The pattern's length can span multiple beats (e.g. 3 or 4 beats). @@ -411,13 +410,13 @@ class PatternTrackingStateSpace(object): Minimum intervals (i.e. rhythmic pattern length) to model. max_intervals : list or numpy array Maximum intervals (i.e. rhythmic pattern length) to model. - num_tempo_states : list or numpy array, optional - Corresponding number of tempo states; if set, limit the number of - states and use a log spacing, otherwise use a linear spacing. + num_intervals : list or numpy array, optional + Corresponding number of intervals; if set, limit the number of + intervals and use a log spacing instead of the default linear spacing. See Also -------- - :class:`BeatTrackingStateSpace` + :class:`BeatStateSpace` References ---------- @@ -428,110 +427,104 @@ class PatternTrackingStateSpace(object): """ - def __init__(self, min_intervals, max_intervals, num_tempo_states=None): - if num_tempo_states is None: - num_tempo_states = [None] * len(min_intervals) - # for each pattern, compute a beat state space - state_spaces = [] - enum = enumerate(zip(min_intervals, max_intervals, num_tempo_states)) + def __init__(self, min_intervals, max_intervals, num_intervals=None): + if num_intervals is None: + num_intervals = [None] * len(min_intervals) + # for each pattern, compute a bar state space (i.e. a beat state space + # which spans a complete bar) + bar_state_spaces = [] + enum = enumerate(zip(min_intervals, max_intervals, num_intervals)) for pattern, (min_, max_, num_) in enum: - # create a BeatTrackingStateSpace and append it to the list - state_spaces.append(BeatTrackingStateSpace(min_, max_, num_)) - self.pattern_state_spaces = state_spaces + # create a BeatStateSpace and append it to the list + bar_state_spaces.append(BeatStateSpace(min_, max_, num_)) + self.bar_state_spaces = bar_state_spaces # define mappings self.position_mapping = \ np.hstack([st.position(np.arange(st.num_states, dtype=np.int)) - for st in self.pattern_state_spaces]) - self.tempo_mapping = \ - np.hstack([st.tempo(np.arange(st.num_states, dtype=np.int)) - for st in self.pattern_state_spaces]) + for st in self.bar_state_spaces]) + self.interval_mapping = \ + np.hstack([st.interval(np.arange(st.num_states, dtype=np.int)) + for st in self.bar_state_spaces]) self.pattern_mapping = \ np.hstack([np.repeat(i, st.num_states) - for i, st in enumerate(self.pattern_state_spaces)]) - self.beat_states = [st.beat_states for st in self.pattern_state_spaces] + for i, st in enumerate(self.bar_state_spaces)]) @property def num_states(self): """Number of states.""" - return int(sum([st.num_states for st in self.pattern_state_spaces])) - - @property - def num_tempo_states(self): - """Number of tempo states for each pattern.""" - return [len(t) for t in self.beat_states] + return int(sum([st.num_states for st in self.bar_state_spaces])) @property def num_patterns(self): """Number of rhythmic patterns""" - return len(self.beat_states) + return len(self.bar_state_spaces) def position(self, state): """ - Position (inside one pattern) for a given state sequence. + Position (inside one pattern) for the given states. Parameters ---------- state : numpy array - State (sequence). + States. Returns ------- numpy array - Corresponding beat state sequence. + Corresponding positions. """ return self.position_mapping[state] - def tempo(self, state): + def interval(self, state): """ - Tempo for a given state sequence. + Interval for a given states. Parameters ---------- state : numpy array - State (sequence). + States. Returns ------- numpy array - Corresponding tempo state sequence. + Corresponding intervals. """ - return self.tempo_mapping[state] + return self.interval_mapping[state] def pattern(self, state): """ - Pattern for the given state sequence. + Pattern for the given states. Parameters ---------- state : numpy array - State (sequence). + States. Returns ------- numpy array - Corresponding pattern state sequence. + Corresponding patterns. """ return self.pattern_mapping[state] -class PatternTrackingTransitionModel(TransitionModel): +class MultiPatternTransitionModel(TransitionModel): """ Transition model for pattern tracking with a HMM. - Instead of modelling only a single beat (as - :class:`BeatTrackingTransitionModel`), the - :class:`PatternTrackingTransitionModel` models rhythmic patterns. It - accepts the same arguments as the :class:`BeatTrackingTransitionModel`, - but everything as lists, with the list entries at the same position + Instead of modelling only a single beat (as :class:`BeatTransitionModel`), + the :class:`MultiPatternTransitionModel` models rhythmic patterns. It + accepts the same arguments as the :class:`BeatTransitionModel`, but + everything as lists, with the list entries at the same position corresponding to one rhythmic pattern. Parameters ---------- - state_space : :class:`PatternTrackingTransitionModel` instance - PatternTrackingTransitionModel instance. + state_space : :class:`MultiPatternTransitionModel` instance + MultiPatternTransitionModel instance. transition_lambda : list Lambda(s) for the exponential tempo change distribution of the patterns (higher values prefer a constant tempo from one bar to the next one). @@ -539,7 +532,7 @@ class PatternTrackingTransitionModel(TransitionModel): See Also -------- - :class:`BeatTrackingTransitionModel` + :class:`BeatTransitionModel` Notes ----- @@ -575,14 +568,13 @@ class PatternTrackingTransitionModel(TransitionModel): raise ValueError('number of patterns of the `state_space` and the ' 'length `transition_lambda` must be the same') # save the given arguments - self.beat_states = state_space.beat_states + self.state_space = state_space self.transition_lambda = transition_lambda # compute the transitions for each pattern and stack them - enum = enumerate(zip(state_space.pattern_state_spaces, - transition_lambda)) + enum = enumerate(zip(state_space.bar_state_spaces, transition_lambda)) for pattern, (state_space, transition_lambda) in enum: - # create a BeatTrackingTransitionModel - tm = BeatTrackingTransitionModel(state_space, transition_lambda) + # create a BeatTransitionModel + tm = BeatTransitionModel(state_space, transition_lambda) seq = np.arange(tm.num_states, dtype=np.int) # set/update the probabilities, states and pointers if pattern == 0: @@ -605,7 +597,7 @@ class PatternTrackingTransitionModel(TransitionModel): probabilities = np.hstack((probabilities, tm.probabilities)) # instantiate a TransitionModel with the transition arrays transitions = states, pointers, probabilities - super(PatternTrackingTransitionModel, self).__init__(*transitions) + super(MultiPatternTransitionModel, self).__init__(*transitions) class GMMPatternTrackingObservationModel(ObservationModel): @@ -616,8 +608,8 @@ class GMMPatternTrackingObservationModel(ObservationModel): ---------- gmms : list Fitted GMM(s), one entry per rhythmic pattern. - transition_model : :class:`PatternTrackingTransitionModel` instance - PatternTrackingTransitionModel instance. + transition_model : :class:`MultiPatternTransitionModel` instance + MultiPatternTransitionModel instance. norm_observations : bool, optional Normalize the observations. diff --git a/madmom/features/tempo.py b/madmom/features/tempo.py index c97302683..a1617d5a6 100755 --- a/madmom/features/tempo.py +++ b/madmom/features/tempo.py @@ -353,16 +353,15 @@ def interval_histogram(self, activations): num_tempo_states=None, fps=self.fps) # get the best state path by calling the viterbi algorithm path, _ = dbn.hmm.viterbi(activations.astype(np.float32)) - intervals = dbn.st.tempo(path) + intervals = dbn.st.interval(path) # add the minimum interval of the beat state space - intervals += dbn.st.beat_states.min() + intervals += dbn.st.intervals.min() # get the counts of the bins - bins = np.bincount(intervals, - minlength=dbn.st.beat_states.max() + 1) + bins = np.bincount(intervals, minlength=dbn.st.intervals.max() + 1) # truncate everything below the minimum interval of the state space - bins = bins[dbn.st.beat_states.min():] - # build a histogram together with the beat states and return it - return bins, dbn.st.beat_states + bins = bins[dbn.st.intervals.min():] + # build a histogram together with the intervals and return it + return bins, dbn.st.intervals else: raise ValueError('tempo estimation method unknown') diff --git a/tests/test_features_beats_hmm.py b/tests/test_features_beats_hmm.py index c24e9a35d..108b9432f 100644 --- a/tests/test_features_beats_hmm.py +++ b/tests/test_features_beats_hmm.py @@ -12,52 +12,52 @@ from madmom.features.beats_hmm import * -class TestBeatTrackingStateSpaceClass(unittest.TestCase): +class TestBeatStateSpaceClass(unittest.TestCase): def setUp(self): - self.btss = BeatTrackingStateSpace(1, 4) + self.btss = BeatStateSpace(1, 4) def test_types(self): - self.assertIsInstance(self.btss.beat_states, np.ndarray) + self.assertIsInstance(self.btss.intervals, np.ndarray) self.assertIsInstance(self.btss.position_mapping, np.ndarray) - self.assertIsInstance(self.btss.tempo_mapping, np.ndarray) + self.assertIsInstance(self.btss.interval_mapping, np.ndarray) self.assertIsInstance(self.btss.num_states, int) - self.assertIsInstance(self.btss.num_tempo_states, int) - self.assertIsInstance(self.btss.first_beat_positions, np.ndarray) - self.assertIsInstance(self.btss.last_beat_positions, np.ndarray) + self.assertIsInstance(self.btss.num_intervals, int) + self.assertIsInstance(self.btss.first_states, np.ndarray) + self.assertIsInstance(self.btss.last_states, np.ndarray) def test_values(self): - print(self.btss.beat_states) - self.assertTrue(np.allclose(self.btss.beat_states, + print(self.btss.intervals) + self.assertTrue(np.allclose(self.btss.intervals, [1, 2, 3, 4])) self.assertTrue(np.allclose(self.btss.position_mapping, [0, 0, 0.5, 0, 1. / 3, 2. / 3, 0, 0.25, 0.5, 0.75])) - self.assertTrue(np.allclose(self.btss.tempo_mapping, + self.assertTrue(np.allclose(self.btss.interval_mapping, [0, 1, 1, 2, 2, 2, 3, 3, 3, 3])) - self.assertTrue(np.allclose(self.btss.first_beat_positions, + self.assertTrue(np.allclose(self.btss.first_states, [0, 1, 3, 6])) - self.assertTrue(np.allclose(self.btss.last_beat_positions, + self.assertTrue(np.allclose(self.btss.last_states, [0, 2, 5, 9])) self.assertTrue(self.btss.num_states == 10) - self.assertTrue(self.btss.num_tempo_states == 4) + self.assertTrue(self.btss.num_intervals == 4) self.assertTrue(np.allclose(self.btss.position(np.arange(10)), [0, 0, 0.5, 0, 1. / 3, 2. / 3, 0, 0.25, 0.5, 0.75])) - self.assertTrue(np.allclose(self.btss.tempo(np.arange(10)), + self.assertTrue(np.allclose(self.btss.interval(np.arange(10)), [0, 1, 1, 2, 2, 2, 3, 3, 3, 3])) -class TestBeatTrackingTransitionModelClass(unittest.TestCase): +class TestBeatTransitionModelClass(unittest.TestCase): def setUp(self): - btss = BeatTrackingStateSpace(1, 4) - self.tm = BeatTrackingTransitionModel(btss, 100) + btss = BeatStateSpace(1, 4) + self.tm = BeatTransitionModel(btss, 100) def test_types(self): - self.assertIsInstance(self.tm, BeatTrackingTransitionModel) + self.assertIsInstance(self.tm, BeatTransitionModel) self.assertIsInstance(self.tm, TransitionModel) - self.assertIsInstance(self.tm.state_space, BeatTrackingStateSpace) + self.assertIsInstance(self.tm.state_space, BeatStateSpace) self.assertIsInstance(self.tm.transition_lambda, np.ndarray) self.assertIsInstance(self.tm.states, np.ndarray) self.assertIsInstance(self.tm.pointers, np.ndarray) @@ -84,11 +84,11 @@ def test_values(self): self.assertTrue(self.tm.num_transitions == 13) -class TestBeatTrackingObservationModelClass(unittest.TestCase): +class TestRNNBeatTrackingObservationModelClass(unittest.TestCase): def setUp(self): - btss = BeatTrackingStateSpace(1, 4) - self.om = BeatTrackingObservationModel(btss, 4) + btss = BeatStateSpace(1, 4) + self.om = RNNBeatTrackingObservationModel(btss, 4) self.obs = np.asarray([1, 0.1, 0.01, 0], dtype=np.float32) def test_types(self): @@ -111,30 +111,32 @@ def test_values(self): [-np.inf, -1.09861229]])) -class TestPatternTrackingStateSpaceClass(unittest.TestCase): +class TestMultiPatternStateSpaceClass(unittest.TestCase): def setUp(self): - self.ptss = PatternTrackingStateSpace([1, 2], [4, 6]) + self.ptss = MultiPatternStateSpace([1, 2], [4, 6]) def test_types(self): - self.assertIsInstance(self.ptss.beat_states, list) + self.assertIsInstance(self.ptss.bar_state_spaces, list) self.assertIsInstance(self.ptss.position_mapping, np.ndarray) - self.assertIsInstance(self.ptss.tempo_mapping, np.ndarray) + self.assertIsInstance(self.ptss.interval_mapping, np.ndarray) self.assertIsInstance(self.ptss.num_states, int) - self.assertIsInstance(self.ptss.num_tempo_states, list) + # self.assertIsInstance(self.ptss.num_intervals, list) self.assertIsInstance(self.ptss.num_patterns, int) def test_values(self): - self.assertTrue(np.allclose(self.ptss.beat_states[0], [1, 2, 3, 4])) - self.assertTrue(np.allclose(self.ptss.beat_states[1], [2, 3, 4, 5, 6])) + self.assertTrue(np.allclose(self.ptss.bar_state_spaces[0].intervals, + [1, 2, 3, 4])) + self.assertTrue(np.allclose(self.ptss.bar_state_spaces[1].intervals, + [2, 3, 4, 5, 6])) self.assertTrue(self.ptss.num_states == 30) - self.assertTrue(self.ptss.num_tempo_states == [4, 5]) + # self.assertTrue(self.ptss.num_intervals == [4, 5]) self.assertTrue(self.ptss.num_patterns == 2) # first pattern self.assertTrue(np.allclose(self.ptss.position(np.arange(10)), [0, 0, 0.5, 0, 1. / 3, 2. / 3, 0, 0.25, 0.5, 0.75])) - self.assertTrue(np.allclose(self.ptss.tempo(np.arange(10)), + self.assertTrue(np.allclose(self.ptss.interval(np.arange(10)), [0, 1, 1, 2, 2, 2, 3, 3, 3, 3])) self.assertTrue(np.allclose(self.ptss.pattern(np.arange(10)), np.zeros(10))) @@ -145,7 +147,7 @@ def test_values(self): 0, 0.25, 0.5, 0.75, 0, 0.2, 0.4, 0.6, 0.8, 0, 1. / 6, 2. / 6, 0.5, 4. / 6, 5. / 6])) - self.assertTrue(np.allclose(self.ptss.tempo(np.arange(10, 30)), + self.assertTrue(np.allclose(self.ptss.interval(np.arange(10, 30)), [0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4])) self.assertTrue(np.allclose(self.ptss.pattern(np.arange(10, 30)), @@ -155,11 +157,11 @@ def test_values(self): class TestPatternTrackingTransitionModelClass(unittest.TestCase): def setUp(self): - ptss = PatternTrackingStateSpace([1, 2], [4, 6]) - self.tm = PatternTrackingTransitionModel(ptss, 100) + ptss = MultiPatternStateSpace([1, 2], [4, 6]) + self.tm = MultiPatternTransitionModel(ptss, 100) def test_types(self): - self.assertIsInstance(self.tm, PatternTrackingTransitionModel) + self.assertIsInstance(self.tm, MultiPatternTransitionModel) self.assertIsInstance(self.tm, TransitionModel) # self.assertIsInstance(self.tm.state_space, PatternTrackingStateSpace) self.assertIsInstance(self.tm.transition_lambda, list) From c9a75e5639ae0b09f7ec25a5bb9858f9d6a5b2a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Fri, 11 Dec 2015 16:14:01 +0100 Subject: [PATCH 03/18] Reordering of the classes. --- madmom/features/beats_hmm.pyx | 384 +++++++++++++++++----------------- 1 file changed, 193 insertions(+), 191 deletions(-) diff --git a/madmom/features/beats_hmm.pyx b/madmom/features/beats_hmm.pyx index c16e1d858..9bcf8acdc 100644 --- a/madmom/features/beats_hmm.pyx +++ b/madmom/features/beats_hmm.pyx @@ -174,6 +174,122 @@ class BeatStateSpace(object): return self.interval_mapping[state] +class MultiPatternStateSpace(object): + """ + State space for rythmic pattern tracking with a HMM. + + A rhythmic pattern is modeled similar to :class:`BeatStateSpace`, + but models multiple rhythmic patterns instead of a single beat. The + pattern's length can span multiple beats (e.g. 3 or 4 beats). + + Parameters + ---------- + min_intervals : list or numpy array + Minimum intervals (i.e. rhythmic pattern length) to model. + max_intervals : list or numpy array + Maximum intervals (i.e. rhythmic pattern length) to model. + num_intervals : list or numpy array, optional + Corresponding number of intervals; if set, limit the number of + intervals and use a log spacing instead of the default linear spacing. + + See Also + -------- + :class:`BeatStateSpace` + + References + ---------- + .. [1] Florian Krebs, Sebastian Böck and Gerhard Widmer, + "An Efficient State Space Model for Joint Tempo and Meter Tracking", + Proceedings of the 16th International Society for Music Information + Retrieval Conference (ISMIR), 2015. + + """ + + def __init__(self, min_intervals, max_intervals, num_intervals=None): + if num_intervals is None: + num_intervals = [None] * len(min_intervals) + # for each pattern, compute a bar state space (i.e. a beat state space + # which spans a complete bar) + bar_state_spaces = [] + enum = enumerate(zip(min_intervals, max_intervals, num_intervals)) + for pattern, (min_, max_, num_) in enum: + # create a BeatStateSpace and append it to the list + bar_state_spaces.append(BeatStateSpace(min_, max_, num_)) + self.bar_state_spaces = bar_state_spaces + # define mappings + self.position_mapping = \ + np.hstack([st.position(np.arange(st.num_states, dtype=np.int)) + for st in self.bar_state_spaces]) + self.interval_mapping = \ + np.hstack([st.interval(np.arange(st.num_states, dtype=np.int)) + for st in self.bar_state_spaces]) + self.pattern_mapping = \ + np.hstack([np.repeat(i, st.num_states) + for i, st in enumerate(self.bar_state_spaces)]) + + @property + def num_states(self): + """Number of states.""" + return int(sum([st.num_states for st in self.bar_state_spaces])) + + @property + def num_patterns(self): + """Number of rhythmic patterns""" + return len(self.bar_state_spaces) + + def position(self, state): + """ + Position (inside one pattern) for the given states. + + Parameters + ---------- + state : numpy array + States. + + Returns + ------- + numpy array + Corresponding positions. + + """ + return self.position_mapping[state] + + def interval(self, state): + """ + Interval for a given states. + + Parameters + ---------- + state : numpy array + States. + + Returns + ------- + numpy array + Corresponding intervals. + + """ + return self.interval_mapping[state] + + def pattern(self, state): + """ + Pattern for the given states. + + Parameters + ---------- + state : numpy array + States. + + Returns + ------- + numpy array + Corresponding patterns. + + """ + return self.pattern_mapping[state] + + +# transition models class BeatTransitionModel(TransitionModel): """ Transition model for beat tracking with a HMM. @@ -320,197 +436,6 @@ class BeatTransitionModel(TransitionModel): return states, prev_states, probabilities -class RNNBeatTrackingObservationModel(ObservationModel): - """ - Observation model for beat tracking with a HMM. - - Parameters - ---------- - state_space : :class:`BeatStateSpace` instance - BeatStateSpace instance. - observation_lambda : int - Split one beat period into `observation_lambda` parts, the first - representing beat states and the remaining non-beat states. - norm_observations : bool, optional - Normalize the observations. - - References - ---------- - .. [1] Sebastian Böck, Florian Krebs and Gerhard Widmer, - "A Multi-Model Approach to Beat Tracking Considering Heterogeneous - Music Styles", - Proceedings of the 15th International Society for Music Information - Retrieval Conference (ISMIR), 2014. - - """ - - def __init__(self, state_space, observation_lambda, - norm_observations=False): - self.observation_lambda = observation_lambda - self.norm_observations = norm_observations - # compute observation pointers - # always point to the non-beat densities - pointers = np.ones(state_space.num_states, dtype=np.uint32) - # unless they are in the beat range of the state space - border = 1. / observation_lambda - beat_idx = state_space.position(np.arange(state_space.num_states, - dtype=np.int)) < border - pointers[beat_idx] = 0 - # instantiate a ObservationModel with the pointers - super(RNNBeatTrackingObservationModel, self).__init__(pointers) - - @cython.cdivision(True) - @cython.boundscheck(False) - @cython.wraparound(False) - def log_densities(self, float [::1] observations): - """ - Computes the log densities of the observations. - - Parameters - ---------- - observations : numpy array - Observations (i.e. activations of the RNN). - - Returns - ------- - numpy array - Log densities of the observations. - - """ - # init variables - cdef unsigned int i - cdef unsigned int num_observations = len(observations) - cdef float observation_lambda = self.observation_lambda - # norm observations - if self.norm_observations: - observations /= np.max(observations) - # init densities - cdef double [:, ::1] log_densities = np.empty((num_observations, 2), - dtype=np.float) - # define the observation densities - for i in range(num_observations): - log_densities[i, 0] = log(observations[i]) - log_densities[i, 1] = log((1. - observations[i]) / - (observation_lambda - 1)) - # return the densities - return np.asarray(log_densities) - - -class MultiPatternStateSpace(object): - """ - State space for rythmic pattern tracking with a HMM. - - A rhythmic pattern is modeled similar to :class:`BeatStateSpace`, - but models multiple rhythmic patterns instead of a single beat. The - pattern's length can span multiple beats (e.g. 3 or 4 beats). - - Parameters - ---------- - min_intervals : list or numpy array - Minimum intervals (i.e. rhythmic pattern length) to model. - max_intervals : list or numpy array - Maximum intervals (i.e. rhythmic pattern length) to model. - num_intervals : list or numpy array, optional - Corresponding number of intervals; if set, limit the number of - intervals and use a log spacing instead of the default linear spacing. - - See Also - -------- - :class:`BeatStateSpace` - - References - ---------- - .. [1] Florian Krebs, Sebastian Böck and Gerhard Widmer, - "An Efficient State Space Model for Joint Tempo and Meter Tracking", - Proceedings of the 16th International Society for Music Information - Retrieval Conference (ISMIR), 2015. - - """ - - def __init__(self, min_intervals, max_intervals, num_intervals=None): - if num_intervals is None: - num_intervals = [None] * len(min_intervals) - # for each pattern, compute a bar state space (i.e. a beat state space - # which spans a complete bar) - bar_state_spaces = [] - enum = enumerate(zip(min_intervals, max_intervals, num_intervals)) - for pattern, (min_, max_, num_) in enum: - # create a BeatStateSpace and append it to the list - bar_state_spaces.append(BeatStateSpace(min_, max_, num_)) - self.bar_state_spaces = bar_state_spaces - # define mappings - self.position_mapping = \ - np.hstack([st.position(np.arange(st.num_states, dtype=np.int)) - for st in self.bar_state_spaces]) - self.interval_mapping = \ - np.hstack([st.interval(np.arange(st.num_states, dtype=np.int)) - for st in self.bar_state_spaces]) - self.pattern_mapping = \ - np.hstack([np.repeat(i, st.num_states) - for i, st in enumerate(self.bar_state_spaces)]) - - @property - def num_states(self): - """Number of states.""" - return int(sum([st.num_states for st in self.bar_state_spaces])) - - @property - def num_patterns(self): - """Number of rhythmic patterns""" - return len(self.bar_state_spaces) - - def position(self, state): - """ - Position (inside one pattern) for the given states. - - Parameters - ---------- - state : numpy array - States. - - Returns - ------- - numpy array - Corresponding positions. - - """ - return self.position_mapping[state] - - def interval(self, state): - """ - Interval for a given states. - - Parameters - ---------- - state : numpy array - States. - - Returns - ------- - numpy array - Corresponding intervals. - - """ - return self.interval_mapping[state] - - def pattern(self, state): - """ - Pattern for the given states. - - Parameters - ---------- - state : numpy array - States. - - Returns - ------- - numpy array - Corresponding patterns. - - """ - return self.pattern_mapping[state] - - class MultiPatternTransitionModel(TransitionModel): """ Transition model for pattern tracking with a HMM. @@ -600,6 +525,83 @@ class MultiPatternTransitionModel(TransitionModel): super(MultiPatternTransitionModel, self).__init__(*transitions) +# observation models +class RNNBeatTrackingObservationModel(ObservationModel): + """ + Observation model for beat tracking with a HMM. + + Parameters + ---------- + state_space : :class:`BeatStateSpace` instance + BeatStateSpace instance. + observation_lambda : int + Split one beat period into `observation_lambda` parts, the first + representing beat states and the remaining non-beat states. + norm_observations : bool, optional + Normalize the observations. + + References + ---------- + .. [1] Sebastian Böck, Florian Krebs and Gerhard Widmer, + "A Multi-Model Approach to Beat Tracking Considering Heterogeneous + Music Styles", + Proceedings of the 15th International Society for Music Information + Retrieval Conference (ISMIR), 2014. + + """ + + def __init__(self, state_space, observation_lambda, + norm_observations=False): + self.observation_lambda = observation_lambda + self.norm_observations = norm_observations + # compute observation pointers + # always point to the non-beat densities + pointers = np.ones(state_space.num_states, dtype=np.uint32) + # unless they are in the beat range of the state space + border = 1. / observation_lambda + beat_idx = state_space.position(np.arange(state_space.num_states, + dtype=np.int)) < border + pointers[beat_idx] = 0 + # instantiate a ObservationModel with the pointers + super(RNNBeatTrackingObservationModel, self).__init__(pointers) + + @cython.cdivision(True) + @cython.boundscheck(False) + @cython.wraparound(False) + def log_densities(self, float [::1] observations): + """ + Computes the log densities of the observations. + + Parameters + ---------- + observations : numpy array + Observations (i.e. activations of the RNN). + + Returns + ------- + numpy array + Log densities of the observations. + + """ + # init variables + cdef unsigned int i + cdef unsigned int num_observations = len(observations) + cdef float observation_lambda = self.observation_lambda + # norm observations + if self.norm_observations: + observations /= np.max(observations) + # init densities + cdef double [:, ::1] log_densities = np.empty((num_observations, 2), + dtype=np.float) + # define the observation densities + for i in range(num_observations): + log_densities[i, 0] = log(observations[i]) + log_densities[i, 1] = log((1. - observations[i]) / + (observation_lambda - 1)) + # return the densities + return np.asarray(log_densities) + + class GMMPatternTrackingObservationModel(ObservationModel): """ Observation model for GMM based beat tracking with a HMM. From 3db4db325ae9129753274f025d7824cc11cdf12d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Thu, 17 Dec 2015 14:39:58 +0100 Subject: [PATCH 04/18] renamed num_tempo_states to num_tempi; redefined all mappings to simple arrays --- madmom/features/beats.py | 100 +++++++++---------- madmom/features/beats_hmm.pyx | 165 ++++--------------------------- madmom/features/tempo.py | 4 +- tests/test_features_beats_hmm.py | 76 +++++++------- 4 files changed, 111 insertions(+), 234 deletions(-) diff --git a/madmom/features/beats.py b/madmom/features/beats.py index 21c669ba3..c4864b473 100755 --- a/madmom/features/beats.py +++ b/madmom/features/beats.py @@ -624,8 +624,8 @@ class DBNBeatTrackingProcessor(Processor): Minimum tempo used for beat tracking [bpm]. max_bpm : float, optional Maximum tempo used for beat tracking [bpm]. - num_tempo_states : int, optional - Number of tempo states; if set, limit the number of states and use a + num_tempi : int, optional + Number of tempi to model; if set, limit the number of tempi and use a log spacing, otherwise a linear spacing. transition_lambda : float, optional Lambda for the exponential tempo change distribution (higher values @@ -659,16 +659,15 @@ class DBNBeatTrackingProcessor(Processor): Retrieval Conference (ISMIR), 2015. """ - CORRECT = True - NUM_TEMPO_STATES = None + MIN_BPM = 55. + MAX_BPM = 215. + NUM_TEMPI = None TRANSITION_LAMBDA = 100 OBSERVATION_LAMBDA = 16 NORM_OBSERVATIONS = False - MIN_BPM = 55. - MAX_BPM = 215. + CORRECT = True - def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, - num_tempo_states=NUM_TEMPO_STATES, + def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, num_tempi=NUM_TEMPI, transition_lambda=TRANSITION_LAMBDA, observation_lambda=OBSERVATION_LAMBDA, norm_observations=NORM_OBSERVATIONS, correct=CORRECT, @@ -681,10 +680,10 @@ def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, BeatTransitionModel as Tm, RNNBeatTrackingObservationModel as Om) - # convert timing information to construct state space + # convert timing information to construct a beat state space min_interval = 60. * fps / max_bpm max_interval = 60. * fps / min_bpm - self.st = St(min_interval, max_interval, num_tempo_states) + self.st = St(min_interval, max_interval, num_tempi) # transition model self.tm = Tm(self.st, transition_lambda) # observation model @@ -735,8 +734,7 @@ def process(self, activations): else: # just take the frames with the smallest beat state values from scipy.signal import argrelmin - beats = argrelmin(self.st.position(path), - mode='wrap')[0] + beats = argrelmin(self.st.position[path], mode='wrap')[0] # recheck if they are within the "beat range", i.e. the pointers # of the observation model for that state must be 0 # Note: interpolation and alignment of the beats to be at state 0 @@ -747,12 +745,11 @@ def process(self, activations): @staticmethod def add_arguments(parser, min_bpm=MIN_BPM, max_bpm=MAX_BPM, - num_tempo_states=NUM_TEMPO_STATES, - transition_lambda=TRANSITION_LAMBDA, + num_tempi=NUM_TEMPI, transition_lambda=TRANSITION_LAMBDA, observation_lambda=OBSERVATION_LAMBDA, norm_observations=NORM_OBSERVATIONS, correct=CORRECT): """ - Add HMM related arguments to an existing parser object. + Add DBN related arguments to an existing parser object. Parameters ---------- @@ -762,8 +759,8 @@ def add_arguments(parser, min_bpm=MIN_BPM, max_bpm=MAX_BPM, Minimum tempo used for beat tracking [bpm]. max_bpm : float, optional Maximum tempo used for beat tracking [bpm]. - num_tempo_states : int, optional - Number of tempo states; if set, limit the number of states and use + num_tempi : int, optional + Number of tempi to model; if set, limit the number of tempi and use a log spacing, otherwise a linear spacing. transition_lambda : float, optional Lambda for the exponential tempo change distribution (higher values @@ -794,12 +791,12 @@ def add_arguments(parser, min_bpm=MIN_BPM, max_bpm=MAX_BPM, g.add_argument('--max_bpm', action='store', type=float, default=max_bpm, help='maximum tempo [bpm, default=%(default).2f]') - g.add_argument('--num_tempo_states', action='store', type=int, - default=num_tempo_states, - help='limit the number of tempo states; if set, align ' - 'them with a log spacing, otherwise linearly') - g.add_argument('--transition_lambda', action='store', - type=float, default=transition_lambda, + g.add_argument('--num_tempi', action='store', type=int, + default=num_tempi, + help='limit the number of tempi; if set, align the ' + 'tempi with a log spacing, otherwise linearly') + g.add_argument('--transition_lambda', action='store', type=float, + default=transition_lambda, help='lambda of the tempo transition distribution; ' 'higher values prefer a constant tempo over a ' 'tempo change from one beat to the next one ' @@ -856,8 +853,8 @@ class PatternTrackingProcessor(Processor): Minimum tempi used for pattern tracking [bpm]. max_bpm : list, optional Maximum tempi used for pattern tracking [bpm]. - num_tempo_states : int or list, optional - Number of tempo states; if set, limit the number of states and use + num_tempi : int or list, optional + Number of tempi to model; if set, limit the number of tempi and use a log spacings, otherwise a linear spacings. transition_lambda : float or list, optional Lambdas for the exponential tempo change distributions (higher values @@ -897,13 +894,12 @@ class PatternTrackingProcessor(Processor): # TODO: this should not be lists (lists are mutable!) MIN_BPM = [55, 60] MAX_BPM = [205, 225] - NUM_TEMPO_STATES = [None, None] + NUM_TEMPI = [None, None] TRANSITION_LAMBDA = [100, 100] NORM_OBSERVATIONS = False def __init__(self, pattern_files, min_bpm=MIN_BPM, max_bpm=MAX_BPM, - num_tempo_states=NUM_TEMPO_STATES, - transition_lambda=TRANSITION_LAMBDA, + num_tempi=NUM_TEMPI, transition_lambda=TRANSITION_LAMBDA, norm_observations=NORM_OBSERVATIONS, downbeats=False, fps=None, **kwargs): # pylint: disable=unused-argument @@ -916,15 +912,15 @@ def __init__(self, pattern_files, min_bpm=MIN_BPM, max_bpm=MAX_BPM, MultiPatternTransitionModel as Tm, GMMPatternTrackingObservationModel as Om) - # expand num_tempo_states and transition_lambda to lists if needed - if not isinstance(num_tempo_states, list): - num_tempo_states = [num_tempo_states] * len(num_tempo_states) + # expand num_tempi and transition_lambda to lists if needed + if not isinstance(num_tempi, list): + num_tempi = [num_tempi] * len(num_tempi) if not isinstance(transition_lambda, list): - transition_lambda = [transition_lambda] * len(num_tempo_states) + transition_lambda = [transition_lambda] * len(num_tempi) # check if all lists have the same length - if not (len(min_bpm) == len(max_bpm) == len(num_tempo_states) == + if not (len(min_bpm) == len(max_bpm) == len(num_tempi) == len(transition_lambda) == len(pattern_files)): - raise ValueError('`min_bpm`, `max_bpm`, `num_tempo_states` and ' + raise ValueError('`min_bpm`, `max_bpm`, `num_tempi` and ' '`transition_lambda` must have the same length ' 'as number of patterns.') @@ -954,7 +950,7 @@ def __init__(self, pattern_files, min_bpm=MIN_BPM, max_bpm=MAX_BPM, min_interval = 60. * self.fps / np.asarray(max_bpm) * self.num_beats max_interval = 60. * self.fps / np.asarray(min_bpm) * self.num_beats # state space - self.st = St(min_interval, max_interval, num_tempo_states) + self.st = St(min_interval, max_interval, num_tempi) # transition model self.tm = Tm(self.st, transition_lambda) # observation model @@ -964,12 +960,12 @@ def __init__(self, pattern_files, min_bpm=MIN_BPM, max_bpm=MAX_BPM, def process(self, activations): """ - Detect the beats in the given activation function. + Detect the beats based on the given activations. Parameters ---------- activations : numpy array - Beat activation function. + Activations (i.e. multi-band spectral features). Returns ------- @@ -981,9 +977,9 @@ def process(self, activations): path, _ = self.hmm.viterbi(activations) # get the corresponding pattern (use only the first state, since it # doesn't change throughout the sequence) - pattern = self.st.pattern(path[0]) + pattern = self.st.pattern[path[0]] # the position inside the pattern (0..1) - position = self.st.position(path) + position = self.st.position[path] # beat position (= weighted by number of beats in bar) beat_counter = (position * self.num_beats[pattern]).astype(int) # transitions are the points where the beat counters change @@ -1003,10 +999,12 @@ def process(self, activations): @staticmethod def add_arguments(parser, pattern_files=None, min_bpm=MIN_BPM, - max_bpm=MAX_BPM, num_tempo_states=NUM_TEMPO_STATES, + max_bpm=MAX_BPM, num_tempi=NUM_TEMPI, transition_lambda=TRANSITION_LAMBDA, norm_observations=NORM_OBSERVATIONS): """ + Add DBN related arguments for pattern tracking to an existing parser + object. Parameters ---------- @@ -1018,9 +1016,9 @@ def add_arguments(parser, pattern_files=None, min_bpm=MIN_BPM, Minimum tempi used for beat tracking [bpm]. max_bpm : list, optional Maximum tempi used for beat tracking [bpm]. - num_tempo_states : int or list, optional - Number of tempo states; if set, limit the number of states and use - log spacings, otherwise a linear spacings. + num_tempi : int or list, optional + Number of tempi to model; if set, limit the number of states and + use log spacings, otherwise a linear spacings. transition_lambda : float or list, optional Lambdas for the exponential tempo change distribution (higher values prefer constant tempi from one beat to the next one). @@ -1030,11 +1028,11 @@ def add_arguments(parser, pattern_files=None, min_bpm=MIN_BPM, Returns ------- parser_group : argparse argument group - Downbeat tracking argument parser group + Pattern tracking argument parser group Notes ----- - `pattern_files`, `min_bpm`, `max_bpm`, `num_tempo_states`, and + `pattern_files`, `min_bpm`, `max_bpm`, `num_tempi`, and `transition_lambda` must the same number of items. """ @@ -1056,10 +1054,10 @@ def add_arguments(parser, pattern_files=None, min_bpm=MIN_BPM, default=max_bpm, type=float, sep=',', help='maximum tempo (comma separated list with one ' 'value per pattern) [bpm, default=%(default)s]') - g.add_argument('--num_tempo_states', action=OverrideDefaultListAction, - default=num_tempo_states, type=int, sep=',', - help='limit the number of tempo states; if set, align ' - 'them with a log spacing, otherwise linearly ' + g.add_argument('--num_tempi', action=OverrideDefaultListAction, + default=num_tempi, type=int, sep=',', + help='limit the number of tempi; if set, align the ' + 'tempi with log spacings, otherwise linearly ' '(comma separated list with one value per pattern)' ' [default=%(default)s]') g.add_argument('--transition_lambda', action=OverrideDefaultListAction, @@ -1073,11 +1071,11 @@ def add_arguments(parser, pattern_files=None, min_bpm=MIN_BPM, if norm_observations: g.add_argument('--no_norm_obs', dest='norm_observations', action='store_false', default=norm_observations, - help='do not normalize the observations of the HMM') + help='do not normalize the observations of the DBN') else: g.add_argument('--norm_obs', dest='norm_observations', action='store_true', default=norm_observations, - help='normalize the observations of the HMM') + help='normalize the observations of the DBN') # add output format stuff g = parser.add_argument_group('output arguments') g.add_argument('--downbeats', action='store_true', default=False, diff --git a/madmom/features/beats_hmm.pyx b/madmom/features/beats_hmm.pyx index 9bcf8acdc..de4106a5d 100644 --- a/madmom/features/beats_hmm.pyx +++ b/madmom/features/beats_hmm.pyx @@ -71,8 +71,16 @@ class BeatStateSpace(object): num_log_tempi += 1 # intervals to model self.intervals = np.ascontiguousarray(intervals, dtype=np.uint32) - # compute the position and interval mapping - self.position_mapping, self.interval_mapping = self.compute_mapping() + # define the position and interval states + self.position = np.empty(self.num_states) + self.interval = np.empty(self.num_states, dtype=np.uint32) + idx = interval = 0 + for i in self.intervals: + self.position[idx: idx + i] = np.linspace(0, 1, i, endpoint=False) + self.interval[idx: idx + i] = interval + # increase counters + idx += i + interval += 1 @property def num_states(self): @@ -94,89 +102,10 @@ class BeatStateSpace(object): """Last state for each interval.""" return np.cumsum(self.intervals).astype(np.uint32) - 1 - @cython.cdivision(True) - @cython.boundscheck(False) - @cython.wraparound(False) - def compute_mapping(self): - """ - Compute the mapping from state numbers to position inside the beat - and interval states. - - Returns - ------- - position_mapping : numpy array - Mapping from state number to position inside beat. - interval_mapping : numpy array - Mapping from state number to interval. - - """ - # counters etc. - cdef unsigned int interval, first_state, last_state - cdef unsigned int num_states = np.sum(self.intervals) - cdef float pos - # mapping arrays from state numbers to interval / position - cdef unsigned int [::1] interval_mapping = \ - np.empty(num_states, dtype=np.uint32) - cdef double [::1] position_mapping = \ - np.empty(num_states, dtype=np.float) - # cache variables - cdef unsigned int [::1] intervals = self.intervals - cdef unsigned int [::1] first_states = self.first_states - cdef unsigned int [::1] last_states = self.last_states - # loop over all intervals - for interval in range(self.num_intervals): - # first and last state of interval - first_state = first_states[interval] - last_state = last_states[interval] - # reset position counter - pos = 0 - for state in range(first_state, last_state + 1): - # interval state mapping - interval_mapping[state] = interval - # position inside beat mapping - position_mapping[state] = pos / float(intervals[interval]) - pos += 1 - # return the mappings - return np.asarray(position_mapping), np.asarray(interval_mapping) - - def position(self, state): - """ - Position (inside one beat) of the given states. - - Parameters - ---------- - state : numpy array - States. - - Returns - ------- - numpy array - Corresponding position. - - """ - return self.position_mapping[state] - - def interval(self, state): - """ - Intervals of the given states. - - Parameters - ---------- - state : numpy array - States. - - Returns - ------- - numpy array - Corresponding intervals. - - """ - return self.interval_mapping[state] - class MultiPatternStateSpace(object): """ - State space for rythmic pattern tracking with a HMM. + State space for rhythmic pattern tracking with a HMM. A rhythmic pattern is modeled similar to :class:`BeatStateSpace`, but models multiple rhythmic patterns instead of a single beat. The @@ -216,14 +145,14 @@ class MultiPatternStateSpace(object): # create a BeatStateSpace and append it to the list bar_state_spaces.append(BeatStateSpace(min_, max_, num_)) self.bar_state_spaces = bar_state_spaces - # define mappings - self.position_mapping = \ - np.hstack([st.position(np.arange(st.num_states, dtype=np.int)) + # define the position, interval and pattern states + self.position = \ + np.hstack([st.position[np.arange(st.num_states, dtype=np.int)] for st in self.bar_state_spaces]) - self.interval_mapping = \ - np.hstack([st.interval(np.arange(st.num_states, dtype=np.int)) + self.interval = \ + np.hstack([st.interval[np.arange(st.num_states, dtype=np.int)] for st in self.bar_state_spaces]) - self.pattern_mapping = \ + self.pattern = \ np.hstack([np.repeat(i, st.num_states) for i, st in enumerate(self.bar_state_spaces)]) @@ -237,57 +166,6 @@ class MultiPatternStateSpace(object): """Number of rhythmic patterns""" return len(self.bar_state_spaces) - def position(self, state): - """ - Position (inside one pattern) for the given states. - - Parameters - ---------- - state : numpy array - States. - - Returns - ------- - numpy array - Corresponding positions. - - """ - return self.position_mapping[state] - - def interval(self, state): - """ - Interval for a given states. - - Parameters - ---------- - state : numpy array - States. - - Returns - ------- - numpy array - Corresponding intervals. - - """ - return self.interval_mapping[state] - - def pattern(self, state): - """ - Pattern for the given states. - - Parameters - ---------- - state : numpy array - States. - - Returns - ------- - numpy array - Corresponding patterns. - - """ - return self.pattern_mapping[state] - # transition models class BeatTransitionModel(TransitionModel): @@ -343,7 +221,6 @@ class BeatTransitionModel(TransitionModel): # Note: convert all intervals to float here cdef float [::1] intervals = \ self.state_space.intervals.astype(np.float32) - # cdef unsigned int [::1] tempo_states = self.state_space.tempo_states cdef double transition_lambda = self.transition_lambda # number of tempo & total states cdef unsigned int num_intervals = self.state_space.num_intervals @@ -559,8 +436,7 @@ class RNNBeatTrackingObservationModel(ObservationModel): pointers = np.ones(state_space.num_states, dtype=np.uint32) # unless they are in the beat range of the state space border = 1. / observation_lambda - beat_idx = state_space.position(np.arange(state_space.num_states, - dtype=np.int)) < border + beat_idx = state_space.position[:state_space.num_states] < border pointers[beat_idx] = 0 # instantiate a ObservationModel with the pointers super(RNNBeatTrackingObservationModel, self).__init__(pointers) @@ -632,9 +508,8 @@ class GMMPatternTrackingObservationModel(ObservationModel): self.norm_observations = norm_observations # define the pointers of the log densities pointers = np.zeros(transition_model.num_states, dtype=np.uint32) - states = np.arange(self.transition_model.num_states) - pattern = self.transition_model.pattern(states) - position = self.transition_model.position(states) + pattern = self.transition_model.pattern + position = self.transition_model.position # Note: the densities of all GMMs are just stacked on top of each # other, so we have to to keep track of the total number of GMMs densities_idx_offset = 0 diff --git a/madmom/features/tempo.py b/madmom/features/tempo.py index a1617d5a6..a0ca1d69f 100755 --- a/madmom/features/tempo.py +++ b/madmom/features/tempo.py @@ -350,10 +350,10 @@ def interval_histogram(self, activations): # instantiate a DBN for beat tracking dbn = DBNBeatTrackingProcessor(min_bpm=self.min_bpm, max_bpm=self.max_bpm, - num_tempo_states=None, fps=self.fps) + num_tempi=None, fps=self.fps) # get the best state path by calling the viterbi algorithm path, _ = dbn.hmm.viterbi(activations.astype(np.float32)) - intervals = dbn.st.interval(path) + intervals = dbn.st.interval[path] # add the minimum interval of the beat state space intervals += dbn.st.intervals.min() # get the counts of the bins diff --git a/tests/test_features_beats_hmm.py b/tests/test_features_beats_hmm.py index 108b9432f..11ec5e3b1 100644 --- a/tests/test_features_beats_hmm.py +++ b/tests/test_features_beats_hmm.py @@ -14,38 +14,44 @@ class TestBeatStateSpaceClass(unittest.TestCase): - def setUp(self): - self.btss = BeatStateSpace(1, 4) - def test_types(self): - self.assertIsInstance(self.btss.intervals, np.ndarray) - self.assertIsInstance(self.btss.position_mapping, np.ndarray) - self.assertIsInstance(self.btss.interval_mapping, np.ndarray) - self.assertIsInstance(self.btss.num_states, int) - self.assertIsInstance(self.btss.num_intervals, int) - self.assertIsInstance(self.btss.first_states, np.ndarray) - self.assertIsInstance(self.btss.last_states, np.ndarray) + bss = BeatStateSpace(1, 4) + self.assertIsInstance(bss.intervals, np.ndarray) + self.assertIsInstance(bss.position, np.ndarray) + self.assertIsInstance(bss.interval, np.ndarray) + self.assertIsInstance(bss.num_states, int) + self.assertIsInstance(bss.num_intervals, int) + self.assertIsInstance(bss.first_states, np.ndarray) + self.assertIsInstance(bss.last_states, np.ndarray) def test_values(self): - print(self.btss.intervals) - self.assertTrue(np.allclose(self.btss.intervals, - [1, 2, 3, 4])) - self.assertTrue(np.allclose(self.btss.position_mapping, + bss = BeatStateSpace(1, 4) + self.assertTrue(np.allclose(bss.intervals, [1, 2, 3, 4])) + self.assertTrue(np.allclose(bss.position, [0, 0, 0.5, 0, 1. / 3, 2. / 3, 0, 0.25, 0.5, 0.75])) - self.assertTrue(np.allclose(self.btss.interval_mapping, - [0, 1, 1, 2, 2, 2, 3, 3, 3, 3])) - self.assertTrue(np.allclose(self.btss.first_states, - [0, 1, 3, 6])) - self.assertTrue(np.allclose(self.btss.last_states, - [0, 2, 5, 9])) - self.assertTrue(self.btss.num_states == 10) - self.assertTrue(self.btss.num_intervals == 4) - self.assertTrue(np.allclose(self.btss.position(np.arange(10)), - [0, 0, 0.5, 0, 1. / 3, 2. / 3, - 0, 0.25, 0.5, 0.75])) - self.assertTrue(np.allclose(self.btss.interval(np.arange(10)), + self.assertTrue(np.allclose(bss.interval, [0, 1, 1, 2, 2, 2, 3, 3, 3, 3])) + self.assertTrue(np.allclose(bss.first_states, [0, 1, 3, 6])) + self.assertTrue(np.allclose(bss.last_states, [0, 2, 5, 9])) + self.assertTrue(bss.num_states == 10) + self.assertTrue(bss.num_intervals == 4) + # other intervals + bss = BeatStateSpace(2, 6) + self.assertTrue(np.allclose(bss.intervals, [2, 3, 4, 5, 6])) + self.assertTrue(np.allclose(bss.position, + [0, 0.5, + 0, 1. / 3, 2. / 3, + 0, 0.25, 0.5, 0.75, + 0, 0.2, 0.4, 0.6, 0.8, + 0, 1. / 6, 2. / 6, 0.5, 4. / 6, 5. / 6])) + self.assertTrue(np.allclose(bss.interval, + [0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4])) + self.assertTrue(np.allclose(bss.first_states, [0, 2, 5, 9, 14])) + self.assertTrue(np.allclose(bss.last_states, [1, 4, 8, 13, 19])) + self.assertTrue(bss.num_states == 20) + self.assertTrue(bss.num_intervals == 5) class TestBeatTransitionModelClass(unittest.TestCase): @@ -118,8 +124,8 @@ def setUp(self): def test_types(self): self.assertIsInstance(self.ptss.bar_state_spaces, list) - self.assertIsInstance(self.ptss.position_mapping, np.ndarray) - self.assertIsInstance(self.ptss.interval_mapping, np.ndarray) + self.assertIsInstance(self.ptss.position, np.ndarray) + self.assertIsInstance(self.ptss.interval, np.ndarray) self.assertIsInstance(self.ptss.num_states, int) # self.assertIsInstance(self.ptss.num_intervals, list) self.assertIsInstance(self.ptss.num_patterns, int) @@ -133,25 +139,23 @@ def test_values(self): # self.assertTrue(self.ptss.num_intervals == [4, 5]) self.assertTrue(self.ptss.num_patterns == 2) # first pattern - self.assertTrue(np.allclose(self.ptss.position(np.arange(10)), + self.assertTrue(np.allclose(self.ptss.position[:10], [0, 0, 0.5, 0, 1. / 3, 2. / 3, 0, 0.25, 0.5, 0.75])) - self.assertTrue(np.allclose(self.ptss.interval(np.arange(10)), + self.assertTrue(np.allclose(self.ptss.interval[:10], [0, 1, 1, 2, 2, 2, 3, 3, 3, 3])) - self.assertTrue(np.allclose(self.ptss.pattern(np.arange(10)), - np.zeros(10))) + self.assertTrue(np.allclose(self.ptss.pattern[:10], 0)) # second pattern - self.assertTrue(np.allclose(self.ptss.position(np.arange(10, 30)), + self.assertTrue(np.allclose(self.ptss.position[10:], [0, 0.5, 0, 1. / 3, 2. / 3, 0, 0.25, 0.5, 0.75, 0, 0.2, 0.4, 0.6, 0.8, 0, 1. / 6, 2. / 6, 0.5, 4. / 6, 5. / 6])) - self.assertTrue(np.allclose(self.ptss.interval(np.arange(10, 30)), + self.assertTrue(np.allclose(self.ptss.interval[10:], [0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4])) - self.assertTrue(np.allclose(self.ptss.pattern(np.arange(10, 30)), - np.ones(20))) + self.assertTrue(np.allclose(self.ptss.pattern[10:], 1)) class TestPatternTrackingTransitionModelClass(unittest.TestCase): From b64f60d29dce220f626cc147db9ca59699e77497 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Thu, 17 Dec 2015 14:46:18 +0100 Subject: [PATCH 05/18] reordered tests --- tests/test_features_beats_hmm.py | 129 ++++++++++++++++--------------- 1 file changed, 66 insertions(+), 63 deletions(-) diff --git a/tests/test_features_beats_hmm.py b/tests/test_features_beats_hmm.py index 11ec5e3b1..af35dfda5 100644 --- a/tests/test_features_beats_hmm.py +++ b/tests/test_features_beats_hmm.py @@ -12,6 +12,7 @@ from madmom.features.beats_hmm import * +# state spaces class TestBeatStateSpaceClass(unittest.TestCase): def test_types(self): @@ -54,69 +55,6 @@ def test_values(self): self.assertTrue(bss.num_intervals == 5) -class TestBeatTransitionModelClass(unittest.TestCase): - - def setUp(self): - btss = BeatStateSpace(1, 4) - self.tm = BeatTransitionModel(btss, 100) - - def test_types(self): - self.assertIsInstance(self.tm, BeatTransitionModel) - self.assertIsInstance(self.tm, TransitionModel) - self.assertIsInstance(self.tm.state_space, BeatStateSpace) - self.assertIsInstance(self.tm.transition_lambda, np.ndarray) - self.assertIsInstance(self.tm.states, np.ndarray) - self.assertIsInstance(self.tm.pointers, np.ndarray) - self.assertIsInstance(self.tm.probabilities, np.ndarray) - self.assertIsInstance(self.tm.log_probabilities, np.ndarray) - self.assertIsInstance(self.tm.num_states, int) - self.assertIsInstance(self.tm.num_transitions, int) - self.assertTrue(self.tm.states.dtype == np.uint32) - self.assertTrue(self.tm.pointers.dtype == np.uint32) - self.assertTrue(self.tm.probabilities.dtype == np.float) - self.assertTrue(self.tm.log_probabilities.dtype == np.float) - - def test_values(self): - self.assertTrue(np.allclose(self.tm.states, - [0, 2, 5, 1, 5, 9, 3, 4, 5, 9, 6, 7, 8])) - self.assertTrue(np.allclose(self.tm.pointers, - [0, 1, 3, 4, 6, 7, 8, 10, 11, 12, 13])) - self.assertTrue(np.allclose(self.tm.probabilities, - [1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1])) - self.assertTrue(np.allclose(self.tm.log_probabilities, - [0, 0, -33.3333333, 0, 0, -25, 0, 0, - -33.3333333, 0, 0, 0, 0])) - self.assertTrue(self.tm.num_states == 10) - self.assertTrue(self.tm.num_transitions == 13) - - -class TestRNNBeatTrackingObservationModelClass(unittest.TestCase): - - def setUp(self): - btss = BeatStateSpace(1, 4) - self.om = RNNBeatTrackingObservationModel(btss, 4) - self.obs = np.asarray([1, 0.1, 0.01, 0], dtype=np.float32) - - def test_types(self): - self.assertIsInstance(self.om.pointers, np.ndarray) - self.assertIsInstance(self.om.densities(self.obs), np.ndarray) - self.assertIsInstance(self.om.log_densities(self.obs), np.ndarray) - self.assertTrue(self.om.pointers.dtype == np.uint32) - self.assertTrue(self.om.densities(self.obs).dtype == np.float) - self.assertTrue(self.om.log_densities(self.obs).dtype == np.float) - - def test_values(self): - self.assertTrue(np.allclose(self.om.pointers, - [0, 0, 1, 0, 1, 1, 0, 1, 1, 1])) - self.assertTrue(np.allclose(self.om.densities(self.obs), - [[1, 0], [0.1, 0.3], - [0.01, 0.33], [0, 1. / 3]])) - self.assertTrue(np.allclose(self.om.log_densities(self.obs), - [[0, -np.inf], [-2.30258508, -1.20397281], - [-4.60517021, -1.10866262], - [-np.inf, -1.09861229]])) - - class TestMultiPatternStateSpaceClass(unittest.TestCase): def setUp(self): @@ -158,6 +96,43 @@ def test_values(self): self.assertTrue(np.allclose(self.ptss.pattern[10:], 1)) +# transition models +class TestBeatTransitionModelClass(unittest.TestCase): + + def setUp(self): + btss = BeatStateSpace(1, 4) + self.tm = BeatTransitionModel(btss, 100) + + def test_types(self): + self.assertIsInstance(self.tm, BeatTransitionModel) + self.assertIsInstance(self.tm, TransitionModel) + self.assertIsInstance(self.tm.state_space, BeatStateSpace) + self.assertIsInstance(self.tm.transition_lambda, np.ndarray) + self.assertIsInstance(self.tm.states, np.ndarray) + self.assertIsInstance(self.tm.pointers, np.ndarray) + self.assertIsInstance(self.tm.probabilities, np.ndarray) + self.assertIsInstance(self.tm.log_probabilities, np.ndarray) + self.assertIsInstance(self.tm.num_states, int) + self.assertIsInstance(self.tm.num_transitions, int) + self.assertTrue(self.tm.states.dtype == np.uint32) + self.assertTrue(self.tm.pointers.dtype == np.uint32) + self.assertTrue(self.tm.probabilities.dtype == np.float) + self.assertTrue(self.tm.log_probabilities.dtype == np.float) + + def test_values(self): + self.assertTrue(np.allclose(self.tm.states, + [0, 2, 5, 1, 5, 9, 3, 4, 5, 9, 6, 7, 8])) + self.assertTrue(np.allclose(self.tm.pointers, + [0, 1, 3, 4, 6, 7, 8, 10, 11, 12, 13])) + self.assertTrue(np.allclose(self.tm.probabilities, + [1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1])) + self.assertTrue(np.allclose(self.tm.log_probabilities, + [0, 0, -33.3333333, 0, 0, -25, 0, 0, + -33.3333333, 0, 0, 0, 0])) + self.assertTrue(self.tm.num_states == 10) + self.assertTrue(self.tm.num_transitions == 13) + + class TestPatternTrackingTransitionModelClass(unittest.TestCase): def setUp(self): @@ -210,3 +185,31 @@ def test_values(self): 0, -20, -5.78e-08, 0, 0, 0, 0, 0])) self.assertTrue(self.tm.num_states == 30) self.assertTrue(self.tm.num_transitions == 41) + + +# observation models +class TestRNNBeatTrackingObservationModelClass(unittest.TestCase): + + def setUp(self): + btss = BeatStateSpace(1, 4) + self.om = RNNBeatTrackingObservationModel(btss, 4) + self.obs = np.asarray([1, 0.1, 0.01, 0], dtype=np.float32) + + def test_types(self): + self.assertIsInstance(self.om.pointers, np.ndarray) + self.assertIsInstance(self.om.densities(self.obs), np.ndarray) + self.assertIsInstance(self.om.log_densities(self.obs), np.ndarray) + self.assertTrue(self.om.pointers.dtype == np.uint32) + self.assertTrue(self.om.densities(self.obs).dtype == np.float) + self.assertTrue(self.om.log_densities(self.obs).dtype == np.float) + + def test_values(self): + self.assertTrue(np.allclose(self.om.pointers, + [0, 0, 1, 0, 1, 1, 0, 1, 1, 1])) + self.assertTrue(np.allclose(self.om.densities(self.obs), + [[1, 0], [0.1, 0.3], + [0.01, 0.33], [0, 1. / 3]])) + self.assertTrue(np.allclose(self.om.log_densities(self.obs), + [[0, -np.inf], [-2.30258508, -1.20397281], + [-4.60517021, -1.10866262], + [-np.inf, -1.09861229]])) From 38a98ac03df9e471cd0db4d72dbd9e8e8566b10f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Mon, 21 Dec 2015 17:02:18 +0100 Subject: [PATCH 06/18] renamed the mapping arrays to state_{positions|intervals|etc.} --- madmom/features/beats.py | 6 +- madmom/features/beats_hmm.pyx | 171 ++++++++++++++++++++++--------- madmom/features/tempo.py | 2 +- tests/test_features_beats_hmm.py | 108 ++++++++++++++----- 4 files changed, 209 insertions(+), 78 deletions(-) diff --git a/madmom/features/beats.py b/madmom/features/beats.py index c4864b473..fe9a1ddcc 100755 --- a/madmom/features/beats.py +++ b/madmom/features/beats.py @@ -734,7 +734,7 @@ def process(self, activations): else: # just take the frames with the smallest beat state values from scipy.signal import argrelmin - beats = argrelmin(self.st.position[path], mode='wrap')[0] + beats = argrelmin(self.st.state_positions[path], mode='wrap')[0] # recheck if they are within the "beat range", i.e. the pointers # of the observation model for that state must be 0 # Note: interpolation and alignment of the beats to be at state 0 @@ -977,9 +977,9 @@ def process(self, activations): path, _ = self.hmm.viterbi(activations) # get the corresponding pattern (use only the first state, since it # doesn't change throughout the sequence) - pattern = self.st.pattern[path[0]] + pattern = self.st.state_patterns[path[0]] # the position inside the pattern (0..1) - position = self.st.position[path] + position = self.st.state_positions[path] # beat position (= weighted by number of beats in bar) beat_counter = (position * self.num_beats[pattern]).astype(int) # transitions are the points where the beat counters change diff --git a/madmom/features/beats_hmm.pyx b/madmom/features/beats_hmm.pyx index de4106a5d..8ea8a8140 100644 --- a/madmom/features/beats_hmm.pyx +++ b/madmom/features/beats_hmm.pyx @@ -42,6 +42,23 @@ class BeatStateSpace(object): Number of intervals to model; if set, limit the number of intervals and use a log spacing instead of the default linear spacing. + Attributes + ---------- + num_states : int + Number of states. + intervals : numpy array + Modeled intervals. + num_intervals : int + Number of intervals. + state_positions : numpy array + Positions of the states. + state_intervals : numpy array + Intervals of the states. + first_states : numpy array + First states for each interval. + last_states : numpy array + Last states for each interval. + References ---------- .. [1] Florian Krebs, Sebastian Böck and Gerhard Widmer, @@ -51,7 +68,8 @@ class BeatStateSpace(object): """ - def __init__(self, min_interval, max_interval, num_intervals=None): + def __init__(self, min_interval, max_interval, num_intervals=None, + offset=0): # per default, use a linear spacing of the tempi intervals = np.arange(np.round(min_interval), np.round(max_interval) + 1) @@ -69,38 +87,101 @@ class BeatStateSpace(object): # quantize to integer tempo states intervals = np.unique(np.round(intervals)) num_log_tempi += 1 - # intervals to model + # save the intervals self.intervals = np.ascontiguousarray(intervals, dtype=np.uint32) + # number of states and intervals + self.num_states = int(np.sum(intervals)) + self.num_intervals = len(intervals) + # define first and last states + first_states = np.cumsum(np.r_[0, self.intervals[:-1]]) + self.first_states = first_states.astype(np.uint32) + self.last_states = np.cumsum(self.intervals).astype(np.uint32) - 1 # define the position and interval states - self.position = np.empty(self.num_states) - self.interval = np.empty(self.num_states, dtype=np.uint32) + self.state_positions = np.empty(self.num_states) + self.state_intervals = np.empty(self.num_states, dtype=np.uint32) idx = interval = 0 for i in self.intervals: - self.position[idx: idx + i] = np.linspace(0, 1, i, endpoint=False) - self.interval[idx: idx + i] = interval + self.state_positions[idx: idx + i] = np.linspace(0, 1, i, + endpoint=False) + self.state_intervals[idx: idx + i] = interval # increase counters idx += i interval += 1 - @property - def num_states(self): - """Number of states.""" - return int(np.sum(self.intervals)) - @property - def num_intervals(self): - """Number of different intervals.""" - return len(self.intervals) +class BarStateSpace(object): + """ + State space for bar tracking with a HMM. - @property - def first_states(self): - """First state for each interval.""" - return np.cumsum(np.r_[0, self.intervals[:-1]]).astype(np.uint32) + Parameters + ---------- + num_beats : int + Number of beats per bar. + min_interval : float + Minimum beat interval to model. + max_interval : float + Maximum beat interval to model. + num_intervals : int, optional + Number of beat intervals to model; if set, limit the number of + intervals and use a log spacing instead of the default linear spacing. - @property - def last_states(self): - """Last state for each interval.""" - return np.cumsum(self.intervals).astype(np.uint32) - 1 + Attributes + ---------- + num_beats : int. + Number of beats. + num_states : int + Number of states. + num_intervals : int + Number of intervals. + state_positions : numpy array + Positions of the states. + state_intervals : numpy array + Intervals of the states. + beat_state_offsets : numpy array + State offsets of the beats. + first_states : numpy array + First states for each interval. + last_states : numpy array + Last states for each interval. + + References + ---------- + .. [1] Florian Krebs, Sebastian Böck and Gerhard Widmer, + "An Efficient State Space Model for Joint Tempo and Meter Tracking", + Proceedings of the 16th International Society for Music Information + Retrieval Conference (ISMIR), 2015. + + """ + + def __init__(self, num_beats, min_interval, max_interval, + num_intervals=None): + # model N beats as a bar + self.num_beats = int(num_beats) + self.state_positions = np.empty(0, dtype=np.uint32) + self.state_intervals = np.empty(0, dtype=np.uint32) + self.beat_state_offsets = np.empty(0, dtype=np.int) + self.num_states = 0 + self._first_states = [] + self._last_states = [] + # create a beat state space + bss = BeatStateSpace(min_interval, max_interval, num_intervals) + offset = 0 + for n in range(self.num_beats): + # define position and interval states + self.state_positions = np.hstack((self.state_positions, + bss.state_positions + n)) + self.state_intervals = np.hstack((self.state_intervals, + bss.state_intervals)) + self.num_states += bss.num_states + self._first_states.append(bss.first_states + offset) + self._last_states.append(bss.last_states + offset) + # save the offsets and increase afterwards + self.beat_state_offsets = np.hstack((self.beat_state_offsets, + offset)) + offset += bss.num_states + # save the first / last interval states + self.first_states = self._first_states[0] + self.last_states = self._last_states[-1] class MultiPatternStateSpace(object): @@ -139,32 +220,30 @@ class MultiPatternStateSpace(object): num_intervals = [None] * len(min_intervals) # for each pattern, compute a bar state space (i.e. a beat state space # which spans a complete bar) - bar_state_spaces = [] + state_spaces = [] enum = enumerate(zip(min_intervals, max_intervals, num_intervals)) for pattern, (min_, max_, num_) in enum: # create a BeatStateSpace and append it to the list - bar_state_spaces.append(BeatStateSpace(min_, max_, num_)) - self.bar_state_spaces = bar_state_spaces + state_spaces.append(BeatStateSpace(min_, max_, num_)) + self.state_spaces = state_spaces # define the position, interval and pattern states - self.position = \ - np.hstack([st.position[np.arange(st.num_states, dtype=np.int)] - for st in self.bar_state_spaces]) - self.interval = \ - np.hstack([st.interval[np.arange(st.num_states, dtype=np.int)] - for st in self.bar_state_spaces]) - self.pattern = \ + self.state_positions = \ + np.hstack([st.state_positions for st in self.state_spaces]) + self.state_intervals = \ + np.hstack([st.state_intervals for st in self.state_spaces]) + self.state_patterns = \ np.hstack([np.repeat(i, st.num_states) - for i, st in enumerate(self.bar_state_spaces)]) + for i, st in enumerate(self.state_spaces)]) @property def num_states(self): """Number of states.""" - return int(sum([st.num_states for st in self.bar_state_spaces])) + return int(sum([st.num_states for st in self.state_spaces])) @property def num_patterns(self): """Number of rhythmic patterns""" - return len(self.bar_state_spaces) + return len(self.state_spaces) # transition models @@ -219,7 +298,7 @@ class BeatTransitionModel(TransitionModel): """ # cache variables # Note: convert all intervals to float here - cdef float [::1] intervals = \ + cdef float [::1] intervals =\ self.state_space.intervals.astype(np.float32) cdef double transition_lambda = self.transition_lambda # number of tempo & total states @@ -239,14 +318,15 @@ class BeatTransitionModel(TransitionModel): cdef double [:, ::1] trans_prob = np.zeros((num_intervals, num_intervals), dtype=np.float) - # iterate over all tempo states + # iterate over all interval states for old_interval in range(num_intervals): # reset probability sum prob_sum = 0 - # compute transition probabilities to all other tempo states + # compute transition probabilities to all other interval states for new_interval in range(num_intervals): # compute the ratio of the two tempi - ratio = intervals[new_interval] / intervals[old_interval] + ratio = intervals[new_interval] / \ + intervals[old_interval] # compute the probability for the tempo change following an # exponential distribution prob = exp(-transition_lambda * abs(ratio - 1)) @@ -373,7 +453,7 @@ class MultiPatternTransitionModel(TransitionModel): self.state_space = state_space self.transition_lambda = transition_lambda # compute the transitions for each pattern and stack them - enum = enumerate(zip(state_space.bar_state_spaces, transition_lambda)) + enum = enumerate(zip(state_space.state_spaces, transition_lambda)) for pattern, (state_space, transition_lambda) in enum: # create a BeatTransitionModel tm = BeatTransitionModel(state_space, transition_lambda) @@ -436,8 +516,7 @@ class RNNBeatTrackingObservationModel(ObservationModel): pointers = np.ones(state_space.num_states, dtype=np.uint32) # unless they are in the beat range of the state space border = 1. / observation_lambda - beat_idx = state_space.position[:state_space.num_states] < border - pointers[beat_idx] = 0 + pointers[state_space.state_positions < border] = 0 # instantiate a ObservationModel with the pointers super(RNNBeatTrackingObservationModel, self).__init__(pointers) @@ -508,8 +587,8 @@ class GMMPatternTrackingObservationModel(ObservationModel): self.norm_observations = norm_observations # define the pointers of the log densities pointers = np.zeros(transition_model.num_states, dtype=np.uint32) - pattern = self.transition_model.pattern - position = self.transition_model.position + patterns = self.transition_model.state_patterns + positions = self.transition_model.state_positions # Note: the densities of all GMMs are just stacked on top of each # other, so we have to to keep track of the total number of GMMs densities_idx_offset = 0 @@ -519,8 +598,8 @@ class GMMPatternTrackingObservationModel(ObservationModel): # distribute the observation densities defined by the GMMs # uniformly across the entire state space (for this pattern) # since the densities are just stacked, add the offset - pointers[pattern == p] = (position[pattern == p] * num_gmms + - densities_idx_offset) + pointers[patterns == p] = (positions[patterns == p] * num_gmms + + densities_idx_offset) # increase the offset by the number of GMMs densities_idx_offset += num_gmms # instantiate a ObservationModel with the pointers diff --git a/madmom/features/tempo.py b/madmom/features/tempo.py index a0ca1d69f..c7707bf88 100755 --- a/madmom/features/tempo.py +++ b/madmom/features/tempo.py @@ -353,7 +353,7 @@ def interval_histogram(self, activations): num_tempi=None, fps=self.fps) # get the best state path by calling the viterbi algorithm path, _ = dbn.hmm.viterbi(activations.astype(np.float32)) - intervals = dbn.st.interval[path] + intervals = dbn.st.state_intervals[path] # add the minimum interval of the beat state space intervals += dbn.st.intervals.min() # get the counts of the bins diff --git a/tests/test_features_beats_hmm.py b/tests/test_features_beats_hmm.py index af35dfda5..ff87a403d 100644 --- a/tests/test_features_beats_hmm.py +++ b/tests/test_features_beats_hmm.py @@ -18,20 +18,26 @@ class TestBeatStateSpaceClass(unittest.TestCase): def test_types(self): bss = BeatStateSpace(1, 4) self.assertIsInstance(bss.intervals, np.ndarray) - self.assertIsInstance(bss.position, np.ndarray) - self.assertIsInstance(bss.interval, np.ndarray) - self.assertIsInstance(bss.num_states, int) - self.assertIsInstance(bss.num_intervals, int) + self.assertIsInstance(bss.state_positions, np.ndarray) + self.assertIsInstance(bss.state_intervals, np.ndarray) self.assertIsInstance(bss.first_states, np.ndarray) self.assertIsInstance(bss.last_states, np.ndarray) + self.assertIsInstance(bss.num_states, int) + self.assertIsInstance(bss.num_intervals, int) + # dtypes + self.assertTrue(bss.intervals.dtype == np.uint32) + self.assertTrue(bss.state_positions.dtype == np.float) + self.assertTrue(bss.state_intervals.dtype == np.uint32) + self.assertTrue(bss.first_states.dtype == np.uint32) + self.assertTrue(bss.last_states.dtype == np.uint32) def test_values(self): bss = BeatStateSpace(1, 4) self.assertTrue(np.allclose(bss.intervals, [1, 2, 3, 4])) - self.assertTrue(np.allclose(bss.position, + self.assertTrue(np.allclose(bss.state_positions, [0, 0, 0.5, 0, 1. / 3, 2. / 3, 0, 0.25, 0.5, 0.75])) - self.assertTrue(np.allclose(bss.interval, + self.assertTrue(np.allclose(bss.state_intervals, [0, 1, 1, 2, 2, 2, 3, 3, 3, 3])) self.assertTrue(np.allclose(bss.first_states, [0, 1, 3, 6])) self.assertTrue(np.allclose(bss.last_states, [0, 2, 5, 9])) @@ -40,13 +46,13 @@ def test_values(self): # other intervals bss = BeatStateSpace(2, 6) self.assertTrue(np.allclose(bss.intervals, [2, 3, 4, 5, 6])) - self.assertTrue(np.allclose(bss.position, + self.assertTrue(np.allclose(bss.state_positions, [0, 0.5, 0, 1. / 3, 2. / 3, 0, 0.25, 0.5, 0.75, 0, 0.2, 0.4, 0.6, 0.8, 0, 1. / 6, 2. / 6, 0.5, 4. / 6, 5. / 6])) - self.assertTrue(np.allclose(bss.interval, + self.assertTrue(np.allclose(bss.state_intervals, [0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4])) self.assertTrue(np.allclose(bss.first_states, [0, 2, 5, 9, 14])) @@ -55,45 +61,91 @@ def test_values(self): self.assertTrue(bss.num_intervals == 5) -class TestMultiPatternStateSpaceClass(unittest.TestCase): +class TestBarStateSpaceClass(unittest.TestCase): - def setUp(self): - self.ptss = MultiPatternStateSpace([1, 2], [4, 6]) + def test_types(self): + bss = BarStateSpace(2, 1, 4) + self.assertIsInstance(bss.num_beats, int) + self.assertIsInstance(bss.num_states, int) + # self.assertIsInstance(bss.intervals, np.ndarray) + self.assertIsInstance(bss.state_positions, np.ndarray) + self.assertIsInstance(bss.state_intervals, np.ndarray) + self.assertIsInstance(bss.first_states, np.ndarray) + self.assertIsInstance(bss.last_states, np.ndarray) + self.assertIsInstance(bss.beat_state_offsets, np.ndarray) + # dtypes + # self.assertTrue(bss.intervals.dtype == np.uint32) + self.assertTrue(bss.state_positions.dtype == np.float) + self.assertTrue(bss.state_intervals.dtype == np.uint32) + self.assertTrue(bss.first_states.dtype == np.uint32) + self.assertTrue(bss.last_states.dtype == np.uint32) + print(bss.beat_state_offsets.dtype) + self.assertTrue(bss.beat_state_offsets.dtype == np.int) + + def test_values(self): + bss = BarStateSpace(2, 1, 4) + self.assertTrue(bss.num_beats == 2) + self.assertTrue(bss.num_states == 20) + # self.assertTrue(np.allclose(bss.intervals, [1, 2, 3, 4])) + # self.assertTrue(np.allclose(bss.beat[:10], 0)) + # self.assertTrue(np.allclose(bss.beat[10:], 1)) + self.assertTrue(np.allclose(bss.state_positions, + [0, 0, 0.5, 0, 1. / 3, 2. / 3, + 0, 0.25, 0.5, 0.75, + 1, 1, 1.5, 1, 4. / 3, 5. / 3, + 1, 1.25, 1.5, 1.75])) + self.assertTrue(np.allclose(bss.state_intervals, + [0, 1, 1, 2, 2, 2, 3, 3, 3, 3, + 0, 1, 1, 2, 2, 2, 3, 3, 3, 3])) + self.assertTrue(np.allclose(bss.first_states, [0, 1, 3, 6])) + self.assertTrue(np.allclose(bss.last_states, [10, 12, 15, 19])) + # self.assertTrue(bss.num_intervals == 4) + + +class TestMultiPatternStateSpaceClass(unittest.TestCase): def test_types(self): - self.assertIsInstance(self.ptss.bar_state_spaces, list) - self.assertIsInstance(self.ptss.position, np.ndarray) - self.assertIsInstance(self.ptss.interval, np.ndarray) - self.assertIsInstance(self.ptss.num_states, int) - # self.assertIsInstance(self.ptss.num_intervals, list) - self.assertIsInstance(self.ptss.num_patterns, int) + mpss = MultiPatternStateSpace([1, 2], [4, 6]) + self.assertIsInstance(mpss.state_spaces, list) + self.assertIsInstance(mpss.state_positions, np.ndarray) + self.assertIsInstance(mpss.state_intervals, np.ndarray) + self.assertIsInstance(mpss.num_states, int) + # self.assertIsInstance(mpss.num_intervals, list) + self.assertIsInstance(mpss.num_patterns, int) + # dtypes + # self.assertTrue(mpss.intervals.dtype == np.uint32) + self.assertTrue(mpss.state_positions.dtype == np.float) + self.assertTrue(mpss.state_intervals.dtype == np.uint32) + # self.assertTrue(mpss.first_states.dtype == np.uint32) + # self.assertTrue(mpss.last_states.dtype == np.uint32) def test_values(self): - self.assertTrue(np.allclose(self.ptss.bar_state_spaces[0].intervals, + mpss = MultiPatternStateSpace([1, 2], [4, 6]) + self.assertTrue(np.allclose(mpss.state_spaces[0].intervals, [1, 2, 3, 4])) - self.assertTrue(np.allclose(self.ptss.bar_state_spaces[1].intervals, + self.assertTrue(np.allclose(mpss.state_spaces[1].intervals, [2, 3, 4, 5, 6])) - self.assertTrue(self.ptss.num_states == 30) - # self.assertTrue(self.ptss.num_intervals == [4, 5]) - self.assertTrue(self.ptss.num_patterns == 2) + self.assertTrue(mpss.num_states == 30) + # self.assertTrue(mpss.num_intervals == [4, 5]) + self.assertTrue(mpss.num_patterns == 2) # first pattern - self.assertTrue(np.allclose(self.ptss.position[:10], + self.assertTrue(np.allclose(mpss.state_positions[:10], [0, 0, 0.5, 0, 1. / 3, 2. / 3, 0, 0.25, 0.5, 0.75])) - self.assertTrue(np.allclose(self.ptss.interval[:10], + self.assertTrue(np.allclose(mpss.state_intervals[:10], [0, 1, 1, 2, 2, 2, 3, 3, 3, 3])) - self.assertTrue(np.allclose(self.ptss.pattern[:10], 0)) + self.assertTrue(np.allclose(mpss.state_patterns[:10], 0)) # second pattern - self.assertTrue(np.allclose(self.ptss.position[10:], + self.assertTrue(np.allclose(mpss.state_positions[10:], [0, 0.5, 0, 1. / 3, 2. / 3, 0, 0.25, 0.5, 0.75, 0, 0.2, 0.4, 0.6, 0.8, 0, 1. / 6, 2. / 6, 0.5, 4. / 6, 5. / 6])) - self.assertTrue(np.allclose(self.ptss.interval[10:], + self.assertTrue(np.allclose(mpss.state_intervals[10:], [0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4])) - self.assertTrue(np.allclose(self.ptss.pattern[10:], 1)) + self.assertTrue(np.allclose(mpss.state_patterns[10:], 1)) # transition models From aa3ed89897e3bacd97e0b056cdcfdcf0d8cf9533 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Fri, 8 Jan 2016 15:03:16 +0100 Subject: [PATCH 07/18] BeatStateSpace.state_intervals refers to real intervals, not indices --- madmom/features/beats_hmm.pyx | 10 ++++------ madmom/features/tempo.py | 2 -- tests/test_features_beats_hmm.py | 16 ++++++++-------- 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/madmom/features/beats_hmm.pyx b/madmom/features/beats_hmm.pyx index 8ea8a8140..0ccee3d15 100644 --- a/madmom/features/beats_hmm.pyx +++ b/madmom/features/beats_hmm.pyx @@ -68,8 +68,7 @@ class BeatStateSpace(object): """ - def __init__(self, min_interval, max_interval, num_intervals=None, - offset=0): + def __init__(self, min_interval, max_interval, num_intervals=None): # per default, use a linear spacing of the tempi intervals = np.arange(np.round(min_interval), np.round(max_interval) + 1) @@ -99,14 +98,13 @@ class BeatStateSpace(object): # define the position and interval states self.state_positions = np.empty(self.num_states) self.state_intervals = np.empty(self.num_states, dtype=np.uint32) - idx = interval = 0 + idx = 0 for i in self.intervals: self.state_positions[idx: idx + i] = np.linspace(0, 1, i, endpoint=False) - self.state_intervals[idx: idx + i] = interval - # increase counters + self.state_intervals[idx: idx + i] = i + # increase counter idx += i - interval += 1 class BarStateSpace(object): diff --git a/madmom/features/tempo.py b/madmom/features/tempo.py index c7707bf88..d8fb9b05e 100755 --- a/madmom/features/tempo.py +++ b/madmom/features/tempo.py @@ -354,8 +354,6 @@ def interval_histogram(self, activations): # get the best state path by calling the viterbi algorithm path, _ = dbn.hmm.viterbi(activations.astype(np.float32)) intervals = dbn.st.state_intervals[path] - # add the minimum interval of the beat state space - intervals += dbn.st.intervals.min() # get the counts of the bins bins = np.bincount(intervals, minlength=dbn.st.intervals.max() + 1) # truncate everything below the minimum interval of the state space diff --git a/tests/test_features_beats_hmm.py b/tests/test_features_beats_hmm.py index ff87a403d..02277f0cc 100644 --- a/tests/test_features_beats_hmm.py +++ b/tests/test_features_beats_hmm.py @@ -38,7 +38,7 @@ def test_values(self): [0, 0, 0.5, 0, 1. / 3, 2. / 3, 0, 0.25, 0.5, 0.75])) self.assertTrue(np.allclose(bss.state_intervals, - [0, 1, 1, 2, 2, 2, 3, 3, 3, 3])) + [1, 2, 2, 3, 3, 3, 4, 4, 4, 4])) self.assertTrue(np.allclose(bss.first_states, [0, 1, 3, 6])) self.assertTrue(np.allclose(bss.last_states, [0, 2, 5, 9])) self.assertTrue(bss.num_states == 10) @@ -53,8 +53,8 @@ def test_values(self): 0, 0.2, 0.4, 0.6, 0.8, 0, 1. / 6, 2. / 6, 0.5, 4. / 6, 5. / 6])) self.assertTrue(np.allclose(bss.state_intervals, - [0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4])) + [2, 2, 3, 3, 3, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6])) self.assertTrue(np.allclose(bss.first_states, [0, 2, 5, 9, 14])) self.assertTrue(np.allclose(bss.last_states, [1, 4, 8, 13, 19])) self.assertTrue(bss.num_states == 20) @@ -95,8 +95,8 @@ def test_values(self): 1, 1, 1.5, 1, 4. / 3, 5. / 3, 1, 1.25, 1.5, 1.75])) self.assertTrue(np.allclose(bss.state_intervals, - [0, 1, 1, 2, 2, 2, 3, 3, 3, 3, - 0, 1, 1, 2, 2, 2, 3, 3, 3, 3])) + [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, + 1, 2, 2, 3, 3, 3, 4, 4, 4, 4])) self.assertTrue(np.allclose(bss.first_states, [0, 1, 3, 6])) self.assertTrue(np.allclose(bss.last_states, [10, 12, 15, 19])) # self.assertTrue(bss.num_intervals == 4) @@ -133,7 +133,7 @@ def test_values(self): [0, 0, 0.5, 0, 1. / 3, 2. / 3, 0, 0.25, 0.5, 0.75])) self.assertTrue(np.allclose(mpss.state_intervals[:10], - [0, 1, 1, 2, 2, 2, 3, 3, 3, 3])) + [1, 2, 2, 3, 3, 3, 4, 4, 4, 4])) self.assertTrue(np.allclose(mpss.state_patterns[:10], 0)) # second pattern self.assertTrue(np.allclose(mpss.state_positions[10:], @@ -143,8 +143,8 @@ def test_values(self): 0, 0.2, 0.4, 0.6, 0.8, 0, 1. / 6, 2. / 6, 0.5, 4. / 6, 5. / 6])) self.assertTrue(np.allclose(mpss.state_intervals[10:], - [0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4])) + [2, 2, 3, 3, 3, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6])) self.assertTrue(np.allclose(mpss.state_patterns[10:], 1)) From 0e37f57d70ca794489c20016272811b3238889c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Fri, 8 Jan 2016 15:04:02 +0100 Subject: [PATCH 08/18] refactored the exponential tempo transition into a function --- madmom/features/beats_hmm.pyx | 211 ++++++++++++++----------------- tests/test_features_beats_hmm.py | 2 +- 2 files changed, 93 insertions(+), 120 deletions(-) diff --git a/madmom/features/beats_hmm.pyx b/madmom/features/beats_hmm.pyx index 0ccee3d15..b074c1658 100644 --- a/madmom/features/beats_hmm.pyx +++ b/madmom/features/beats_hmm.pyx @@ -159,6 +159,7 @@ class BarStateSpace(object): self.state_intervals = np.empty(0, dtype=np.uint32) self.beat_state_offsets = np.empty(0, dtype=np.int) self.num_states = 0 + # save the first and last states of the individual beats in a list self._first_states = [] self._last_states = [] # create a beat state space @@ -244,6 +245,73 @@ class MultiPatternStateSpace(object): return len(self.state_spaces) +# transition distributions +@cython.cdivision(True) +@cython.boundscheck(False) +@cython.wraparound(False) +def exponential_transition(double [::1] from_intervals, + double [::1] to_intervals, double transition_lambda, + double threshold=np.spacing(1)): + """ + + Parameters + ---------- + from_intervals : numpy array + Intervals where the transitions originate from. + to_intervals : : numpy array + Intervals where the transitions destinate to. + transition_lambda : float + Lambda for the exponential tempo change distribution (higher values + prefer a constant tempo from one beat/bar to the next one). + threshold : float, optional + Set transition probabilities below this threshold to zero. + + Returns + ------- + probabilities : numpy array, shape (num_from_intervals, num_to_intervals) + Probability of each transition from an interval to another. + + References + ---------- + .. [1] Florian Krebs, Sebastian Böck and Gerhard Widmer, + "An Efficient State Space Model for Joint Tempo and Meter Tracking", + Proceedings of the 16th International Society for Music Information + Retrieval Conference (ISMIR), 2015. + + """ + # define variables + cdef unsigned int from_int, to_int + cdef double prob_sum, ratio, prob + cdef unsigned int num_to = len(to_intervals) + cdef unsigned int num_from = len(from_intervals) + # transition matrix for the tempo changes + cdef double [:, ::1] trans_prob = np.zeros((num_from, num_to), + dtype=np.float) + # iterate over all interval states + for from_int in range(num_from): + # reset probability sum + prob_sum = 0 + # compute transition probabilities to all other interval states + for to_int in range(num_to): + # compute the ratio of the two intervals + ratio = to_intervals[to_int] / from_intervals[from_int] + # compute the probability for the tempo change following an + # exponential distribution + prob = exp(-transition_lambda * abs(ratio - 1)) + # keep only transition probabilities > threshold + if prob > threshold: + # save the probability + trans_prob[from_int, to_int] = prob + # collect normalization data + prob_sum += prob + # normalize the interval transitions to other intervals + # TODO: make the normailsation optional!? + for to_int in range(num_to): + trans_prob[from_int, to_int] /= prob_sum + # return the transition probabilities + return np.asarray(trans_prob) + + # transition models class BeatTransitionModel(TransitionModel): """ @@ -269,127 +337,32 @@ class BeatTransitionModel(TransitionModel): def __init__(self, state_space, transition_lambda): # save attributes self.state_space = state_space - self.transition_lambda = np.asarray(transition_lambda, dtype=np.float) - # compute the transitions - transitions = self.make_sparse(*self.compute_transitions()) - # instantiate a TransitionModel with the transitions + self.transition_lambda = float(transition_lambda) + # intra state space connections (i.e. same tempi) + states = np.arange(state_space.num_states, dtype=np.uint32) + # remove the transitions into the first states + states = np.setdiff1d(states, state_space.first_states) + prev_states = states - 1 + probabilities = np.ones_like(states, dtype=np.float) + # self connection of the state space (i.e. tempo changes) + to_states = state_space.first_states + from_states = state_space.last_states + # generate an exponential tempo transition + from_int = state_space.state_intervals[from_states].astype(np.float) + to_int = state_space.state_intervals[to_states].astype(np.float) + prob = exponential_transition(from_int, to_int, self.transition_lambda) + # use only the states with transitions to/from != 0 + from_states = state_space.last_states[np.nonzero(prob)[0]] + to_states = state_space.first_states[np.nonzero(prob)[1]] + # append to the arrays + states = np.hstack((states, to_states)) + prev_states = np.hstack((prev_states, from_states)) + probabilities = np.hstack((probabilities, prob[prob != 0])) + # make the transitions sparse + transitions = self.make_sparse(states, prev_states, probabilities) + # instantiate a TransitionModel super(BeatTransitionModel, self).__init__(*transitions) - @cython.cdivision(True) - @cython.boundscheck(False) - @cython.wraparound(False) - def compute_transitions(self): - """ - Compute the transitions (i.e. the probabilities to move from any state - to another one) and return them in a dense format understood by - :func:`.ml.hmm.TransitionModel.make_sparse`. - - Returns - ------- - states : numpy array - Array with states (i.e. destination states). - prev_states : numpy array - Array with previous states (i.e. origination states). - probabilities : numpy array - Transition probabilities. - - """ - # cache variables - # Note: convert all intervals to float here - cdef float [::1] intervals =\ - self.state_space.intervals.astype(np.float32) - cdef double transition_lambda = self.transition_lambda - # number of tempo & total states - cdef unsigned int num_intervals = self.state_space.num_intervals - cdef unsigned int num_states = self.state_space.num_states - # counters etc. - cdef unsigned int state, prev_state, old_interval, new_interval - cdef double ratio, u, prob, prob_sum - cdef double threshold = np.spacing(1) - - # to determine the number of transitions, we need to determine the - # number of tempo change transitions first; also compute their - # probabilities for later use - - # tempo changes can only occur at the beginning of a beat - # transition matrix for the tempo changes - cdef double [:, ::1] trans_prob = np.zeros((num_intervals, - num_intervals), - dtype=np.float) - # iterate over all interval states - for old_interval in range(num_intervals): - # reset probability sum - prob_sum = 0 - # compute transition probabilities to all other interval states - for new_interval in range(num_intervals): - # compute the ratio of the two tempi - ratio = intervals[new_interval] / \ - intervals[old_interval] - # compute the probability for the tempo change following an - # exponential distribution - prob = exp(-transition_lambda * abs(ratio - 1)) - # keep only transition probabilities > threshold - if prob > threshold: - # save the probability - trans_prob[old_interval, new_interval] = prob - # collect normalization data - prob_sum += prob - # normalize the tempo transitions to other tempi - for new_interval in range(num_intervals): - trans_prob[old_interval, new_interval] /= prob_sum - - # number of tempo transitions (= non-zero probabilities) - cdef unsigned int num_tempo_transitions = \ - len(np.nonzero(trans_prob)[0]) - - # apart from the very beginning of a beat, the tempo stays the same, - # thus the number of transitions is equal to the total number of states - # plus the number of tempo transitions minus the number of tempo states - # since these transitions are already included in the tempo transitions - cdef int num_transitions = num_states + num_tempo_transitions - \ - num_intervals - # arrays for transition matrix creation - cdef unsigned int [::1] states = \ - np.empty(num_transitions, dtype=np.uint32) - cdef unsigned int [::1] prev_states = \ - np.empty(num_transitions, dtype=np.uint32) - # init the probabilities with ones, so we have to care only about the - # probabilities of the tempo transitions - cdef double [::1] probabilities = \ - np.ones(num_transitions, dtype=np.float) - - # cache first and last positions - cdef unsigned int [::1] first_beat_positions = \ - self.state_space.first_states - cdef unsigned int [::1] last_beat_positions = \ - self.state_space.last_states - # state counter - cdef int i = 0 - # loop over all tempi - for new_interval in range(num_intervals): - # generate all transitions from other tempi - for old_interval in range(num_intervals): - # but only if it is a probable transition - if trans_prob[old_interval, new_interval] != 0: - # generate a transition - prev_states[i] = last_beat_positions[old_interval] - states[i] = first_beat_positions[new_interval] - probabilities[i] = trans_prob[old_interval, new_interval] - # increase counter - i += 1 - # transitions within the same tempo - for prev_state in range(first_beat_positions[new_interval], - last_beat_positions[new_interval]): - # generate a transition with probability 1 - prev_states[i] = prev_state - states[i] = prev_state + 1 - # Note: skip setting the probability here, since they were - # initialised with 1 - # increase counter - i += 1 - # return the arrays - return states, prev_states, probabilities - class MultiPatternTransitionModel(TransitionModel): """ diff --git a/tests/test_features_beats_hmm.py b/tests/test_features_beats_hmm.py index 02277f0cc..7abe8bf12 100644 --- a/tests/test_features_beats_hmm.py +++ b/tests/test_features_beats_hmm.py @@ -159,7 +159,7 @@ def test_types(self): self.assertIsInstance(self.tm, BeatTransitionModel) self.assertIsInstance(self.tm, TransitionModel) self.assertIsInstance(self.tm.state_space, BeatStateSpace) - self.assertIsInstance(self.tm.transition_lambda, np.ndarray) + self.assertIsInstance(self.tm.transition_lambda, float) self.assertIsInstance(self.tm.states, np.ndarray) self.assertIsInstance(self.tm.pointers, np.ndarray) self.assertIsInstance(self.tm.probabilities, np.ndarray) From e1427efbb4d79e6cd2c8e02ea90ded9bde3b9470 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Fri, 8 Jan 2016 15:38:22 +0100 Subject: [PATCH 09/18] added BarTransitionModel and tests --- madmom/features/beats_hmm.pyx | 81 +++++++++++++++++++++++++------- tests/test_features_beats_hmm.py | 60 +++++++++++++++++++---- 2 files changed, 117 insertions(+), 24 deletions(-) diff --git a/madmom/features/beats_hmm.pyx b/madmom/features/beats_hmm.pyx index b074c1658..3b3b201d9 100644 --- a/madmom/features/beats_hmm.pyx +++ b/madmom/features/beats_hmm.pyx @@ -137,10 +137,10 @@ class BarStateSpace(object): Intervals of the states. beat_state_offsets : numpy array State offsets of the beats. - first_states : numpy array - First states for each interval. - last_states : numpy array - Last states for each interval. + first_states : list + First interval states for each beat. + last_states : list + Last interval states for each beat. References ---------- @@ -160,8 +160,8 @@ class BarStateSpace(object): self.beat_state_offsets = np.empty(0, dtype=np.int) self.num_states = 0 # save the first and last states of the individual beats in a list - self._first_states = [] - self._last_states = [] + self.first_states = [] + self.last_states = [] # create a beat state space bss = BeatStateSpace(min_interval, max_interval, num_intervals) offset = 0 @@ -172,15 +172,12 @@ class BarStateSpace(object): self.state_intervals = np.hstack((self.state_intervals, bss.state_intervals)) self.num_states += bss.num_states - self._first_states.append(bss.first_states + offset) - self._last_states.append(bss.last_states + offset) + self.first_states.append(bss.first_states + offset) + self.last_states.append(bss.last_states + offset) # save the offsets and increase afterwards self.beat_state_offsets = np.hstack((self.beat_state_offsets, offset)) offset += bss.num_states - # save the first / last interval states - self.first_states = self._first_states[0] - self.last_states = self._last_states[-1] class MultiPatternStateSpace(object): @@ -352,11 +349,9 @@ class BeatTransitionModel(TransitionModel): to_int = state_space.state_intervals[to_states].astype(np.float) prob = exponential_transition(from_int, to_int, self.transition_lambda) # use only the states with transitions to/from != 0 - from_states = state_space.last_states[np.nonzero(prob)[0]] - to_states = state_space.first_states[np.nonzero(prob)[1]] - # append to the arrays - states = np.hstack((states, to_states)) - prev_states = np.hstack((prev_states, from_states)) + prev_states = np.hstack((prev_states, + from_states[np.nonzero(prob)[0]])) + states = np.hstack((states, to_states[np.nonzero(prob)[1]])) probabilities = np.hstack((probabilities, prob[prob != 0])) # make the transitions sparse transitions = self.make_sparse(states, prev_states, probabilities) @@ -364,6 +359,60 @@ class BeatTransitionModel(TransitionModel): super(BeatTransitionModel, self).__init__(*transitions) +class BarTransitionModel(TransitionModel): + """ + Transition model for bar tracking with a HMM. + + Parameters + ---------- + state_space : :class:`BarStateSpace` instance + BarStateSpace instance. + transition_lambda : float + Lambda for the exponential tempo change distribution (higher values + prefer a constant tempo from one beat to the next one). + + References + ---------- + .. [1] Florian Krebs, Sebastian Böck and Gerhard Widmer, + "An Efficient State Space Model for Joint Tempo and Meter Tracking", + Proceedings of the 16th International Society for Music Information + Retrieval Conference (ISMIR), 2015. + + """ + + def __init__(self, state_space, transition_lambda): + # save attributes + self.state_space = state_space + self.transition_lambda = float(transition_lambda) + # intra state space connections (i.e. same tempi within the beats) + states = np.arange(state_space.num_states, dtype=np.uint32) + # remove the transitions into the first states of the individual beats + states = np.setdiff1d(states, state_space.first_states) + prev_states = states - 1 + probabilities = np.ones_like(states, dtype=np.float) + # tempo transition at the beat boundaries + for beat in range(state_space.num_beats): + # connect to the first states of the actual beat + to_states = state_space.first_states[beat] + # connect from the last states of the previous beat + from_states = state_space.last_states[beat - 1] + # generate an exponential tempo transition + from_int = state_space.state_intervals[from_states] + to_int = state_space.state_intervals[to_states] + prob = exponential_transition(from_int.astype(np.float), + to_int.astype(np.float), + self.transition_lambda) + # use only the states with transitions to/from != 0 + prev_states = np.hstack((prev_states, + from_states[np.nonzero(prob)[0]])) + states = np.hstack((states, to_states[np.nonzero(prob)[1]])) + probabilities = np.hstack((probabilities, prob[prob != 0])) + # make the transitions sparse + transitions = self.make_sparse(states, prev_states, probabilities) + # instantiate a TransitionModel + super(BarTransitionModel, self).__init__(*transitions) + + class MultiPatternTransitionModel(TransitionModel): """ Transition model for pattern tracking with a HMM. diff --git a/tests/test_features_beats_hmm.py b/tests/test_features_beats_hmm.py index 7abe8bf12..081ff58bd 100644 --- a/tests/test_features_beats_hmm.py +++ b/tests/test_features_beats_hmm.py @@ -70,15 +70,15 @@ def test_types(self): # self.assertIsInstance(bss.intervals, np.ndarray) self.assertIsInstance(bss.state_positions, np.ndarray) self.assertIsInstance(bss.state_intervals, np.ndarray) - self.assertIsInstance(bss.first_states, np.ndarray) - self.assertIsInstance(bss.last_states, np.ndarray) + self.assertIsInstance(bss.first_states, list) + self.assertIsInstance(bss.last_states, list) self.assertIsInstance(bss.beat_state_offsets, np.ndarray) # dtypes # self.assertTrue(bss.intervals.dtype == np.uint32) self.assertTrue(bss.state_positions.dtype == np.float) self.assertTrue(bss.state_intervals.dtype == np.uint32) - self.assertTrue(bss.first_states.dtype == np.uint32) - self.assertTrue(bss.last_states.dtype == np.uint32) + # self.assertTrue(bss.first_states.dtype == np.uint32) + # self.assertTrue(bss.last_states.dtype == np.uint32) print(bss.beat_state_offsets.dtype) self.assertTrue(bss.beat_state_offsets.dtype == np.int) @@ -97,8 +97,10 @@ def test_values(self): self.assertTrue(np.allclose(bss.state_intervals, [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4])) - self.assertTrue(np.allclose(bss.first_states, [0, 1, 3, 6])) - self.assertTrue(np.allclose(bss.last_states, [10, 12, 15, 19])) + self.assertTrue(np.allclose(bss.first_states, [[0, 1, 3, 6], + [10, 11, 13, 16]])) + self.assertTrue(np.allclose(bss.last_states, [[0, 2, 5, 9], + [10, 12, 15, 19]])) # self.assertTrue(bss.num_intervals == 4) @@ -152,8 +154,8 @@ def test_values(self): class TestBeatTransitionModelClass(unittest.TestCase): def setUp(self): - btss = BeatStateSpace(1, 4) - self.tm = BeatTransitionModel(btss, 100) + bss = BeatStateSpace(1, 4) + self.tm = BeatTransitionModel(bss, 100) def test_types(self): self.assertIsInstance(self.tm, BeatTransitionModel) @@ -185,6 +187,48 @@ def test_values(self): self.assertTrue(self.tm.num_transitions == 13) +class TestBarTransitionModelClass(unittest.TestCase): + + def setUp(self): + bss = BarStateSpace(2, 1, 4) + self.tm = BarTransitionModel(bss, 100) + + def test_types(self): + self.assertIsInstance(self.tm, BarTransitionModel) + self.assertIsInstance(self.tm, TransitionModel) + self.assertIsInstance(self.tm.state_space, BarStateSpace) + self.assertIsInstance(self.tm.transition_lambda, float) + self.assertIsInstance(self.tm.states, np.ndarray) + self.assertIsInstance(self.tm.pointers, np.ndarray) + self.assertIsInstance(self.tm.probabilities, np.ndarray) + self.assertIsInstance(self.tm.log_probabilities, np.ndarray) + self.assertIsInstance(self.tm.num_states, int) + self.assertIsInstance(self.tm.num_transitions, int) + self.assertTrue(self.tm.states.dtype == np.uint32) + self.assertTrue(self.tm.pointers.dtype == np.uint32) + self.assertTrue(self.tm.probabilities.dtype == np.float) + self.assertTrue(self.tm.log_probabilities.dtype == np.float) + + def test_values(self): + self.assertTrue(np.allclose(self.tm.states, + [10, 12, 15, 1, 15, 19, 3, 4, 15, 19, 6, 7, + 8, 0, 2, 5, 11, 5, 9, 13, 14, 5, 9, 16, + 17, 18])) + self.assertTrue(np.allclose(self.tm.pointers, + [0, 1, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, + 16, 17, 19, 20, 21, 23, 24, 25, 26])) + self.assertTrue(np.allclose(self.tm.probabilities, + [1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1])) + self.assertTrue(np.allclose(self.tm.log_probabilities, + [0, 0, -33.3333333, 0, 0, -25, + 0, 0, -33.3333333, 0, 0, 0, 0, + 0, 0, -33.3333333, 0, 0, -25, + 0, 0, -33.3333333, 0, 0, 0, 0])) + self.assertTrue(self.tm.num_states == 20) + self.assertTrue(self.tm.num_transitions == 26) + + class TestPatternTrackingTransitionModelClass(unittest.TestCase): def setUp(self): From a26c29b578af6fba55eac9525c0431448b4db95d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Fri, 8 Jan 2016 16:46:17 +0100 Subject: [PATCH 10/18] simplify the exponential_transition function by using numpy instead of cython --- madmom/features/beats_hmm.pyx | 53 ++++++++++------------------------- 1 file changed, 15 insertions(+), 38 deletions(-) diff --git a/madmom/features/beats_hmm.pyx b/madmom/features/beats_hmm.pyx index 3b3b201d9..da1ea9413 100644 --- a/madmom/features/beats_hmm.pyx +++ b/madmom/features/beats_hmm.pyx @@ -243,13 +243,10 @@ class MultiPatternStateSpace(object): # transition distributions -@cython.cdivision(True) -@cython.boundscheck(False) -@cython.wraparound(False) -def exponential_transition(double [::1] from_intervals, - double [::1] to_intervals, double transition_lambda, - double threshold=np.spacing(1)): +def exponential_transition(from_intervals, to_intervals, transition_lambda, + threshold=np.spacing(1), norm=True): """ + Exponential tempo transition. Parameters ---------- @@ -262,6 +259,8 @@ def exponential_transition(double [::1] from_intervals, prefer a constant tempo from one beat/bar to the next one). threshold : float, optional Set transition probabilities below this threshold to zero. + norm : bool, optional + Normalize the emission probabilities to sum 1. Returns ------- @@ -276,37 +275,15 @@ def exponential_transition(double [::1] from_intervals, Retrieval Conference (ISMIR), 2015. """ - # define variables - cdef unsigned int from_int, to_int - cdef double prob_sum, ratio, prob - cdef unsigned int num_to = len(to_intervals) - cdef unsigned int num_from = len(from_intervals) - # transition matrix for the tempo changes - cdef double [:, ::1] trans_prob = np.zeros((num_from, num_to), - dtype=np.float) - # iterate over all interval states - for from_int in range(num_from): - # reset probability sum - prob_sum = 0 - # compute transition probabilities to all other interval states - for to_int in range(num_to): - # compute the ratio of the two intervals - ratio = to_intervals[to_int] / from_intervals[from_int] - # compute the probability for the tempo change following an - # exponential distribution - prob = exp(-transition_lambda * abs(ratio - 1)) - # keep only transition probabilities > threshold - if prob > threshold: - # save the probability - trans_prob[from_int, to_int] = prob - # collect normalization data - prob_sum += prob - # normalize the interval transitions to other intervals - # TODO: make the normailsation optional!? - for to_int in range(num_to): - trans_prob[from_int, to_int] /= prob_sum - # return the transition probabilities - return np.asarray(trans_prob) + # compute the transition probabilities + ratio = to_intervals / from_intervals[:, np.newaxis] + prob = np.exp(-transition_lambda * abs(ratio - 1.)) + # set values below threshold to 0 + prob[prob <= threshold] = 0 + # normalize the emission probabilities + if norm: + prob /= np.sum(prob, axis=1)[:, np.newaxis] + return prob # transition models @@ -635,7 +612,7 @@ class GMMPatternTrackingObservationModel(ObservationModel): Parameters ---------- observations : numpy array - Observations (i.e. activations of the NN). + Observations (i.e. multiband spectral flux features). Returns ------- From 5ed5dbea2d28dc228ec96e493ca01bdd5d85d38c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Sat, 9 Jan 2016 13:29:20 +0100 Subject: [PATCH 11/18] cleanup and docstring clarifications --- madmom/features/beats_hmm.pyx | 78 ++++++++++++++++++-------------- tests/test_features_beats_hmm.py | 9 +--- 2 files changed, 45 insertions(+), 42 deletions(-) diff --git a/madmom/features/beats_hmm.pyx b/madmom/features/beats_hmm.pyx index da1ea9413..ceda6031d 100644 --- a/madmom/features/beats_hmm.pyx +++ b/madmom/features/beats_hmm.pyx @@ -76,16 +76,16 @@ class BeatStateSpace(object): # spacing of the intervals) use a log spacing and limit the number of # intervals to the given value if num_intervals is not None and num_intervals < len(intervals): - # we must approach intervals iteratively - num_log_tempi = num_intervals + # we must approach the number of intervals iteratively + num_log_intervals = num_intervals intervals = [] while len(intervals) < num_intervals: intervals = np.logspace(np.log2(min_interval), - np.log2(max_interval), - num_log_tempi, base=2) - # quantize to integer tempo states + np.log2(max_interval), + num_log_intervals, base=2) + # quantize to integer intervals intervals = np.unique(np.round(intervals)) - num_log_tempi += 1 + num_log_intervals += 1 # save the intervals self.intervals = np.ascontiguousarray(intervals, dtype=np.uint32) # number of states and intervals @@ -95,15 +95,15 @@ class BeatStateSpace(object): first_states = np.cumsum(np.r_[0, self.intervals[:-1]]) self.first_states = first_states.astype(np.uint32) self.last_states = np.cumsum(self.intervals).astype(np.uint32) - 1 - # define the position and interval states + # define the positions and intervals of the states self.state_positions = np.empty(self.num_states) self.state_intervals = np.empty(self.num_states, dtype=np.uint32) + # Note: having an idx counter is faster than ndenumerate idx = 0 for i in self.intervals: self.state_positions[idx: idx + i] = np.linspace(0, 1, i, endpoint=False) self.state_intervals[idx: idx + i] = i - # increase counter idx += i @@ -125,7 +125,7 @@ class BarStateSpace(object): Attributes ---------- - num_beats : int. + num_beats : int Number of beats. num_states : int Number of states. @@ -135,8 +135,6 @@ class BarStateSpace(object): Positions of the states. state_intervals : numpy array Intervals of the states. - beat_state_offsets : numpy array - State offsets of the beats. first_states : list First interval states for each beat. last_states : list @@ -155,29 +153,25 @@ class BarStateSpace(object): num_intervals=None): # model N beats as a bar self.num_beats = int(num_beats) - self.state_positions = np.empty(0, dtype=np.uint32) + self.state_positions = np.empty(0) self.state_intervals = np.empty(0, dtype=np.uint32) - self.beat_state_offsets = np.empty(0, dtype=np.int) self.num_states = 0 # save the first and last states of the individual beats in a list self.first_states = [] self.last_states = [] - # create a beat state space + # create a BeatStateSpace and stack it `num_beats` times bss = BeatStateSpace(min_interval, max_interval, num_intervals) - offset = 0 for n in range(self.num_beats): # define position and interval states self.state_positions = np.hstack((self.state_positions, bss.state_positions + n)) self.state_intervals = np.hstack((self.state_intervals, bss.state_intervals)) + # add the current number of states as offset + self.first_states.append(bss.first_states + self.num_states) + self.last_states.append(bss.last_states + self.num_states) + # finally increase the number of states self.num_states += bss.num_states - self.first_states.append(bss.first_states + offset) - self.last_states.append(bss.last_states + offset) - # save the offsets and increase afterwards - self.beat_state_offsets = np.hstack((self.beat_state_offsets, - offset)) - offset += bss.num_states class MultiPatternStateSpace(object): @@ -291,6 +285,10 @@ class BeatTransitionModel(TransitionModel): """ Transition model for beat tracking with a HMM. + Within the beat, the tempo stays the same; at beat boundaries transitions + from one tempo (i.e. interval) to another following an exponential + distribution are allowed. + Parameters ---------- state_space : :class:`BeatStateSpace` instance @@ -312,23 +310,26 @@ class BeatTransitionModel(TransitionModel): # save attributes self.state_space = state_space self.transition_lambda = float(transition_lambda) - # intra state space connections (i.e. same tempi) + # same tempo transitions probabilities within the state space is 1 + # Note: use all states, but remove all first states because there are + # no same tempo transitions into them states = np.arange(state_space.num_states, dtype=np.uint32) - # remove the transitions into the first states states = np.setdiff1d(states, state_space.first_states) prev_states = states - 1 probabilities = np.ones_like(states, dtype=np.float) - # self connection of the state space (i.e. tempo changes) + # tempo transitions occur at the boundary between beats + # Note: connect the beat state space with itself, the transitions from + # the last states to the first states follow an exponential tempo + # transition (with the tempi given as intervals) to_states = state_space.first_states from_states = state_space.last_states - # generate an exponential tempo transition from_int = state_space.state_intervals[from_states].astype(np.float) to_int = state_space.state_intervals[to_states].astype(np.float) prob = exponential_transition(from_int, to_int, self.transition_lambda) # use only the states with transitions to/from != 0 - prev_states = np.hstack((prev_states, - from_states[np.nonzero(prob)[0]])) - states = np.hstack((states, to_states[np.nonzero(prob)[1]])) + from_prob, to_prob = np.nonzero(prob) + states = np.hstack((states, to_states[to_prob])) + prev_states = np.hstack((prev_states,from_states[from_prob])) probabilities = np.hstack((probabilities, prob[prob != 0])) # make the transitions sparse transitions = self.make_sparse(states, prev_states, probabilities) @@ -340,6 +341,10 @@ class BarTransitionModel(TransitionModel): """ Transition model for bar tracking with a HMM. + Within the beats of the bar, the tempo stays the same; at beat boundaries + transitions from one tempo (i.e. interval) to another following an + exponential distribution are allowed. + Parameters ---------- state_space : :class:`BarStateSpace` instance @@ -361,13 +366,18 @@ class BarTransitionModel(TransitionModel): # save attributes self.state_space = state_space self.transition_lambda = float(transition_lambda) - # intra state space connections (i.e. same tempi within the beats) + # TODO: this could be unified with the BeatStateSpace + # same tempo transitions probabilities within the state space is 1 + # Note: use all states, but remove all first states of the individual + # beats, because there are no same tempo transitions into them states = np.arange(state_space.num_states, dtype=np.uint32) - # remove the transitions into the first states of the individual beats states = np.setdiff1d(states, state_space.first_states) prev_states = states - 1 probabilities = np.ones_like(states, dtype=np.float) - # tempo transition at the beat boundaries + # tempo transitions occur at the boundary between beats + # Note: connect the first states of each beat state space to the last + # states of the previous beat with an exponential tempo + # transition (with the tempi given as intervals) for beat in range(state_space.num_beats): # connect to the first states of the actual beat to_states = state_space.first_states[beat] @@ -380,9 +390,9 @@ class BarTransitionModel(TransitionModel): to_int.astype(np.float), self.transition_lambda) # use only the states with transitions to/from != 0 - prev_states = np.hstack((prev_states, - from_states[np.nonzero(prob)[0]])) - states = np.hstack((states, to_states[np.nonzero(prob)[1]])) + from_prob, to_prob = np.nonzero(prob) + states = np.hstack((states, to_states[to_prob])) + prev_states = np.hstack((prev_states,from_states[from_prob])) probabilities = np.hstack((probabilities, prob[prob != 0])) # make the transitions sparse transitions = self.make_sparse(states, prev_states, probabilities) diff --git a/tests/test_features_beats_hmm.py b/tests/test_features_beats_hmm.py index 081ff58bd..ab8ef250f 100644 --- a/tests/test_features_beats_hmm.py +++ b/tests/test_features_beats_hmm.py @@ -72,23 +72,16 @@ def test_types(self): self.assertIsInstance(bss.state_intervals, np.ndarray) self.assertIsInstance(bss.first_states, list) self.assertIsInstance(bss.last_states, list) - self.assertIsInstance(bss.beat_state_offsets, np.ndarray) # dtypes # self.assertTrue(bss.intervals.dtype == np.uint32) self.assertTrue(bss.state_positions.dtype == np.float) self.assertTrue(bss.state_intervals.dtype == np.uint32) - # self.assertTrue(bss.first_states.dtype == np.uint32) - # self.assertTrue(bss.last_states.dtype == np.uint32) - print(bss.beat_state_offsets.dtype) - self.assertTrue(bss.beat_state_offsets.dtype == np.int) def test_values(self): bss = BarStateSpace(2, 1, 4) self.assertTrue(bss.num_beats == 2) self.assertTrue(bss.num_states == 20) # self.assertTrue(np.allclose(bss.intervals, [1, 2, 3, 4])) - # self.assertTrue(np.allclose(bss.beat[:10], 0)) - # self.assertTrue(np.allclose(bss.beat[10:], 1)) self.assertTrue(np.allclose(bss.state_positions, [0, 0, 0.5, 0, 1. / 3, 2. / 3, 0, 0.25, 0.5, 0.75, @@ -112,7 +105,7 @@ def test_types(self): self.assertIsInstance(mpss.state_positions, np.ndarray) self.assertIsInstance(mpss.state_intervals, np.ndarray) self.assertIsInstance(mpss.num_states, int) - # self.assertIsInstance(mpss.num_intervals, list) + # self.assertIsInstance(mpss.num_intervals, int) self.assertIsInstance(mpss.num_patterns, int) # dtypes # self.assertTrue(mpss.intervals.dtype == np.uint32) From d4e69605b83c1f7153b6aa77b515e9ef2a62450d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Mon, 11 Jan 2016 12:58:22 +0100 Subject: [PATCH 12/18] refactored MultiPattern stuff; PatternTrackingProcessor uses a BarStateSpace and BarTransitionModel to build a MultiPatternStateSpace and MultiPatternTransitionModel. BarTransitionModel support transitions at beat and/or bar boundaries now. --- madmom/features/beats.py | 65 ++++--- madmom/features/beats_hmm.pyx | 193 ++++++++----------- tests/test_features_beats_hmm.py | 310 +++++++++++++++++++++---------- 3 files changed, 332 insertions(+), 236 deletions(-) diff --git a/madmom/features/beats.py b/madmom/features/beats.py index fe9a1ddcc..1c0bb6005 100755 --- a/madmom/features/beats.py +++ b/madmom/features/beats.py @@ -895,6 +895,9 @@ class PatternTrackingProcessor(Processor): MIN_BPM = [55, 60] MAX_BPM = [205, 225] NUM_TEMPI = [None, None] + # TODO: make this parametric + # Note: if lambda is given as a list, the individual values represent the + # lambdas for each transition into the beat at this index position TRANSITION_LAMBDA = [100, 100] NORM_OBSERVATIONS = False @@ -906,11 +909,11 @@ def __init__(self, pattern_files, min_bpm=MIN_BPM, max_bpm=MAX_BPM, # pylint: disable=no-name-in-module import pickle - from madmom.ml.hmm import HiddenMarkovModel as Hmm - from .beats_hmm import (MultiPatternStateSpace as St, - MultiPatternTransitionModel as Tm, - GMMPatternTrackingObservationModel as Om) + from .beats_hmm import (BarStateSpace, BarTransitionModel, + MultiPatternStateSpace, + MultiPatternTransitionModel, + GMMPatternTrackingObservationModel) # expand num_tempi and transition_lambda to lists if needed if not isinstance(num_tempi, list): @@ -923,7 +926,6 @@ def __init__(self, pattern_files, min_bpm=MIN_BPM, max_bpm=MAX_BPM, raise ValueError('`min_bpm`, `max_bpm`, `num_tempi` and ' '`transition_lambda` must have the same length ' 'as number of patterns.') - # load the patterns patterns = [] for pattern_file in pattern_files: @@ -944,17 +946,24 @@ def __init__(self, pattern_files, min_bpm=MIN_BPM, max_bpm=MAX_BPM, # save additional variables self.downbeats = downbeats self.fps = fps - # convert timing information to construct state space - # Note: since we model a complete bar, we must multiply the intervals - # by the number of beats in that pattern - min_interval = 60. * self.fps / np.asarray(max_bpm) * self.num_beats - max_interval = 60. * self.fps / np.asarray(min_bpm) * self.num_beats - # state space - self.st = St(min_interval, max_interval, num_tempi) - # transition model - self.tm = Tm(self.st, transition_lambda) + # convert timing information to construct a state space + min_interval = 60. * self.fps / np.asarray(max_bpm) + max_interval = 60. * self.fps / np.asarray(min_bpm) + # construct a multi pattern state space and transition model + state_spaces = [] + transition_models = [] + for p in range(len(patterns)): + # model each rhythmic pattern as a bar + st = BarStateSpace(self.num_beats[p], min_interval[p], + max_interval[p], num_tempi[p]) + tm = BarTransitionModel(st, transition_lambda[p]) + state_spaces.append(st) + transition_models.append(tm) + self.st = MultiPatternStateSpace(state_spaces) + self.tm = MultiPatternTransitionModel(transition_models) # observation model - self.om = Om(gmms, self.st, norm_observations) + self.om = GMMPatternTrackingObservationModel(gmms, self.st, + norm_observations) # instantiate a HMM self.hmm = Hmm(self.tm, self.om, None) @@ -975,27 +984,23 @@ def process(self, activations): """ # get the best state path by calling the viterbi algorithm path, _ = self.hmm.viterbi(activations) - # get the corresponding pattern (use only the first state, since it - # doesn't change throughout the sequence) - pattern = self.st.state_patterns[path[0]] - # the position inside the pattern (0..1) - position = self.st.state_positions[path] - # beat position (= weighted by number of beats in bar) - beat_counter = (position * self.num_beats[pattern]).astype(int) - # transitions are the points where the beat counters change + # the positions inside the pattern (0..num_beats) + positions = self.st.state_positions[path] + # corresponding beats (add 1 for natural counting) + beat_numbers = positions.astype(int) + 1 + # transitions are the points where the beat numbers change # FIXME: we might miss the first or last beat! # we could calculate the interval towards the beginning/end to # decide whether to include these points - beat_positions = np.nonzero(np.diff(beat_counter))[0] + 1 - # the beat numbers are the counters + 1 at the transition points - beat_numbers = beat_counter[beat_positions] + 1 - # convert the detected beats to a list of timestamps - beats = np.asarray(beat_positions) / float(self.fps) + beat_positions = np.nonzero(np.diff(beat_numbers))[0] + 1 + # stack the beat positions (converted to seconds) and beat numbers + beats = np.vstack((beat_positions / float(self.fps), + beat_numbers[beat_positions])).T # return the downbeats or beats and their beat number if self.downbeats: - return beats[beat_numbers == 1] + return beats[beats[:, 1] == 1][0, :] else: - return np.vstack(zip(beats, beat_numbers)) + return beats @staticmethod def add_arguments(parser, pattern_files=None, min_bpm=MIN_BPM, diff --git a/madmom/features/beats_hmm.pyx b/madmom/features/beats_hmm.pyx index ceda6031d..6cfa64105 100644 --- a/madmom/features/beats_hmm.pyx +++ b/madmom/features/beats_hmm.pyx @@ -98,7 +98,7 @@ class BeatStateSpace(object): # define the positions and intervals of the states self.state_positions = np.empty(self.num_states) self.state_intervals = np.empty(self.num_states, dtype=np.uint32) - # Note: having an idx counter is faster than ndenumerate + # Note: having an index counter is faster than ndenumerate idx = 0 for i in self.intervals: self.state_positions[idx: idx + i] = np.linspace(0, 1, i, @@ -178,23 +178,10 @@ class MultiPatternStateSpace(object): """ State space for rhythmic pattern tracking with a HMM. - A rhythmic pattern is modeled similar to :class:`BeatStateSpace`, - but models multiple rhythmic patterns instead of a single beat. The - pattern's length can span multiple beats (e.g. 3 or 4 beats). - Parameters ---------- - min_intervals : list or numpy array - Minimum intervals (i.e. rhythmic pattern length) to model. - max_intervals : list or numpy array - Maximum intervals (i.e. rhythmic pattern length) to model. - num_intervals : list or numpy array, optional - Corresponding number of intervals; if set, limit the number of - intervals and use a log spacing instead of the default linear spacing. - - See Also - -------- - :class:`BeatStateSpace` + state_spaces : list + List with state spaces to model. References ---------- @@ -205,35 +192,31 @@ class MultiPatternStateSpace(object): """ - def __init__(self, min_intervals, max_intervals, num_intervals=None): - if num_intervals is None: - num_intervals = [None] * len(min_intervals) - # for each pattern, compute a bar state space (i.e. a beat state space - # which spans a complete bar) - state_spaces = [] - enum = enumerate(zip(min_intervals, max_intervals, num_intervals)) - for pattern, (min_, max_, num_) in enum: - # create a BeatStateSpace and append it to the list - state_spaces.append(BeatStateSpace(min_, max_, num_)) + def __init__(self, state_spaces): self.state_spaces = state_spaces - # define the position, interval and pattern states - self.state_positions = \ - np.hstack([st.state_positions for st in self.state_spaces]) - self.state_intervals = \ - np.hstack([st.state_intervals for st in self.state_spaces]) - self.state_patterns = \ - np.hstack([np.repeat(i, st.num_states) - for i, st in enumerate(self.state_spaces)]) - - @property - def num_states(self): - """Number of states.""" - return int(sum([st.num_states for st in self.state_spaces])) - - @property - def num_patterns(self): - """Number of rhythmic patterns""" - return len(self.state_spaces) + # model the patterns as a whole + self.num_patterns = len(self.state_spaces) + self.state_positions = np.empty(0) + self.state_intervals = np.empty(0, dtype=np.uint32) + self.state_patterns = np.empty(0, dtype=np.uint32) + self.num_states = 0 + # save the first and last states of the individual patterns in a list + # self.first_states = [] + # self.last_states = [] + for p in range(self.num_patterns): + pattern = self.state_spaces[p] + # define position, interval and pattern states + self.state_positions = np.hstack((self.state_positions, + pattern.state_positions)) + self.state_intervals = np.hstack((self.state_intervals, + pattern.state_intervals)) + self.state_patterns = np.hstack((self.state_patterns, + np.repeat(p, pattern.num_states))) + # TODO: first and last states should both be lists to work easily + # self.first_states.append() + # self.last_states.append() + # finally increase the number of states + self.num_states += pattern.num_states # transition distributions @@ -250,7 +233,8 @@ def exponential_transition(from_intervals, to_intervals, transition_lambda, Intervals where the transitions destinate to. transition_lambda : float Lambda for the exponential tempo change distribution (higher values - prefer a constant tempo from one beat/bar to the next one). + prefer a constant tempo from one beat/bar to the next one). If None, + allow only transitions from/to the same interval. threshold : float, optional Set transition probabilities below this threshold to zero. norm : bool, optional @@ -269,6 +253,11 @@ def exponential_transition(from_intervals, to_intervals, transition_lambda, Retrieval Conference (ISMIR), 2015. """ + # no transition lambda + if transition_lambda is None: + # return a diagonal matrix + return np.diag(np.diag(np.ones((len(from_intervals), + len(to_intervals))))) # compute the transition probabilities ratio = to_intervals / from_intervals[:, np.newaxis] prob = np.exp(-transition_lambda * abs(ratio - 1.)) @@ -285,7 +274,7 @@ class BeatTransitionModel(TransitionModel): """ Transition model for beat tracking with a HMM. - Within the beat, the tempo stays the same; at beat boundaries transitions + Within the beat the tempo stays the same; at beat boundaries transitions from one tempo (i.e. interval) to another following an exponential distribution are allowed. @@ -341,7 +330,7 @@ class BarTransitionModel(TransitionModel): """ Transition model for bar tracking with a HMM. - Within the beats of the bar, the tempo stays the same; at beat boundaries + Within the beats of the bar the tempo stays the same; at beat boundaries transitions from one tempo (i.e. interval) to another following an exponential distribution are allowed. @@ -349,9 +338,18 @@ class BarTransitionModel(TransitionModel): ---------- state_space : :class:`BarStateSpace` instance BarStateSpace instance. - transition_lambda : float + transition_lambda : float or list Lambda for the exponential tempo change distribution (higher values prefer a constant tempo from one beat to the next one). + None can be used to set the tempo change probability to 0. + If a list is given, the individual values represent the lambdas for + each transition into the beat at this index position. + + Notes + ----- + Bars performing tempo changes only at bar boundaries (and not at the beat + boundaries) must have set all but the first `transition_lambda` values to + None, e.g. [100, None, None] for a bar with 3 beats. References ---------- @@ -363,10 +361,16 @@ class BarTransitionModel(TransitionModel): """ def __init__(self, state_space, transition_lambda): + # expand transition_lambda to a list if a single value is given + if not isinstance(transition_lambda, list): + transition_lambda = [transition_lambda] * state_space.num_beats + if state_space.num_beats != len(transition_lambda): + raise ValueError('length of `transition_lambda` must be equal to ' + '`num_beats` of `state_space`.') # save attributes self.state_space = state_space - self.transition_lambda = float(transition_lambda) - # TODO: this could be unified with the BeatStateSpace + self.transition_lambda = transition_lambda + # TODO: this could be unified with the BeatTransitionModel # same tempo transitions probabilities within the state space is 1 # Note: use all states, but remove all first states of the individual # beats, because there are no same tempo transitions into them @@ -374,21 +378,19 @@ class BarTransitionModel(TransitionModel): states = np.setdiff1d(states, state_space.first_states) prev_states = states - 1 probabilities = np.ones_like(states, dtype=np.float) - # tempo transitions occur at the boundary between beats - # Note: connect the first states of each beat state space to the last - # states of the previous beat with an exponential tempo - # transition (with the tempi given as intervals) + # tempo transitions occur at the boundary between beats (unless the + # corresponding transition_lambda is set to None) for beat in range(state_space.num_beats): # connect to the first states of the actual beat to_states = state_space.first_states[beat] # connect from the last states of the previous beat from_states = state_space.last_states[beat - 1] - # generate an exponential tempo transition + # transition follow an exponential tempo distribution from_int = state_space.state_intervals[from_states] to_int = state_space.state_intervals[to_states] prob = exponential_transition(from_int.astype(np.float), to_int.astype(np.float), - self.transition_lambda) + transition_lambda[beat]) # use only the states with transitions to/from != 0 from_prob, to_prob = np.nonzero(prob) states = np.hstack((states, to_states[to_prob])) @@ -404,69 +406,36 @@ class MultiPatternTransitionModel(TransitionModel): """ Transition model for pattern tracking with a HMM. - Instead of modelling only a single beat (as :class:`BeatTransitionModel`), - the :class:`MultiPatternTransitionModel` models rhythmic patterns. It - accepts the same arguments as the :class:`BeatTransitionModel`, but - everything as lists, with the list entries at the same position - corresponding to one rhythmic pattern. - Parameters ---------- - state_space : :class:`MultiPatternTransitionModel` instance - MultiPatternTransitionModel instance. - transition_lambda : list - Lambda(s) for the exponential tempo change distribution of the patterns - (higher values prefer a constant tempo from one bar to the next one). - If a single value is given, the same value is assumed for all patterns. - - See Also - -------- - :class:`BeatTransitionModel` + transition_models : list + List with :class:`TransitionModel` instances. + transition_prob : numpy array, optional + Matrix with transition probabilities from one pattern to another. + transition_lambda : float, optional + Lambda for the exponential tempo change distribution (higher values + prefer a constant tempo from one pattern to the next one). Notes ----- - This transition model differs from the one described in [1]_ in the - following way: - - - it allows transitions only at pattern boundaries instead of beat - boundaries, - - it uses the new state space discretisation and tempo change distribution - proposed in [2]_. - - References - ---------- - .. [1] Florian Krebs, Sebastian Böck and Gerhard Widmer, - "Rhythmic Pattern Modeling for Beat and Downbeat Tracking in Musical - Audio", - Proceedings of the 14th International Society for Music Information - Retrieval Conference (ISMIR), 2013. - .. [2] Florian Krebs, Sebastian Böck and Gerhard Widmer, - "An Efficient State Space Model for Joint Tempo and Meter Tracking", - Proceedings of the 16th International Society for Music Information - Retrieval Conference (ISMIR), 2015. + Right now, no transitions from one pattern to another are allowed. """ - def __init__(self, state_space, transition_lambda): - # expand the transition lambda to a list if needed, i.e. use the same - # value for all patterns - if not isinstance(transition_lambda, list): - transition_lambda = [transition_lambda] * state_space.num_patterns - # check if all lists have the same length - if not state_space.num_patterns == len(transition_lambda): - raise ValueError('number of patterns of the `state_space` and the ' - 'length `transition_lambda` must be the same') - # save the given arguments - self.state_space = state_space + def __init__(self, transition_models, transition_prob=None, + transition_lambda=None): + # TODO: implement pattern transitions + if transition_prob is not None or transition_lambda is not None: + raise NotImplementedError("please implement pattern transitions") + # save attributes + self.transition_models = transition_models + self.transition_prob = transition_prob self.transition_lambda = transition_lambda - # compute the transitions for each pattern and stack them - enum = enumerate(zip(state_space.state_spaces, transition_lambda)) - for pattern, (state_space, transition_lambda) in enum: - # create a BeatTransitionModel - tm = BeatTransitionModel(state_space, transition_lambda) - seq = np.arange(tm.num_states, dtype=np.int) + # stack the pattern transitions + for p in range(len(self.transition_models)): + tm = self.transition_models[p] # set/update the probabilities, states and pointers - if pattern == 0: + if p == 0: # for the first pattern, just set the TM arrays states = tm.states pointers = tm.pointers @@ -484,9 +453,9 @@ class MultiPatternTransitionModel(TransitionModel): max(pointers))) # probabilities: just stack them probabilities = np.hstack((probabilities, tm.probabilities)) - # instantiate a TransitionModel with the transition arrays - transitions = states, pointers, probabilities - super(MultiPatternTransitionModel, self).__init__(*transitions) + # instantiate a TransitionModel + super(MultiPatternTransitionModel, self).__init__(states, pointers, + probabilities) # observation models diff --git a/tests/test_features_beats_hmm.py b/tests/test_features_beats_hmm.py index ab8ef250f..3f877c6ed 100644 --- a/tests/test_features_beats_hmm.py +++ b/tests/test_features_beats_hmm.py @@ -78,6 +78,7 @@ def test_types(self): self.assertTrue(bss.state_intervals.dtype == np.uint32) def test_values(self): + # 2 beats, intervals 1 to 4 bss = BarStateSpace(2, 1, 4) self.assertTrue(bss.num_beats == 2) self.assertTrue(bss.num_states == 20) @@ -94,13 +95,32 @@ def test_values(self): [10, 11, 13, 16]])) self.assertTrue(np.allclose(bss.last_states, [[0, 2, 5, 9], [10, 12, 15, 19]])) - # self.assertTrue(bss.num_intervals == 4) + # other values: 1 beat, intervals 2 to 6 + bss = BarStateSpace(1, 2, 6) + self.assertTrue(bss.num_beats == 1) + self.assertTrue(bss.num_states == 20) + # self.assertTrue(np.allclose(bss.intervals, [2, 3, 4, 5, 6])) + self.assertTrue(np.allclose(bss.state_positions, + [0, 0.5, + 0, 1. / 3, 2. / 3, + 0, 0.25, 0.5, 0.75, + 0, 0.2, 0.4, 0.6, 0.8, + 0, 1. / 6, 2. / 6, 0.5, 4. / 6, 5. / 6])) + self.assertTrue(np.allclose(bss.state_intervals, + [2, 2, 3, 3, 3, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6])) + self.assertTrue(np.allclose(bss.first_states, [[0, 2, 5, 9, 14]])) + self.assertTrue(np.allclose(bss.last_states, [[1, 4, 8, 13, 19]])) class TestMultiPatternStateSpaceClass(unittest.TestCase): def test_types(self): - mpss = MultiPatternStateSpace([1, 2], [4, 6]) + # test with 2 BeatStateSpaces as before + # mpss = MultiPatternStateSpace([1, 2], [4, 6]) + bss1 = BeatStateSpace(1, 4) + bss2 = BeatStateSpace(2, 6) + mpss = MultiPatternStateSpace([bss1, bss2]) self.assertIsInstance(mpss.state_spaces, list) self.assertIsInstance(mpss.state_positions, np.ndarray) self.assertIsInstance(mpss.state_intervals, np.ndarray) @@ -114,8 +134,12 @@ def test_types(self): # self.assertTrue(mpss.first_states.dtype == np.uint32) # self.assertTrue(mpss.last_states.dtype == np.uint32) - def test_values(self): - mpss = MultiPatternStateSpace([1, 2], [4, 6]) + def test_values_beat(self): + # test with 2 BeatStateSpaces as before + # mpss = MultiPatternStateSpace([1, 2], [4, 6]) + bss1 = BeatStateSpace(1, 4) + bss2 = BeatStateSpace(2, 6) + mpss = MultiPatternStateSpace([bss1, bss2]) self.assertTrue(np.allclose(mpss.state_spaces[0].intervals, [1, 2, 3, 4])) self.assertTrue(np.allclose(mpss.state_spaces[1].intervals, @@ -142,138 +166,236 @@ def test_values(self): 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6])) self.assertTrue(np.allclose(mpss.state_patterns[10:], 1)) + def test_values_bar(self): + # test with 2 BarStateSpaces + bss1 = BarStateSpace(2, 1, 4) + bss2 = BarStateSpace(1, 2, 6) + mpss = MultiPatternStateSpace([bss1, bss2]) + # self.assertTrue(np.allclose(mpss.state_spaces[0].intervals, + # [1, 2, 3, 4])) + # self.assertTrue(np.allclose(mpss.state_spaces[1].intervals, + # [2, 3, 4, 5, 6])) + self.assertTrue(mpss.num_states == 40) + # self.assertTrue(mpss.num_intervals == [4, 5]) + self.assertTrue(mpss.num_patterns == 2) + # first pattern + self.assertTrue(np.allclose(mpss.state_positions[:20], + [0, 0, 0.5, 0, 1. / 3, 2. / 3, + 0, 0.25, 0.5, 0.75, + 1, 1, 1.5, 1, 4. / 3, 5. / 3, + 1, 1.25, 1.5, 1.75])) + self.assertTrue(np.allclose(mpss.state_intervals[:20], + [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, + 1, 2, 2, 3, 3, 3, 4, 4, 4, 4])) + self.assertTrue(np.allclose(mpss.state_patterns[:20], 0)) + # self.assertTrue(np.allclose(mpss.first_states[0], + # [[0, 1, 3, 6], [10, 11, 13, 16]])) + # self.assertTrue(np.allclose(mpss.last_states[0], + # [[0, 2, 5, 9], [10, 12, 15, 19]])) + # second pattern + self.assertTrue(np.allclose(mpss.state_positions[20:], + [0, 0.5, + 0, 1. / 3, 2. / 3, + 0, 0.25, 0.5, 0.75, + 0, 0.2, 0.4, 0.6, 0.8, + 0, 1. / 6, 2. / 6, 0.5, 4. / 6, 5. / 6])) + self.assertTrue(np.allclose(mpss.state_intervals[20:], + [2, 2, 3, 3, 3, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6])) + self.assertTrue(np.allclose(mpss.state_patterns[20:], 1)) + # self.assertTrue(np.allclose(mpss.first_states[1], + # [[0, 2, 5, 9, 14]])) + # self.assertTrue(np.allclose(mpss.last_states[1], + # [[1, 4, 8, 13, 19]])) + # transition models class TestBeatTransitionModelClass(unittest.TestCase): - def setUp(self): - bss = BeatStateSpace(1, 4) - self.tm = BeatTransitionModel(bss, 100) - def test_types(self): - self.assertIsInstance(self.tm, BeatTransitionModel) - self.assertIsInstance(self.tm, TransitionModel) - self.assertIsInstance(self.tm.state_space, BeatStateSpace) - self.assertIsInstance(self.tm.transition_lambda, float) - self.assertIsInstance(self.tm.states, np.ndarray) - self.assertIsInstance(self.tm.pointers, np.ndarray) - self.assertIsInstance(self.tm.probabilities, np.ndarray) - self.assertIsInstance(self.tm.log_probabilities, np.ndarray) - self.assertIsInstance(self.tm.num_states, int) - self.assertIsInstance(self.tm.num_transitions, int) - self.assertTrue(self.tm.states.dtype == np.uint32) - self.assertTrue(self.tm.pointers.dtype == np.uint32) - self.assertTrue(self.tm.probabilities.dtype == np.float) - self.assertTrue(self.tm.log_probabilities.dtype == np.float) + bss = BeatStateSpace(1, 4) + tm = BeatTransitionModel(bss, 100) + self.assertIsInstance(tm, BeatTransitionModel) + self.assertIsInstance(tm, TransitionModel) + self.assertIsInstance(tm.state_space, BeatStateSpace) + self.assertIsInstance(tm.transition_lambda, float) + self.assertIsInstance(tm.states, np.ndarray) + self.assertIsInstance(tm.pointers, np.ndarray) + self.assertIsInstance(tm.probabilities, np.ndarray) + self.assertIsInstance(tm.log_probabilities, np.ndarray) + self.assertIsInstance(tm.num_states, int) + self.assertIsInstance(tm.num_transitions, int) + self.assertTrue(tm.states.dtype == np.uint32) + self.assertTrue(tm.pointers.dtype == np.uint32) + self.assertTrue(tm.probabilities.dtype == np.float) + self.assertTrue(tm.log_probabilities.dtype == np.float) def test_values(self): - self.assertTrue(np.allclose(self.tm.states, + bss = BeatStateSpace(1, 4) + tm = BeatTransitionModel(bss, 100) + self.assertTrue(np.allclose(tm.states, [0, 2, 5, 1, 5, 9, 3, 4, 5, 9, 6, 7, 8])) - self.assertTrue(np.allclose(self.tm.pointers, + self.assertTrue(np.allclose(tm.pointers, [0, 1, 3, 4, 6, 7, 8, 10, 11, 12, 13])) - self.assertTrue(np.allclose(self.tm.probabilities, + self.assertTrue(np.allclose(tm.probabilities, [1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1])) - self.assertTrue(np.allclose(self.tm.log_probabilities, - [0, 0, -33.3333333, 0, 0, -25, 0, 0, - -33.3333333, 0, 0, 0, 0])) - self.assertTrue(self.tm.num_states == 10) - self.assertTrue(self.tm.num_transitions == 13) + self.assertTrue(np.allclose(tm.log_probabilities, + [0, 0, -33.33333, 0, 0, -25, + 0, 0, -33.33333, 0, 0, 0, 0])) + self.assertTrue(tm.num_states == 10) + self.assertTrue(tm.num_transitions == 13) class TestBarTransitionModelClass(unittest.TestCase): - def setUp(self): - bss = BarStateSpace(2, 1, 4) - self.tm = BarTransitionModel(bss, 100) - def test_types(self): - self.assertIsInstance(self.tm, BarTransitionModel) - self.assertIsInstance(self.tm, TransitionModel) - self.assertIsInstance(self.tm.state_space, BarStateSpace) - self.assertIsInstance(self.tm.transition_lambda, float) - self.assertIsInstance(self.tm.states, np.ndarray) - self.assertIsInstance(self.tm.pointers, np.ndarray) - self.assertIsInstance(self.tm.probabilities, np.ndarray) - self.assertIsInstance(self.tm.log_probabilities, np.ndarray) - self.assertIsInstance(self.tm.num_states, int) - self.assertIsInstance(self.tm.num_transitions, int) - self.assertTrue(self.tm.states.dtype == np.uint32) - self.assertTrue(self.tm.pointers.dtype == np.uint32) - self.assertTrue(self.tm.probabilities.dtype == np.float) - self.assertTrue(self.tm.log_probabilities.dtype == np.float) + bss = BarStateSpace(2, 1, 4) + tm = BarTransitionModel(bss, 100) + self.assertIsInstance(tm, BarTransitionModel) + self.assertIsInstance(tm, TransitionModel) + self.assertIsInstance(tm.state_space, BarStateSpace) + self.assertIsInstance(tm.transition_lambda, list) + self.assertIsInstance(tm.states, np.ndarray) + self.assertIsInstance(tm.pointers, np.ndarray) + self.assertIsInstance(tm.probabilities, np.ndarray) + self.assertIsInstance(tm.log_probabilities, np.ndarray) + self.assertIsInstance(tm.num_states, int) + self.assertIsInstance(tm.num_transitions, int) + self.assertTrue(tm.states.dtype == np.uint32) + self.assertTrue(tm.pointers.dtype == np.uint32) + self.assertTrue(tm.probabilities.dtype == np.float) + self.assertTrue(tm.log_probabilities.dtype == np.float) def test_values(self): - self.assertTrue(np.allclose(self.tm.states, + bss = BarStateSpace(2, 1, 4) + tm = BarTransitionModel(bss, 100) + self.assertTrue(np.allclose(tm.states, [10, 12, 15, 1, 15, 19, 3, 4, 15, 19, 6, 7, 8, 0, 2, 5, 11, 5, 9, 13, 14, 5, 9, 16, 17, 18])) - self.assertTrue(np.allclose(self.tm.pointers, + self.assertTrue(np.allclose(tm.pointers, [0, 1, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 16, 17, 19, 20, 21, 23, 24, 25, 26])) - self.assertTrue(np.allclose(self.tm.probabilities, + self.assertTrue(np.allclose(tm.probabilities, [1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1])) - self.assertTrue(np.allclose(self.tm.log_probabilities, - [0, 0, -33.3333333, 0, 0, -25, - 0, 0, -33.3333333, 0, 0, 0, 0, - 0, 0, -33.3333333, 0, 0, -25, - 0, 0, -33.3333333, 0, 0, 0, 0])) - self.assertTrue(self.tm.num_states == 20) - self.assertTrue(self.tm.num_transitions == 26) - + self.assertTrue(np.allclose(tm.log_probabilities, + [0, 0, -33.33333, 0, 0, -25, + 0, 0, -33.33333, 0, 0, 0, 0, + 0, 0, -33.33333, 0, 0, -25, + 0, 0, -33.33333, 0, 0, 0, 0])) + self.assertTrue(tm.num_states == 20) + self.assertTrue(tm.num_transitions == 26) -class TestPatternTrackingTransitionModelClass(unittest.TestCase): - def setUp(self): - ptss = MultiPatternStateSpace([1, 2], [4, 6]) - self.tm = MultiPatternTransitionModel(ptss, 100) +class TestMultiPatternTransitionModelClass(unittest.TestCase): def test_types(self): - self.assertIsInstance(self.tm, MultiPatternTransitionModel) - self.assertIsInstance(self.tm, TransitionModel) - # self.assertIsInstance(self.tm.state_space, PatternTrackingStateSpace) - self.assertIsInstance(self.tm.transition_lambda, list) - self.assertIsInstance(self.tm.states, np.ndarray) - self.assertIsInstance(self.tm.pointers, np.ndarray) - self.assertIsInstance(self.tm.probabilities, np.ndarray) - self.assertIsInstance(self.tm.log_probabilities, np.ndarray) - self.assertIsInstance(self.tm.num_states, int) - self.assertIsInstance(self.tm.num_transitions, int) - self.assertTrue(self.tm.states.dtype == np.uint32) - self.assertTrue(self.tm.pointers.dtype == np.uint32) - self.assertTrue(self.tm.probabilities.dtype == np.float) - self.assertTrue(self.tm.log_probabilities.dtype == np.float) - - def test_values(self): - print(self.tm.probabilities) - print(self.tm.log_probabilities) + bss1 = BeatStateSpace(1, 4) + bss2 = BeatStateSpace(2, 6) + btm1 = BeatTransitionModel(bss1, 100) + btm2 = BeatTransitionModel(bss2, 100) + tm = MultiPatternTransitionModel([btm1, btm2]) + self.assertIsInstance(tm, MultiPatternTransitionModel) + self.assertIsInstance(tm, TransitionModel) + self.assertIsInstance(tm.transition_models, list) + # right now, transition_lambda and transition_prob are None + self.assertIsNone(tm.transition_lambda) + self.assertIsNone(tm.transition_prob) + self.assertIsInstance(tm.states, np.ndarray) + self.assertIsInstance(tm.pointers, np.ndarray) + self.assertIsInstance(tm.probabilities, np.ndarray) + self.assertIsInstance(tm.log_probabilities, np.ndarray) + self.assertIsInstance(tm.num_states, int) + self.assertIsInstance(tm.num_transitions, int) + self.assertTrue(tm.states.dtype == np.uint32) + self.assertTrue(tm.pointers.dtype == np.uint32) + self.assertTrue(tm.probabilities.dtype == np.float) + self.assertTrue(tm.log_probabilities.dtype == np.float) + + def test_values_beat(self): + # test with 2 BeatStateSpaces + bss1 = BeatStateSpace(1, 4) + bss2 = BeatStateSpace(2, 6) + btm1 = BeatTransitionModel(bss1, 100) + btm2 = BeatTransitionModel(bss2, 100) + tm = MultiPatternTransitionModel([btm1, btm2]) + + self.assertTrue(tm.num_states == 10 + 20) + self.assertTrue(tm.num_transitions == 13 + 28) # the first pattern has 13 transitions - self.assertTrue(np.allclose(self.tm.states[:13], + self.assertTrue(np.allclose(tm.states[:13], [0, 2, 5, 1, 5, 9, 3, 4, 5, 9, 6, 7, 8])) - self.assertTrue(np.allclose(self.tm.states[13:], + # the second 28 + self.assertTrue(np.allclose(tm.states[13:], [11, 14, 10, 14, 18, 12, 13, 14, 18, 23, 29, 15, 16, 17, 18, 23, 29, 19, 20, 21, 22, 23, 29, 24, 25, 26, 27, 28])) # the first pattern has 10 states (pointers has one more element) - self.assertTrue(np.allclose(self.tm.pointers[:11], + self.assertTrue(np.allclose(tm.pointers[:11], [0, 1, 3, 4, 6, 7, 8, 10, 11, 12, 13])) - self.assertTrue(np.allclose(self.tm.pointers[11:], + # the second has 20 + self.assertTrue(np.allclose(tm.pointers[11:], [15, 16, 18, 19, 20, 24, 25, 26, 27, 30, 31, 32, 33, 34, 36, 37, 38, 39, 40, 41])) - self.assertTrue(np.allclose(self.tm.probabilities, + # transition probabilities + self.assertTrue(np.allclose(tm.probabilities, [1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 2.06e-09, 0, 1, 1, 1, 0, 1, 5.78e-08, 1, 1, 1, 1, 2.06e-09, 1, 1, 1, 1, 1, 1])) - self.assertTrue(np.allclose(self.tm.log_probabilities, - [0, 0, -33.3333333, 0, 0, -25, 0, 0, - -33.3333333, 0, 0, 0, 0, 0, - -33.3333333, 0, 0, -25, 0, 0, - -33.3333333, 0, -20, -33.3333334, 0, 0, - 0, -25, -4.1e-09, -16.666666, 0, 0, 0, + self.assertTrue(np.allclose(tm.log_probabilities, + [0, 0, -33.33333, 0, 0, -25, 0, 0, + -33.33333, 0, 0, 0, 0, 0, + -33.33333, 0, 0, -25, 0, 0, + -33.33333, 0, -20, -33.33333, 0, 0, + 0, -25, -4.1e-09, -16.6666, 0, 0, 0, 0, -20, -5.78e-08, 0, 0, 0, 0, 0])) - self.assertTrue(self.tm.num_states == 30) - self.assertTrue(self.tm.num_transitions == 41) + + def test_values_bar(self): + # test with 2 BarStateSpaces + bss1 = BarStateSpace(2, 1, 4) + bss2 = BarStateSpace(1, 2, 6) + btm1 = BarTransitionModel(bss1, 100) + btm2 = BarTransitionModel(bss2, 100) + tm = MultiPatternTransitionModel([btm1, btm2]) + self.assertTrue(tm.num_states == 20 + 20) + self.assertTrue(tm.num_transitions == 26 + 28) + # the first pattern has 26 transitions + print(tm.log_probabilities) + self.assertTrue(np.allclose(tm.states[:26], + [10, 12, 15, 1, 15, 19, 3, 4, 15, 19, 6, 7, + 8, 0, 2, 5, 11, 5, 9, 13, 14, 5, 9, 16, + 17, 18])) + # the second 28 + self.assertTrue(np.allclose(tm.states[26:], + [21, 24, 20, 24, 28, 22, 23, 24, 28, 33, + 39, 25, 26, 27, 28, 33, 39, 29, 30, 31, + 32, 33, 39, 34, 35, 36, 37, 38])) + # the first pattern has 20 states (pointers has one more element) + self.assertTrue(np.allclose(tm.pointers[:21], + [0, 1, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, + 16, 17, 19, 20, 21, 23, 24, 25, 26])) + # the second has 20 + self.assertTrue(np.allclose(tm.pointers[21:], + [28, 29, 31, 32, 33, 37, 38, 39, 40, 43, + 44, 45, 46, 47, 49, 50, 51, 52, 53, 54])) + # transition probabilities + self.assertTrue(np.allclose(tm.probabilities, + [1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, + 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1], + atol=1e-7)) + self.assertTrue(np.allclose(tm.log_probabilities, + [0, 0, -33.33333, 0, 0, -25, + 0, 0, -33.33333, 0, 0, 0, 0, + 0, 0, -33.33333, 0, 0, -25, + 0, 0, -33.33333, 0, 0, 0, + 0, 0, -33.33333, 0, 0, -25, + 0, 0, -33.33333, 0, -20, -33.33333, 0, + 0, 0, -25, 0, -16.6666, 0, 0, + 0, 0, -20, -5.78e-08, 0, 0, 0, 0, 0])) # observation models From 18009b4acbce7112d607869acb7b86f041833e39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Thu, 28 Jan 2016 09:55:04 +0100 Subject: [PATCH 13/18] fix GMMPatternTrackingObservationModel to map the GMMs to states correctly --- madmom/features/beats_hmm.pyx | 45 +++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/madmom/features/beats_hmm.pyx b/madmom/features/beats_hmm.pyx index 6cfa64105..a4584e1c7 100644 --- a/madmom/features/beats_hmm.pyx +++ b/madmom/features/beats_hmm.pyx @@ -539,10 +539,11 @@ class GMMPatternTrackingObservationModel(ObservationModel): Parameters ---------- - gmms : list - Fitted GMM(s), one entry per rhythmic pattern. - transition_model : :class:`MultiPatternTransitionModel` instance - MultiPatternTransitionModel instance. + pattern_files : list + List with files representing the rhythmic patterns, one entry per + pattern; each pattern being a list with fitted GMMs. + state_space : :class:`MultiPatternStateSpeac` instance + Multi pattern state space. norm_observations : bool, optional Normalize the observations. @@ -556,26 +557,30 @@ class GMMPatternTrackingObservationModel(ObservationModel): """ - def __init__(self, gmms, transition_model, norm_observations=False): + def __init__(self, pattern_files, state_space, norm_observations=False): # save the parameters - self.gmms = gmms - self.transition_model = transition_model + self.pattern_files = pattern_files + self.state_space = state_space self.norm_observations = norm_observations # define the pointers of the log densities - pointers = np.zeros(transition_model.num_states, dtype=np.uint32) - patterns = self.transition_model.state_patterns - positions = self.transition_model.state_positions + pointers = np.zeros(state_space.num_states, dtype=np.uint32) + patterns = self.state_space.state_patterns + positions = self.state_space.state_positions # Note: the densities of all GMMs are just stacked on top of each # other, so we have to to keep track of the total number of GMMs densities_idx_offset = 0 - for p in range(len(gmms)): + for p, gmms in enumerate(pattern_files): # number of fitted GMMs for this pattern - num_gmms = len(gmms[p]) + num_gmms = len(gmms) + # number of beats in this pattern + num_beats = self.state_space.state_spaces[p].num_beats # distribute the observation densities defined by the GMMs # uniformly across the entire state space (for this pattern) # since the densities are just stacked, add the offset - pointers[patterns == p] = (positions[patterns == p] * num_gmms + - densities_idx_offset) + # Note: we have to divide by the number of beats, since the + # positions range is [0, num_beats] + pointers[patterns == p] = (positions[patterns == p] * num_gmms / + num_beats + densities_idx_offset) # increase the offset by the number of GMMs densities_idx_offset += num_gmms # instantiate a ObservationModel with the pointers @@ -602,22 +607,22 @@ class GMMPatternTrackingObservationModel(ObservationModel): # counter, etc. cdef unsigned int i, j cdef unsigned int num_observations = len(observations) - cdef unsigned int num_patterns = len(self.gmms) + cdef unsigned int num_patterns = len(self.pattern_files) cdef unsigned int num_gmms = 0 # norm observations if self.norm_observations: observations /= np.max(observations) # maximum number of GMMs of all patterns - for i in range(num_patterns): - num_gmms += len(self.gmms[i]) + for pattern in self.pattern_files: + num_gmms += len(pattern) # init the densities log_densities = np.empty((num_observations, num_gmms), dtype=np.float) # define the observation densities cdef unsigned int c = 0 - for i in range(num_patterns): - for j in range(len(self.gmms[i])): + for pattern in self.pattern_files: + for gmm in pattern: # get the predictions of each GMM for the observations - log_densities[:, c] = self.gmms[i][j].score(observations) + log_densities[:, c] = gmm.score(observations) c += 1 # return the densities return log_densities From 179e1d8e18b45c42df0d88ebb900737a4694d860 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Mon, 1 Feb 2016 10:39:33 +0100 Subject: [PATCH 14/18] refactored the beats_hmm module to python only --- .../features/{beats_hmm.pyx => beats_hmm.py} | 66 ++++++------------- setup.py | 3 - 2 files changed, 19 insertions(+), 50 deletions(-) rename madmom/features/{beats_hmm.pyx => beats_hmm.py} (91%) diff --git a/madmom/features/beats_hmm.pyx b/madmom/features/beats_hmm.py similarity index 91% rename from madmom/features/beats_hmm.pyx rename to madmom/features/beats_hmm.py index a4584e1c7..d312c5868 100644 --- a/madmom/features/beats_hmm.pyx +++ b/madmom/features/beats_hmm.py @@ -1,7 +1,6 @@ # encoding: utf-8 -# cython: embedsignature=True """ -This module contains HMM state space, transition and observation models used +This module contains HMM state spaces, transition and observation models used for beat and downbeat tracking. Notes @@ -9,20 +8,11 @@ Please note that (almost) everything within this module is discretised to integer values because of performance reasons. -If you want to change this module and use it interactively, use pyximport. - ->>> import pyximport ->>> pyximport.install(reload_support=True, - setup_args={'include_dirs': np.get_include()}) - """ from __future__ import absolute_import, division, print_function import numpy as np -cimport numpy as np -cimport cython -from libc.math cimport log, exp from madmom.ml.hmm import TransitionModel, ObservationModel @@ -318,7 +308,7 @@ def __init__(self, state_space, transition_lambda): # use only the states with transitions to/from != 0 from_prob, to_prob = np.nonzero(prob) states = np.hstack((states, to_states[to_prob])) - prev_states = np.hstack((prev_states,from_states[from_prob])) + prev_states = np.hstack((prev_states, from_states[from_prob])) probabilities = np.hstack((probabilities, prob[prob != 0])) # make the transitions sparse transitions = self.make_sparse(states, prev_states, probabilities) @@ -394,7 +384,7 @@ def __init__(self, state_space, transition_lambda): # use only the states with transitions to/from != 0 from_prob, to_prob = np.nonzero(prob) states = np.hstack((states, to_states[to_prob])) - prev_states = np.hstack((prev_states,from_states[from_prob])) + prev_states = np.hstack((prev_states, from_states[from_prob])) probabilities = np.hstack((probabilities, prob[prob != 0])) # make the transitions sparse transitions = self.make_sparse(states, prev_states, probabilities) @@ -432,10 +422,9 @@ def __init__(self, transition_models, transition_prob=None, self.transition_prob = transition_prob self.transition_lambda = transition_lambda # stack the pattern transitions - for p in range(len(self.transition_models)): - tm = self.transition_models[p] + for i, tm in enumerate(self.transition_models): # set/update the probabilities, states and pointers - if p == 0: + if i == 0: # for the first pattern, just set the TM arrays states = tm.states pointers = tm.pointers @@ -496,10 +485,7 @@ def __init__(self, state_space, observation_lambda, # instantiate a ObservationModel with the pointers super(RNNBeatTrackingObservationModel, self).__init__(pointers) - @cython.cdivision(True) - @cython.boundscheck(False) - @cython.wraparound(False) - def log_densities(self, float [::1] observations): + def log_densities(self, observations): """ Computes the log densities of the observations. @@ -514,23 +500,18 @@ def log_densities(self, float [::1] observations): Log densities of the observations. """ - # init variables - cdef unsigned int i - cdef unsigned int num_observations = len(observations) - cdef float observation_lambda = self.observation_lambda # norm observations if self.norm_observations: observations /= np.max(observations) # init densities - cdef double [:, ::1] log_densities = np.empty((num_observations, 2), - dtype=np.float) - # define the observation densities - for i in range(num_observations): - log_densities[i, 0] = log(observations[i]) - log_densities[i, 1] = log((1. - observations[i]) / - (observation_lambda - 1)) + log_densities = np.empty((len(observations), 2), dtype=np.float) + # Note: it's faster to call np.log 2 times instead of once on the + # whole 2d array + log_densities[:, 0] = np.log(observations) + log_densities[:, 1] = np.log((1. - observations) / + (self.observation_lambda - 1)) # return the densities - return np.asarray(log_densities) + return log_densities class GMMPatternTrackingObservationModel(ObservationModel): @@ -586,9 +567,6 @@ def __init__(self, pattern_files, state_space, norm_observations=False): # instantiate a ObservationModel with the pointers super(GMMPatternTrackingObservationModel, self).__init__(pointers) - @cython.cdivision(True) - @cython.boundscheck(False) - @cython.wraparound(False) def log_densities(self, observations): """ Computes the log densities of the observations using (a) GMM(s). @@ -604,25 +582,19 @@ def log_densities(self, observations): Log densities of the observations. """ - # counter, etc. - cdef unsigned int i, j - cdef unsigned int num_observations = len(observations) - cdef unsigned int num_patterns = len(self.pattern_files) - cdef unsigned int num_gmms = 0 # norm observations if self.norm_observations: observations /= np.max(observations) - # maximum number of GMMs of all patterns - for pattern in self.pattern_files: - num_gmms += len(pattern) + # number of GMMs of all patterns + num_gmms = sum([len(pattern) for pattern in self.pattern_files]) # init the densities - log_densities = np.empty((num_observations, num_gmms), dtype=np.float) + log_densities = np.empty((len(observations), num_gmms), dtype=np.float) # define the observation densities - cdef unsigned int c = 0 + i = 0 for pattern in self.pattern_files: for gmm in pattern: # get the predictions of each GMM for the observations - log_densities[:, c] = gmm.score(observations) - c += 1 + log_densities[:, i] = gmm.score(observations) + i += 1 # return the densities return log_densities diff --git a/setup.py b/setup.py index 628c80767..df1dc7655 100644 --- a/setup.py +++ b/setup.py @@ -26,9 +26,6 @@ Extension('madmom.features.beats_crf', ['madmom/features/beats_crf.pyx'], include_dirs=[np.get_include()]), - Extension('madmom.features.beats_hmm', - ['madmom/features/beats_hmm.pyx'], - include_dirs=[np.get_include()]), Extension('madmom.ml.hmm', ['madmom/ml/hmm.pyx'], include_dirs=[np.get_include()])] From 2d1eb19d3bcb8930705a23fba7b2a17f15060b06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Fri, 5 Feb 2016 11:42:30 +0100 Subject: [PATCH 15/18] fix GMMPatternTracker description to model bars --- bin/GMMPatternTracker | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/bin/GMMPatternTracker b/bin/GMMPatternTracker index c88b9ec8b..d51e7ac2f 100755 --- a/bin/GMMPatternTracker +++ b/bin/GMMPatternTracker @@ -44,18 +44,15 @@ def main(): Proceedings of the 16th International Society for Music Information Retrieval Conference (ISMIR), 2015. - This program uses only two rhythmical patterns and allows tempo changes - only at bar boundaries. - This program can be run in 'single' file mode to process a single audio file and write the detected beats to STDOUT or the given output file. - $ DownBeatTracker single INFILE [-o OUTFILE] + $ GMMPatternTracker single INFILE [-o OUTFILE] If multiple audio files should be processed, the program can also be run in 'batch' mode to save the detected beats to files with the given suffix. - $ DownBeatTracker batch [-o OUTPUT_DIR] [-s OUTPUT_SUFFIX] LIST OF FILES + $ GMMPatternTracker batch [-o OUTPUT_DIR] [-s OUTPUT_SUFFIX] LIST OF FILES If no output directory is given, the program writes the files with the detected beats to same location as the audio files. From d6422c9af7d6f0a071fa3152824a1de88eaf0548 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Fri, 5 Feb 2016 11:43:34 +0100 Subject: [PATCH 16/18] fix tests and minor test data restructuring fixes --- madmom/features/beats.py | 4 +- tests/data/annotations/sample.beats | 2 +- tests/data/annotations/sample.onsets | 2 +- tests/data/annotations/sample.tempo | 2 +- tests/data/annotations/stereo_sample.notes | 2 +- tests/test_features_beats.py | 50 +++++++++++----------- tests/test_features_notes.py | 3 +- 7 files changed, 32 insertions(+), 33 deletions(-) diff --git a/madmom/features/beats.py b/madmom/features/beats.py index 1c0bb6005..40aae8cdd 100755 --- a/madmom/features/beats.py +++ b/madmom/features/beats.py @@ -917,9 +917,9 @@ def __init__(self, pattern_files, min_bpm=MIN_BPM, max_bpm=MAX_BPM, # expand num_tempi and transition_lambda to lists if needed if not isinstance(num_tempi, list): - num_tempi = [num_tempi] * len(num_tempi) + num_tempi = [num_tempi] * len(pattern_files) if not isinstance(transition_lambda, list): - transition_lambda = [transition_lambda] * len(num_tempi) + transition_lambda = [transition_lambda] * len(pattern_files) # check if all lists have the same length if not (len(min_bpm) == len(max_bpm) == len(num_tempi) == len(transition_lambda) == len(pattern_files)): diff --git a/tests/data/annotations/sample.beats b/tests/data/annotations/sample.beats index f52821162..57d515da0 100644 --- a/tests/data/annotations/sample.beats +++ b/tests/data/annotations/sample.beats @@ -1,4 +1,4 @@ -# manually annotated beats for the sample.wav file in the same directory +# manually annotated beats for the sample.wav file 0.0913 1 0.7997 2 1.4806 3 diff --git a/tests/data/annotations/sample.onsets b/tests/data/annotations/sample.onsets index 451d01bac..5d24b50cc 100644 --- a/tests/data/annotations/sample.onsets +++ b/tests/data/annotations/sample.onsets @@ -1,4 +1,4 @@ -# manually annotated onsets for the sample.wav file in the same directory +# manually annotated onsets for the sample.wav file 0.0943 0.2844 0.4528 diff --git a/tests/data/annotations/sample.tempo b/tests/data/annotations/sample.tempo index d37770c4e..19efdfa86 100644 --- a/tests/data/annotations/sample.tempo +++ b/tests/data/annotations/sample.tempo @@ -1,3 +1,3 @@ -# manually annotated tempo for the sample.wav file in the same directory +# manually annotated tempo for the sample.wav file # Note: this file has mixed whitespace (blank & tab) 87.5 175 0.7 diff --git a/tests/data/annotations/stereo_sample.notes b/tests/data/annotations/stereo_sample.notes index 60cf46983..e4430f9a9 100644 --- a/tests/data/annotations/stereo_sample.notes +++ b/tests/data/annotations/stereo_sample.notes @@ -1,4 +1,4 @@ -# MIDI notes for the stereo_sample.[flac|wav] file in the same directory +# MIDI notes for the stereo_sample.[flac|wav] file 0.147 72 3.323 63 1.567 41 0.223 29 2.526 77 0.930 72 diff --git a/tests/test_features_beats.py b/tests/test_features_beats.py index cb0a0077b..42a56db7d 100644 --- a/tests/test_features_beats.py +++ b/tests/test_features_beats.py @@ -57,9 +57,10 @@ def setUp(self): def test_types(self): self.assertIsInstance(self.processor.correct, bool) - self.assertIsInstance(self.processor.st, BeatTrackingStateSpace) - self.assertIsInstance(self.processor.tm, BeatTrackingTransitionModel) - self.assertIsInstance(self.processor.om, BeatTrackingObservationModel) + self.assertIsInstance(self.processor.st, BeatStateSpace) + self.assertIsInstance(self.processor.tm, BeatTransitionModel) + self.assertIsInstance(self.processor.om, + RNNBeatTrackingObservationModel) self.assertIsInstance(self.processor.hmm, HiddenMarkovModel) def test_values(self): @@ -69,12 +70,11 @@ def test_values(self): 2035, 2036, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975])) self.assertTrue(np.allclose(prob, -772.03353)) - position = self.processor.st.position(path) - self.assertTrue(np.allclose(position[:11], [0.89855075, 0.9130435, - 0.92753625, 0.942029, - 0.95652175, 0.9710145, - 0.98550725, 0, 0.01449275, - 0.02898551, 0.04347826])) + positions = self.processor.st.state_positions[path] + self.assertTrue(np.allclose(positions[:10], + [0.89855075, 0.9130435, 0.92753625, + 0.942029, 0.95652175, 0.9710145, + 0.98550725, 0, 0.01449275, 0.02898551])) def test_process(self): beats = self.processor(act) @@ -95,30 +95,30 @@ def setUp(self): def test_types(self): self.assertIsInstance(self.processor.downbeats, bool) self.assertIsInstance(self.processor.num_beats, list) - self.assertIsInstance(self.processor.st, PatternTrackingStateSpace) - self.assertIsInstance(self.processor.tm, - PatternTrackingTransitionModel) + self.assertIsInstance(self.processor.st, MultiPatternStateSpace) + self.assertIsInstance(self.processor.tm, MultiPatternTransitionModel) self.assertIsInstance(self.processor.om, GMMPatternTrackingObservationModel) self.assertIsInstance(self.processor.hmm, HiddenMarkovModel) def test_values(self): + self.assertTrue(self.processor.fps == 50) self.assertTrue(self.processor.downbeats is False) self.assertTrue(np.allclose(self.processor.num_beats, [3, 4])) path, prob = self.processor.hmm.viterbi(self.act) - self.assertTrue(np.allclose(path[:12], [13497, 13498, 13499, 13500, - 13501, 13502, 13503, 13504, - 13505, 13506, 13507, 13508])) - self.assertTrue(np.allclose(prob, -463.3286)) - pattern = self.processor.st.pattern(path) - self.assertTrue(np.allclose(pattern, np.ones(len(self.act)))) - position = self.processor.st.position(path) - self.assertTrue(np.allclose(position[:6], [0.19117647, 0.20588236, - 0.22058824, 0.23529412, - 0.25, 0.2647059])) + self.assertTrue(np.allclose(path[:12], [5573, 5574, 5575, 5576, 6757, + 6758, 6759, 6760, 6761, 6762, + 6763, 6764])) + self.assertTrue(np.allclose(prob, -468.8014)) + patterns = self.processor.st.state_patterns[path] + self.assertTrue(np.allclose(patterns, np.ones(len(self.act)))) + positions = self.processor.st.state_positions[path] + self.assertTrue(np.allclose(positions[:6], [1.76470588, 1.82352944, + 1.88235296, 1.94117648, + 2, 2.0588236])) def test_process(self): beats = self.processor(self.act) - self.assertTrue(np.allclose(beats, [[0.08, 2], [0.42, 3], [0.76, 4], - [1.1, 1], [1.46, 2], [1.8, 3], - [2.14, 4], [2.48, 1]])) + self.assertTrue(np.allclose(beats, [[0.08, 3], [0.42, 4], [0.76, 1], + [1.1, 2], [1.44, 3], [1.78, 4], + [2.12, 1], [2.46, 2], [2.8, 3]])) diff --git a/tests/test_features_notes.py b/tests/test_features_notes.py index 92804ac58..62b07f52b 100644 --- a/tests/test_features_notes.py +++ b/tests/test_features_notes.py @@ -49,8 +49,7 @@ def test_values(self): class TestWriteNotesFunction(unittest.TestCase): def test_values(self): - header = "MIDI notes for the stereo_sample.[flac|wav] file in the " \ - "same directory" + header = "MIDI notes for the stereo_sample.[flac|wav] file" result = write_notes(NOTES, ANNOTATIONS_PATH + 'stereo_sample.notes', header=header) self.assertTrue(np.allclose(result, NOTES)) From 50da8b513a0f33d02242eee2c333023cc7824070 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Fri, 5 Feb 2016 11:51:05 +0100 Subject: [PATCH 17/18] refactored pattern loading --- madmom/features/beats.py | 53 +++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/madmom/features/beats.py b/madmom/features/beats.py index 40aae8cdd..b74aaf428 100755 --- a/madmom/features/beats.py +++ b/madmom/features/beats.py @@ -926,42 +926,45 @@ def __init__(self, pattern_files, min_bpm=MIN_BPM, max_bpm=MAX_BPM, raise ValueError('`min_bpm`, `max_bpm`, `num_tempi` and ' '`transition_lambda` must have the same length ' 'as number of patterns.') + # save some variables + self.downbeats = downbeats + self.fps = fps + self.num_beats = [] + # convert timing information to construct a state space + min_interval = 60. * self.fps / np.asarray(max_bpm) + max_interval = 60. * self.fps / np.asarray(min_bpm) + # collect beat/bar state spaces, transition models, and GMMs + state_spaces = [] + transition_models = [] + gmms = [] + # check that at least one pattern is given + if not pattern_files: + raise ValueError('at least one rhythmical pattern must be given.') # load the patterns - patterns = [] - for pattern_file in pattern_files: + for p, pattern_file in enumerate(pattern_files): with open(pattern_file, 'rb') as f: # Python 2 and 3 behave differently # TODO: use some other format to save the GMMs (.npz, .hdf5) try: # Python 3 - patterns.append(pickle.load(f, encoding='latin1')) + pattern = pickle.load(f, encoding='latin1') except TypeError: # Python 2 doesn't have/need the encoding - patterns.append(pickle.load(f)) - if not patterns: - raise ValueError('at least one rhythmical pattern must be given.') - # extract the GMMs and number of beats - gmms = [p['gmms'] for p in patterns] - self.num_beats = [p['num_beats'] for p in patterns] - # save additional variables - self.downbeats = downbeats - self.fps = fps - # convert timing information to construct a state space - min_interval = 60. * self.fps / np.asarray(max_bpm) - max_interval = 60. * self.fps / np.asarray(min_bpm) - # construct a multi pattern state space and transition model - state_spaces = [] - transition_models = [] - for p in range(len(patterns)): + pattern = pickle.load(f) + # get the fitted GMMs and number of beats + gmms.append(pattern['gmms']) + num_beats = pattern['num_beats'] + self.num_beats.append(num_beats) # model each rhythmic pattern as a bar - st = BarStateSpace(self.num_beats[p], min_interval[p], - max_interval[p], num_tempi[p]) - tm = BarTransitionModel(st, transition_lambda[p]) - state_spaces.append(st) - transition_models.append(tm) + state_space = BarStateSpace(num_beats, min_interval[p], + max_interval[p], num_tempi[p]) + transition_model = BarTransitionModel(state_space, + transition_lambda[p]) + state_spaces.append(state_space) + transition_models.append(transition_model) + # create multi pattern state space, transition and observation model self.st = MultiPatternStateSpace(state_spaces) self.tm = MultiPatternTransitionModel(transition_models) - # observation model self.om = GMMPatternTrackingObservationModel(gmms, self.st, norm_observations) # instantiate a HMM From 91e80a8e3be97c159b36751defe42096ddbecd06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Fri, 5 Feb 2016 12:02:12 +0100 Subject: [PATCH 18/18] update to the latest conda package and include libgfortran --- .travis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 7c688be08..3e006b8e0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,9 +13,9 @@ before_install: - sudo apt-get install -qq ffmpeg # install numpy etc. via miniconda - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then - wget http://repo.continuum.io/miniconda/Miniconda-3.8.3-Linux-x86_64.sh -O miniconda.sh; + wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; else - wget http://repo.continuum.io/miniconda/Miniconda3-3.8.3-Linux-x86_64.sh -O miniconda.sh; + wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; fi - bash miniconda.sh -b -p $HOME/miniconda - export PATH="$HOME/miniconda/bin:$PATH" @@ -24,7 +24,7 @@ before_install: - conda update -q conda - conda config --add channels pypi - conda info -a - - deps='pip cython numpy scipy nose pep8' + - deps='pip libgfortran cython numpy scipy nose pep8' - conda create -q -n test-environment "python=$TRAVIS_PYTHON_VERSION" $deps - source activate test-environment - pip install codecov