1.0.0

Hiroshiba · Nov 22, 2019 · 98fcffe · 98fcffe
commit 98fcffe
Show file tree

Hide file tree

Showing 7 changed files with 288 additions and 0 deletions.
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2019 Kazuyuki Hiroshiba.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,48 @@
+## kiritan_singing_label_reader
+A Python reader for the label data of the [東北きりたん歌唱データベース](https://github.com/mmorise/kiritan_singing) (Tohoku Kiritan singing database).
+You can read MIDI notes and phoneme labels using this library.
+
+### Requirements
+Python 3.6
+
+### Usage
+```bash
+pip install git+https://github.com/Hiroshiba/kiritan_singing_label_reader
+```
+
+### Example
+```python
+# get midi notes
+from kiritan_singing_label_reader import MidiNoteReader
+
+midi_reader = MidiNoteReader('/path/to/midi_label/01.mid')
+notes = midi_reader.get_notes()
+"""
+[
+    Note(pitch='64', start=18.947376, end=19.350747871874997),
+    Note(pitch='71', start=19.342112999999998, end=21.904202840624997),
+    Note(pitch='68', start=22.105272, end=22.30181813125),
+    ...
+]
+"""
+from kiritan_singing_label_reader import Phoneme
+
+phonemes = Phoneme.load_julius_list('/path/to/mono_label/01.lab')
+"""
+[
+    Phoneme(phoneme='pau', start=0.0, end=18.6263777),
+    Phoneme(phoneme='br', start=18.6263777, end=19.0916217),
+    Phoneme(phoneme='k', start=19.0916217, end=19.1636238),
+    ...
+]
+"""
+
+# you can filter out phoneme labels that have no corresponding note
+from kiritan_singing_label_reader import filter_phoneme_with_note
+phonemes = filter_phoneme_with_note(phonemes, notes)
+```
+
+There are more samples in [sample.py](sample.py)
+
+### License
+[MIT LICENSE](LICENSE)
diff --git a/kiritan_singing_label_reader/__init__.py b/kiritan_singing_label_reader/__init__.py
@@ -0,0 +1,2 @@
+from .midi_note import MidiNoteReader
+from .phoneme import Phoneme, filter_phoneme_with_note, verify_phoneme_and_note
diff --git a/kiritan_singing_label_reader/midi_note.py b/kiritan_singing_label_reader/midi_note.py
@@ -0,0 +1,62 @@
+from pathlib import Path
+from typing import Union
+
+import midi
+
+
+class Note:
+    def __init__(self, pitch: int, start: float, end: float):
+        self.pitch = pitch
+        self.start = start
+        self.end = end
+
+    def __repr__(self):
+        return f'Note(pitch=\'{self.pitch}\', start={self.start}, end={self.end})'
+
+
+class MidiNoteReader():
+    def __init__(self, path: Union[str, Path]):
+        self.song = midi.read_midifile(str(path))
+
+    def get_bpm(self):
+        bpm = None
+        for track in self.song:
+            for event in track:
+                if isinstance(event, midi.SetTempoEvent):
+                    new_bpm = event.get_bpm()
+                    assert bpm is None or bpm == new_bpm
+                    bpm = new_bpm
+        return bpm
+
+    def get_resolution(self):
+        return self.song.resolution
+
+    def get_notes(self):
+        self.song.make_ticks_abs()
+        bpm = self.get_bpm()
+        resolution = self.get_resolution()
+
+        def _tick_to_second(tick: float):
+            return tick * 60 / bpm / resolution
+
+        notes = []
+        start_states = {}
+        for track in self.song:
+            for event in track:
+                if isinstance(event, midi.NoteOnEvent):
+                    pitch = event.get_pitch()
+
+                    if pitch in start_states:  # note off
+                        pitch = event.get_pitch()
+                        note = Note(
+                            pitch=pitch,
+                            start=start_states.pop(pitch),
+                            end=_tick_to_second(event.tick),
+                        )
+                        notes.append(note)
+
+                    if event.get_velocity() > 0:  # note on
+                        assert pitch not in start_states
+                        start_states[pitch] = _tick_to_second(event.tick)
+
+        return notes
diff --git a/kiritan_singing_label_reader/phoneme.py b/kiritan_singing_label_reader/phoneme.py
@@ -0,0 +1,104 @@
+from pathlib import Path
+from typing import Union, Sequence
+
+import numpy
+
+from kiritan_singing_label_reader.midi_note import Note
+
+
+class Phoneme:
+    phoneme_names = (
+        'pau', 'br', 'a', 'b', 'ch', 'cl', 'd', 'e', 'f', 'g', 'gy', 'h', 'hy', 'i', 'j', 'k', 'ky', 'm', 'my', 'n',
+        'N', 'ny', 'o', 'p', 'py', 'r', 'ry', 's', 'sh', 't', 'ts', 'u', 'v', 'w', 'y', 'z',
+    )
+
+    def __init__(
+            self,
+            name: str,
+            start: float = None,
+            end: float = None,
+    ) -> None:
+        self.name = name
+        self.start = start
+        self.end = end
+
+    def __eq__(self, other: 'Phoneme'):
+        return self.name == other.name
+
+    def __repr__(self):
+        return f'Phoneme(phoneme=\'{self.name}\', start={self.start}, end={self.end})'
+
+    def verify(self):
+        assert self.name in self.phoneme_names, f'{self.name} is not defined.'
+
+    @property
+    def duration(self):
+        return self.end - self.start
+
+    @classmethod
+    def parse(cls, s: str):
+        words = s.split()
+        return cls(
+            start=float(words[0]),
+            end=float(words[1]),
+            name=words[2],
+        )
+
+    @classmethod
+    def load_julius_list(cls, path: Union[str, Path]):
+        phonemes = [
+            cls.parse(s)
+            for s in Path(path).read_text().splitlines()
+            if len(s) > 0
+        ]
+        for phoneme in phonemes:
+            phoneme.verify()
+        return phonemes
+
+    @classmethod
+    def write_julius_list(cls, path: Union[str, Path], phonemes: Sequence['Phoneme']):
+        s = ''
+        for phoneme in phonemes:
+            s += f'{phoneme.start} {phoneme.end} {phoneme.name}\n'
+
+        Path(path).write_text(s)
+
+
+def filter_phoneme_with_note(phonemes: Sequence[Phoneme], notes: Sequence[Note]):
+    pau_phoneme = Phoneme('pau')
+    assert phonemes[0] == pau_phoneme and phonemes[-1] == pau_phoneme
+
+    pau_indexes = numpy.where(numpy.array(phonemes) == pau_phoneme)[0]
+
+    filtered_phonemes = []
+    for i, j in zip(pau_indexes[:-1], pau_indexes[1:]):
+        start = phonemes[i + 1].start - 1
+        end = phonemes[j - 1].end + 1
+        num_note = len(list(filter(lambda note: start < note.start and note.end < end, notes)))
+
+        if num_note > 0:
+            filtered_phonemes += phonemes[i:j]
+
+    filtered_phonemes += [phonemes[-1]]
+    return filtered_phonemes
+
+
+def verify_phoneme_and_note(phonemes: Sequence[Phoneme], notes: Sequence[Note]):
+    pau_phoneme = Phoneme('pau')
+    br_phoneme = Phoneme('br')
+
+    def _is_near(phoneme: Phoneme, note: Note):
+        return \
+            (phoneme.start - 1 < note.start and note.end < phoneme.end + 1) \
+            or \
+            (note.start - 1 < phoneme.start and phoneme.end < note.end + 1)
+
+    # eliminate 'pau' and 'br'
+    phonemes = list(filter(lambda phoneme: phoneme != pau_phoneme and phoneme != br_phoneme, phonemes))
+
+    matrix = numpy.empty((len(phonemes), len(notes)), dtype=bool)
+    for i, phoneme in enumerate(phonemes):
+        for j, note in enumerate(notes):
+            matrix[i, j] = _is_near(phoneme, note)
+
+    return matrix.any(axis=0).all() and matrix.any(axis=1).all()
diff --git a/sample.py b/sample.py
@@ -0,0 +1,38 @@
+import argparse
+from pathlib import Path
+
+from kiritan_singing_label_reader import MidiNoteReader, Phoneme, filter_phoneme_with_note, verify_phoneme_and_note
+
+
+def sample(
+        kiritan_singing_directory: Path,
+):
+    midi_paths = sorted((kiritan_singing_directory / 'midi_label/').glob('*.mid'))
+    label_paths = sorted((kiritan_singing_directory / 'mono_label/').glob('*.lab'))
+
+    for i, (midi_path, label_path) in enumerate(zip(midi_paths, label_paths)):
+        # get midi notes
+        midi_reader = MidiNoteReader(midi_path)
+        notes = midi_reader.get_notes()
+
+        # get phonemes
+        phonemes = Phoneme.load_julius_list(label_path)
+
+        # '06' and '08' have un pairwise phoneme label
+        # ref. https://github.com/mmorise/kiritan_singing
+        if i in [5, 7]:
+            phonemes = filter_phoneme_with_note(phonemes, notes)
+            Phoneme.write_julius_list(f'{i + 1:0>2}.lab', phonemes)
+
+        ok = verify_phoneme_and_note(phonemes, notes)
+        print(midi_path)
+        print(label_path)
+        print(ok)
+
+        break
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('kiritan_singing_directory', type=Path)
+    sample(**vars(parser.parse_args()))
diff --git a/setup.py b/setup.py
@@ -0,0 +1,15 @@
+from setuptools import setup, find_packages
+
+setup(
+    name='kiritan_singing_label_reader',
+    version='1.0.0',
+    packages=find_packages(),
+    url='https://github.com/Hiroshiba/kiritan_singing_label_reader',
+    author='Hiroshiba Kazuyuki',
+    author_email='hihokaruta@gmail.com',
+    license='MIT License',
+    install_requires=[
+        'numpy',
+        'midi @ git+https://github.com/vishnubob/python-midi@feature/python3',
+    ],
+)