In [1]:
# Change into the parent directory so that relative paths such as 'music_score/...' resolve
# (presumably the repository root — confirm against the printed working directory).
# NOTE(review): `%cd ..` is relative to the current directory, so re-running this cell
# without restarting the kernel moves up one more level.
%cd ..
# Enable autoreload so edited project modules are re-imported before each cell runs.
%load_ext autoreload
%autoreload 2

/home/teo/userdata/git_libraries/SejongMusic


In [2]:
from pathlib import Path

In [10]:


# Exploratory load of the per-instrument text scores into `score_by_inst`.
# Each '*.txt' file holds one instrument; the instrument name is the part of the
# file stem before the first underscore.
text_score_dir = Path('music_score/yeominlak_in_text')
assert text_score_dir.exists(), f'score directory not found: {text_score_dir}'
# Sort so the instrument order does not depend on filesystem enumeration order.
text_fns = sorted(text_score_dir.glob('*.txt'))

score_by_inst = {}

for text_fn in text_fns:
  inst_name = text_fn.stem.split('_')[0]

  # The scores contain Korean symbols, so decode explicitly instead of relying on the locale default.
  text = text_fn.read_text(encoding='utf-8')
  # One line per row, comma-separated symbols within a row. Blank lines (e.g. the one
  # produced by a trailing newline) are skipped so they do not become a bogus [''] row.
  score_by_inst[inst_name] = [line.split(',') for line in text.splitlines() if line.strip()]


In [25]:

from sejong_music.yeominrak_processing import ShiftedAlignedScore
# Load the Yeominlak MusicXML score into a ShiftedAlignedScore dataset.
era_dataset = ShiftedAlignedScore(
  xml_path='music_score/yeominlak.musicxml',
  is_valid=False,
  min_meas=4,
  max_meas=4,
  slice_measure_num=4,
)


In [48]:
from collections import Counter
class JgOrchestraScore:
  """Orchestra score read from per-instrument text files, paired with an aligned dataset.

  Every '*.txt' file in `score_dir` is one instrument part. Each non-blank line of a
  file is split on ',' into symbols, and each symbol is a ':'-separated record whose
  first three fields are treated as pitch, ornament and duration.

  Attributes:
    score_dir: Directory the text scores were read from.
    score_by_inst: Maps instrument name (file stem before the first '_') to a list of
      rows, each row being a list of symbol strings.
    tokenizer: JgTokenizer holding the symbol counts collected by make_tokenizer().
    era_dataset: The dataset whose length defines len() of this object.
  """

  def __init__(self, 
              score_dir: Path = Path('music_score/yeominlak_in_text'),
              is_valid: bool = False,
              min_meas: int = 4,
              max_meas: int = 4,
              slice_measure_num: int = 4,
              era_dataset=None,
              ) -> None:  
    """Load the text scores, build the vocabulary and attach the aligned dataset.

    Args:
      score_dir: Directory containing the '*.txt' score files.
      is_valid, min_meas, max_meas, slice_measure_num: Forwarded to ShiftedAlignedScore
        when `era_dataset` is not supplied.
      era_dataset: Optional pre-built dataset to reuse. When None, a ShiftedAlignedScore
        is built from 'yeominlak.musicxml' located next to `score_dir`.

    Raises:
      AssertionError: If `score_dir` does not exist.
    """
    self.score_dir = Path(score_dir)
    assert self.score_dir.exists(), f'score directory not found: {self.score_dir}'
    self.score_by_inst = {}
    self.load_score()
    self.make_tokenizer()
    if era_dataset is None:
      # Build the dataset from the constructor arguments instead of silently depending
      # on a notebook-level global that may not exist after a kernel restart.
      era_dataset = ShiftedAlignedScore(
        xml_path=str(self.score_dir.parent / 'yeominlak.musicxml'),
        is_valid=is_valid,
        min_meas=min_meas,
        max_meas=max_meas,
        slice_measure_num=slice_measure_num,
      )
    self.era_dataset = era_dataset

  def load_score(self):
    """Read every '*.txt' file in `score_dir` into `self.score_by_inst`."""
    # Sorted so the instrument order is reproducible across filesystems.
    for inst_fn in sorted(self.score_dir.glob('*.txt')):
      inst_name = inst_fn.stem.split('_')[0]
      # The files contain Korean symbols; decode explicitly rather than via the locale default.
      text = inst_fn.read_text(encoding='utf-8')
      # Skip blank lines (such as the one after a trailing newline): they would become
      # a [''] row whose empty symbol cannot be parsed by make_tokenizer().
      self.score_by_inst[inst_name] = [line.split(',') for line in text.splitlines() if line.strip()]
  
  def make_tokenizer(self):
    """Count pitch, ornament and duration symbols and store them in a JgTokenizer.

    The pitch is the first ':' field with anything from the first '(' onwards removed;
    ornament and duration are the second and third ':' fields.

    Raises:
      ValueError: If a non-empty symbol has fewer than three ':'-separated fields.
    """
    pitchs, ornaments, durations = Counter(), Counter(), Counter()
    for inst_name, inst in self.score_by_inst.items():
      for jeonggan in inst:
        for note in jeonggan:
          if not note:
            # Empty symbol, e.g. from a trailing comma; nothing to count.
            continue
          fields = note.split(':')
          if len(fields) < 3:
            raise ValueError(f'malformed symbol {note!r} in instrument {inst_name!r}: expected at least 3 ":"-separated fields')
          pitchs[fields[0].split('(')[0]] += 1
          ornaments[fields[1]] += 1
          durations[fields[2]] += 1

    self.tokenizer = JgTokenizer(entire_vocab={'pitch': pitchs, 'ornament': ornaments, 'duration': durations})

  def __len__(self):
    """Return the number of items in `era_dataset`."""
    return len(self.era_dataset)

class JgTokenizer:
  """Holds the symbol vocabulary of a score; the encoding step is still a stub."""

  def __init__(self, entire_vocab):
    """Store `entire_vocab` as given (no copy is made)."""
    self.entire_vocab = entire_vocab

  def __call__(self, score):
    """Placeholder for tokenizing `score`; currently does nothing and returns None."""
    return None

# Build the dataset from the text-score directory.
dataset = JgOrchestraScore(score_dir=Path('music_score/yeominlak_in_text'))

# Display the symbol counts collected per field ('pitch', 'ornament', 'duration').
dataset.tokenizer.entire_vocab

{'pitch': Counter({'배황': 565,
          '0': 4836,
          '하배임': 437,
          '하배남': 332,
          '배태': 573,
          '배중': 878,
          '추성': 200,
          '퇴성': 194,
          '하하배임': 33,
          '하배중': 213,
          '하배태': 102,
          '-': 4379,
          '하배황': 150,
          '배임': 1347,
          '중': 1541,
          '배남': 1643,
          '황': 1885,
          '하배무': 1,
          '겹요성표': 15,
          '쉼표': 45,
          '태': 1735,
          '임': 841,
          '청황': 677,
          '요성표': 219,
          '노': 412,
          '니나': 308,
          '배무': 35,
          '남': 978,
          '니': 195,
          '노라': 127,
          '무': 18,
          '느나': 54,
          '청중': 472,
          '청태': 686,
          '청임': 213,
          '청남': 131,
          '니레나': 61,
          '같은음표': 146,
          '청협': 4,
          '느니': 17,
          '리': 147,
          '니로나': 2,
          '덧길이표': 3,
          '느나르나니': 2,
          '청고': 3}),
 'ornament': Counter({'대현': 109,
          '': 2

In [47]:
# List ornament symbols with their counts, most frequent first.
dataset.tokenizer.entire_vocab['ornament'].most_common()

[('', 22558),
 ('퇴성', 717),
 ('추성', 679),
 ('니레', 601),
 ('ㅅ', 440),
 ('뜰', 222),
 ('중현', 143),
 ('노니로', 124),
 ('2', 114),
 ('대현', 109),
 ('나니나', 106),
 ('루러표', 102),
 ('자출', 99),
 ('4', 79),
 ('떠이어표', 70),
 ('나니로', 61),
 ('노네', 51),
 ('살동', 46),
 ('6', 40),
 ('자출_3', 29),
 ('2_추성', 27),
 ('뜰_하', 26),
 ('더름표', 26),
 ('ㅅ_추성', 24),
 ('6_루러표', 24),
 ('겹흘림표', 24),
 ('특강표', 20),
 ('덧길이', 18),
 ('서침표', 18),
 ('ㅅ_퇴성', 18),
 ('ㅅ_3', 17),
 ('ㅅ_5_8', 16),
 ('반길이', 16),
 ('싸랭', 15),
 ('니', 14),
 ('전성', 13),
 ('시루표', 13),
 ('니레_퇴성', 13),
 ('뜰_상_3', 12),
 ('ㅅ_5_추성', 11),
 ('?', 8),
 ('하', 7),
 ('끊는표', 7),
 ('장지표', 7),
 ('ㅅ_2', 6),
 ('ㅅ_4_8_추성', 6),
 ('니레_노', 5),
 ('8', 5),
 ('느니르', 5),
 ('자출_4', 4),
 ('나니로_퇴성', 4),
 ('뜰_상', 3),
 ('뜰_?', 3),
 ('니나_반길이표', 3),
 ('소지표_나니로', 3),
 ('ㅅ_6_8', 2),
 ('5_추성', 2),
 ('반길이_루러표', 2),
 ('ㅅ_4_8', 2),
 ('농음표', 2),
 ('뜰_대현', 1),
 ('4_퇴성', 1),
 ('뜰_4', 1),
 ('자출_자출_3', 1),
 ('퇴성_추성_(-)', 1),
 ('4_추성', 1),
 ('3', 1),
 ('?_퇴성', 1),
 ('5', 1),
 ('9', 1),
 ('니나', 1),
 ('

In [32]:
# Display the first row of the 'piri' part: the list of symbols from the first line of its file.
# NOTE(review): the saved output below separates fields with '-', while make_tokenizer()
# splits symbols on ':'. This cell's execution count (32) is lower than the class cell's (48),
# so the output may predate a change of the text format — re-run to confirm.
dataset.score_by_inst['piri'][0]

['배임(58)--1/3-0-0',
 '황(63)--2/3-1/3-1/3',
 '-(0)---2/3-2/3',
 '추성(0)--1-0-1',
 '배임(58)--1-0-2',
 '0(0)--1-0-3',
 '배남(60)-퇴성-1/3-0-4',
 '퇴성(0)--4/3-1/3-13/3',
 '-(0)---2/3-14/3',
 '-(0)---0-5',
 '-(0)---1/3-16/3',
 '배남(60)-ㅅ_3-1/3-2/3-17/3',
 '황(63)--1-0-6',
 '추성(0)--1-0-7',
 '겹요성표(0)--1-0-8',
 '쉼표(0)--1-0-9',
 '황(63)--1/3-0-10',
 '중(68)--2/3-1/3-31/3',
 '-(0)---2/3-32/3',
 '0(0)--1-0-11',
 '퇴성(0)--1-0-12',
 '황(63)-시루표-1/6-0-13',
 '태(65)--5/6-1/6-79/6',
 '-(0)---1/3-40/3',
 '-(0)---2/3-41/3',
 '임(70)--1-0-14',
 '0(0)--1-0-15',
 '0(0)--1-0-16',
 '추성(0)--1-0-17',
 '겹요성표(0)--1-0-18',
 '쉼표(0)--1-0-19']