In [None]:
import os
import sys

# Add the project root (the parent of this file's directory) to sys.path so
# that the project-local modules imported below can be resolved.
dirof = os.path.dirname
try:
    # Resolve __file__ to an absolute path first: when it is a bare relative
    # filename, dirname() returns '' and the computed root would silently
    # become '' (i.e. the working directory) instead of the parent directory.
    dir_of_file = dirof(os.path.abspath(__file__))
except NameError:
    # Notebooks (.ipynb) do not define __file__; fall back to the current
    # working directory.
    dir_of_file = os.getcwd()
project_root = dirof(dir_of_file)
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from remi_z import MultiTrack
from sonata_utils import ls, read_json, jpath
from datasets.dataset_utils import load_phrase_annot_cleaned, calculate_bar_id_of_phrases
from tqdm import tqdm

In [4]:
# Entries of the key-normalised data directory, as returned by the project's
# `ls` helper (presumably one entry per song id -- confirm against sonata_utils).
all_song_ids = ls('/data1/longshen/Datasets/Piano/POP909/pop909_longshen/data_key_normed')
# Ids recorded in the phrase-annotation error file are excluded below.
error_ids = read_json('/data1/longshen/Datasets/Piano/POP909/pop909_longshen/phrase_annotation_errors.json')
song_ids = [song_id for song_id in all_song_ids if song_id not in error_ids]

In [None]:
data_dir = '/data1/longshen/Datasets/Piano/POP909/pop909_longshen/data_key_normed'

# One 0/1 flag per annotated section: 1 when the instrument configuration of the
# bar right before the section start differs from that of the section's first
# bar. Configurations are compared exactly as get_unique_insts() returns them.
change_of_inst_config_in_sec_boundaries = []
pbar = tqdm(song_ids)
for song_id in pbar:
    pbar.set_description(f'Processing {song_id}')
    song_dir = jpath(data_dir, song_id)
    mt = MultiTrack.from_midi(f'{song_dir}/{song_id}.mid')
    annot_fp = jpath(song_dir, 'phrase_annot_cleaned.txt')
    sec_annots = calculate_bar_id_of_phrases(load_phrase_annot_cleaned(annot_fp))

    total_n_bar = len(mt)
    for sec_name, start_bar, end_bar in sec_annots:
        # The boundary lies between bar (start_bar - 1) and bar start_bar.
        prev_bar_id, next_bar_id = start_bar - 1, start_bar

        # A bar index outside [0, total_n_bar) yields an empty configuration.
        # Consequence: a section starting at bar 0 is compared against [] and is
        # recorded as a change unless bar 0's own configuration equals [].
        prev_inst_config = (
            mt[prev_bar_id].get_unique_insts()
            if 0 <= prev_bar_id < total_n_bar
            else []
        )
        next_inst_config = (
            mt[next_bar_id].get_unique_insts()
            if 0 <= next_bar_id < total_n_bar
            else []
        )

        change_of_inst_config_in_sec_boundaries.append(
            0 if prev_inst_config == next_inst_config else 1
        )

Processing 909: 100%|██████████| 844/844 [00:30<00:00, 27.61it/s]


In [5]:
# Report the number of section boundaries, how many of them show a change of
# instrument configuration, and the resulting ratio.
n_boundaries = len(change_of_inst_config_in_sec_boundaries)
n_changed = sum(change_of_inst_config_in_sec_boundaries)
print(n_boundaries)
print(n_changed)
print(f'P(inst_config_change | sec_boundary) = {n_changed / n_boundaries:.4f}')

10879
6438
P(inst_config_change | sec_boundary) = 0.5918


In [8]:
# One 0/1 flag per bar transition visited inside an annotated section: 1 when
# the instrument configuration of a bar differs from that of the following bar.
# Configurations are compared exactly as get_unique_insts() returns them.
change_of_inst_config_inside_sec = []
pbar = tqdm(song_ids)
for song_id in pbar:
    pbar.set_description(f'Processing {song_id}')
    song_dir = jpath(data_dir, song_id)
    mt = MultiTrack.from_midi(f'{song_dir}/{song_id}.mid')
    annot_fp = jpath(song_dir, 'phrase_annot_cleaned.txt')
    sec_annots = calculate_bar_id_of_phrases(load_phrase_annot_cleaned(annot_fp))

    total_n_bar = len(mt)
    for sec_name, start_bar, end_bar in sec_annots:
        # NOTE(review): the last iteration compares bar (end_bar - 1) with bar
        # end_bar. If end_bar is exclusive, that pair straddles the section
        # boundary rather than lying inside the section -- confirm against
        # calculate_bar_id_of_phrases.
        for bar_id in range(start_bar, end_bar):
            following_bar_id = bar_id + 1
            inst_config_1 = mt[bar_id].get_unique_insts()
            # Past the last bar of the MIDI the following configuration is empty.
            if following_bar_id < total_n_bar:
                inst_config_2 = mt[following_bar_id].get_unique_insts()
            else:
                inst_config_2 = []

            change_of_inst_config_inside_sec.append(
                0 if inst_config_1 == inst_config_2 else 1
            )

Processing 909: 100%|██████████| 844/844 [00:31<00:00, 26.94it/s]


In [9]:
# Report the number of bar transitions visited inside sections, how many of
# them show a change of instrument configuration, and the resulting ratio.
n_transitions = len(change_of_inst_config_inside_sec)
n_changed = sum(change_of_inst_config_inside_sec)
print(n_transitions)
print(n_changed)
print(f'P(inst_config_change | inside_sec) = {n_changed / n_transitions:.4f}')

68013
22967
P(inst_config_change | inside_sec) = 0.3377


In [5]:
# What if the order of the instruments is not considered?
# Same boundary statistic as before, but each configuration is converted to a
# set before comparison, so ordering and duplicates no longer matter.

data_dir = '/data1/longshen/Datasets/Piano/POP909/pop909_longshen/data_key_normed'

# NOTE: this rebinds the result list used by the order-sensitive boundary cell.
change_of_inst_config_in_sec_boundaries = []
pbar = tqdm(song_ids)
for song_id in pbar:
    pbar.set_description(f'Processing {song_id}')
    song_dir = jpath(data_dir, song_id)
    mt = MultiTrack.from_midi(f'{song_dir}/{song_id}.mid')
    annot_fp = jpath(song_dir, 'phrase_annot_cleaned.txt')
    sec_annots = calculate_bar_id_of_phrases(load_phrase_annot_cleaned(annot_fp))

    total_n_bar = len(mt)
    for sec_name, start_bar, end_bar in sec_annots:
        # The boundary lies between bar (start_bar - 1) and bar start_bar.
        prev_bar_id, next_bar_id = start_bar - 1, start_bar

        # A bar index outside [0, total_n_bar) yields an empty set.
        # Consequence: a section starting at bar 0 is compared against set()
        # and is recorded as a change unless bar 0's own set is empty.
        prev_inst_config = (
            set(mt[prev_bar_id].get_unique_insts())
            if 0 <= prev_bar_id < total_n_bar
            else set()
        )
        next_inst_config = (
            set(mt[next_bar_id].get_unique_insts())
            if 0 <= next_bar_id < total_n_bar
            else set()
        )

        change_of_inst_config_in_sec_boundaries.append(
            0 if prev_inst_config == next_inst_config else 1
        )

Processing 909: 100%|██████████| 844/844 [00:30<00:00, 27.42it/s]


In [6]:
# Report the number of section boundaries, how many of them show a change of
# instrument configuration, and the resulting ratio.
n_boundaries = len(change_of_inst_config_in_sec_boundaries)
n_changed = sum(change_of_inst_config_in_sec_boundaries)
print(n_boundaries)
print(n_changed)
print(f'P(inst_config_change | sec_boundary) = {n_changed / n_boundaries:.4f}')


10879
5348
P(inst_config_change | sec_boundary) = 0.4916


In [7]:
# One 0/1 flag per bar transition visited inside an annotated section: 1 when
# the set of instruments of a bar differs from that of the following bar.
# NOTE: this rebinds the result list used by the order-sensitive inside cell.
change_of_inst_config_inside_sec = []
pbar = tqdm(song_ids)
for song_id in pbar:
    pbar.set_description(f'Processing {song_id}')
    song_dir = jpath(data_dir, song_id)
    mt = MultiTrack.from_midi(f'{song_dir}/{song_id}.mid')
    annot_fp = jpath(song_dir, 'phrase_annot_cleaned.txt')
    sec_annots = calculate_bar_id_of_phrases(load_phrase_annot_cleaned(annot_fp))

    total_n_bar = len(mt)
    for sec_name, start_bar, end_bar in sec_annots:
        # NOTE(review): the last iteration compares bar (end_bar - 1) with bar
        # end_bar. If end_bar is exclusive, that pair straddles the section
        # boundary rather than lying inside the section -- confirm against
        # calculate_bar_id_of_phrases.
        for bar_id in range(start_bar, end_bar):
            following_bar_id = bar_id + 1
            inst_config_1 = set(mt[bar_id].get_unique_insts())
            # Past the last bar of the MIDI the following configuration is empty.
            if following_bar_id < total_n_bar:
                inst_config_2 = set(mt[following_bar_id].get_unique_insts())
            else:
                inst_config_2 = set()

            change_of_inst_config_inside_sec.append(
                0 if inst_config_1 == inst_config_2 else 1
            )

Processing 909: 100%|██████████| 844/844 [00:30<00:00, 27.44it/s]


In [8]:
# Report the number of bar transitions visited inside sections, how many of
# them show a change of instrument configuration, and the resulting ratio.
n_transitions = len(change_of_inst_config_inside_sec)
n_changed = sum(change_of_inst_config_inside_sec)
print(n_transitions)
print(n_changed)
print(f'P(inst_config_change | inside_sec) = {n_changed / n_transitions:.4f}')

68013
15554
P(inst_config_change | inside_sec) = 0.2287
