In [1]:
# CHANGE YOUR PATH HERE
midi_lib_path = "/Users/4rr311/Documents/VectorA/KHTN/Nam4/HKII/Thesis/Brainstorming/MIDI/midi_lib"

output_dir = "/Users/4rr311/Documents/VectorA/KHTN/Nam4/HKII/Thesis/Brainstorming/MIDI/Ideas/data/output"

log_dir = "/Users/4rr311/Documents/VectorA/KHTN/Nam4/HKII/Thesis/Brainstorming/MIDI/Ideas/hooktheory/output/logs/model_result_log"

In [2]:
import sys

sys.path.append(midi_lib_path)

In [3]:
from datetime import datetime
import json

from const_lib import musecoco_const as mcc

In [4]:
for k, v in mcc.next_acceptable_keys.items():
    print(f"{k}: {v}")

s: ['b', 'o']
o: ['t']
t: ['i']
i: ['p']
p: ['d']
d: ['v']
v: ['i', 'b', 'p', 'o']
b: ['s']


In [5]:
def all_available_structure(next_acceptable_keys):
    '''
        List all possible paths in the graph (if a path meets the start of itself, stop searching on that path)
        Return a list of all possible paths

        next_acceptable_keys: a adjacency list of all possible next keys. Example:
            {
                's': ['b', 'o'],
                'o': ['t'],
                't': ['i'],
                'i': ['p'],
                'p': ['d'],
                'd': ['v'],
                'v': ['i', 'b', 'p', 'o'],
                'b': ['s']
            }

        Return: a list of all possible paths. Example:
            [
                ['s', 'b'],
                ['s', 'o', 't', 'i', 'p', 'd', 'v'],
                ...
            ]
    '''
    paths = []

    def dfs(node: str, path: list):
        if node in path:
            paths.append(path)
            return
        else:
            pass

        path.append(node)
        
        for next_node in next_acceptable_keys[node]:
            dfs(next_node, path.copy())

    for k in next_acceptable_keys.keys():
        dfs(k, [])

    return paths

In [6]:
structures = all_available_structure(mcc.next_acceptable_keys)

for path in structures:
    print(path)

['s', 'b']
['s', 'o', 't', 'i', 'p', 'd', 'v']
['s', 'o', 't', 'i', 'p', 'd', 'v', 'b']
['s', 'o', 't', 'i', 'p', 'd', 'v']
['s', 'o', 't', 'i', 'p', 'd', 'v']
['o', 't', 'i', 'p', 'd', 'v']
['o', 't', 'i', 'p', 'd', 'v', 'b', 's']
['o', 't', 'i', 'p', 'd', 'v', 'b', 's']
['o', 't', 'i', 'p', 'd', 'v']
['o', 't', 'i', 'p', 'd', 'v']
['t', 'i', 'p', 'd', 'v']
['t', 'i', 'p', 'd', 'v', 'b', 's']
['t', 'i', 'p', 'd', 'v', 'b', 's', 'o']
['t', 'i', 'p', 'd', 'v']
['t', 'i', 'p', 'd', 'v', 'o']
['i', 'p', 'd', 'v']
['i', 'p', 'd', 'v', 'b', 's']
['i', 'p', 'd', 'v', 'b', 's', 'o', 't']
['i', 'p', 'd', 'v']
['i', 'p', 'd', 'v', 'o', 't']
['p', 'd', 'v', 'i']
['p', 'd', 'v', 'b', 's']
['p', 'd', 'v', 'b', 's', 'o', 't', 'i']
['p', 'd', 'v']
['p', 'd', 'v', 'o', 't', 'i']
['d', 'v', 'i', 'p']
['d', 'v', 'b', 's']
['d', 'v', 'b', 's', 'o', 't', 'i', 'p']
['d', 'v', 'p']
['d', 'v', 'o', 't', 'i', 'p']
['v', 'i', 'p', 'd']
['v', 'b', 's']
['v', 'b', 's', 'o', 't', 'i', 'p', 'd']
['v', 'p', 'd']
[

In [7]:
def unique_structures(
    structures, 
    should_sort_by_length=True, 
    descending_length=True
):
    '''
        Remove duplicate structures in the list of structures
        Return a list of unique structures

        structures: a list of structures. Example:
            [
                ['s', 'b'],
                ['s', 'b'],
                ['s', 'o', 't', 'i', 'p', 'd', 'v'],
                ...
            ]
        
        should_sort_by_length: a boolean value to sort the list of unique structures by length or not. Default is True.

        descending_length: a boolean value to sort the list of unique structures by descending length or not. Default is True.

        Return: a list of unique structures. Example:
            [
                ['s', 'b'],
                ['s', 'o', 't', 'i', 'p', 'd', 'v'],
                ...
            ]
    '''
    # Unique structure
    unique_structures = set([tuple(structure) for structure in structures])
    unique_structures = [list(structure) for structure in unique_structures]

    if should_sort_by_length:
        unique_structures = sorted(
            unique_structures, 
            key=lambda x: len(x), reverse=descending_length
        )
    else:
        pass

    return unique_structures

In [8]:
unique_structures = unique_structures(structures)

for path in unique_structures:
    print(path)


['b', 's', 'o', 't', 'i', 'p', 'd', 'v']
['t', 'i', 'p', 'd', 'v', 'b', 's', 'o']
['s', 'o', 't', 'i', 'p', 'd', 'v', 'b']
['d', 'v', 'b', 's', 'o', 't', 'i', 'p']
['o', 't', 'i', 'p', 'd', 'v', 'b', 's']
['p', 'd', 'v', 'b', 's', 'o', 't', 'i']
['i', 'p', 'd', 'v', 'b', 's', 'o', 't']
['v', 'b', 's', 'o', 't', 'i', 'p', 'd']
['t', 'i', 'p', 'd', 'v', 'b', 's']
['s', 'o', 't', 'i', 'p', 'd', 'v']
['o', 't', 'i', 'p', 'd', 'v']
['p', 'd', 'v', 'o', 't', 'i']
['t', 'i', 'p', 'd', 'v', 'o']
['v', 'o', 't', 'i', 'p', 'd']
['d', 'v', 'o', 't', 'i', 'p']
['i', 'p', 'd', 'v', 'o', 't']
['i', 'p', 'd', 'v', 'b', 's']
['p', 'd', 'v', 'b', 's']
['t', 'i', 'p', 'd', 'v']
['d', 'v', 'b', 's']
['i', 'p', 'd', 'v']
['d', 'v', 'i', 'p']
['v', 'i', 'p', 'd']
['p', 'd', 'v', 'i']
['d', 'v', 'p']
['v', 'b', 's']
['v', 'p', 'd']
['p', 'd', 'v']
['b', 's']
['s', 'b']


In [9]:
abbreviations = {
    "b": "bar",
    "o": "position",
    "s": "time_signature",
    "t": "tempo",
    "i": "instrument",
    "p": "pitch",
    "d": "duration",
    "v": "velocity",
    "n": "pitch_name",
    "c": "pitch_octave",
    "f": "family",
    "e": "special"
}

In [10]:
reversed_abbrs = {v: k for k, v in abbreviations.items()}
print(reversed_abbrs)

{'bar': 'b', 'position': 'o', 'time_signature': 's', 'tempo': 't', 'instrument': 'i', 'pitch': 'p', 'duration': 'd', 'velocity': 'v', 'pitch_name': 'n', 'pitch_octave': 'c', 'family': 'f', 'special': 'e'}


In [12]:
model_result_tokens_list = [
    ('s', 9), ('o', 0), ('t', 41), ('t', 42), ('p', 48), ('d', 2), ('v', 20), 
    ('o', 6), ('t', 41), ('p', 42), ('p', 48), ('d', 6), ('v', 20), ('p', 52), 
    ('d', 2), ('v', 20), ('o', 6), ('t', 41), ('i', 0), ('p', 62), ('d', 11), 
    ('v', 20), ('o', 24), ('t', 41), ('i', 42), ('p', 48), ('d', 6), ('v', 20), 
    ('b', 1), ('s', 8), ('o', 0), ('t', 41), ('i', 42), ('p', 48), ('d', 6)
]

import const_lib.musecoco_const as mcc
def musecoco_tokens_list_to_token_list_lines_converter(
    musecoco_tokens_list: list[tuple[str, int]]
):
    lines = []

    line_separators = ['position', 'time_signature']
    line_separators = [reversed_abbrs[key] for key in line_separators]

    current_line = []
    for pair in musecoco_tokens_list:
        key, value = pair

        if key in line_separators:
            if current_line != []:
                lines.append(current_line)
                current_line = []
            else:
                pass
        else:
            current_line.append(pair)

    return lines

token_list_lines = musecoco_tokens_list_to_token_list_lines_converter(model_result_tokens_list)
for line in token_list_lines:
    print(line)

def is_valid_token_list(
        token_list,
        next_acceptable_keys: dict = mcc.next_acceptable_keys,
    ):
    for i in range(len(token_list) - 1):
        key, value = token_list[i]
        next_key = token_list[i + 1][0]

        if next_key not in next_acceptable_keys[key]:
            return False
        
    return True

for line in token_list_lines:
    print(is_valid_token_list(line))

def nearest_valid_line(
    target_line_idx, 
    are_lines_valid: list[bool]
):
    """
        Find the nearest valid line to the target line (previous lines first, then next lines)
        Return the index of the nearest valid line

        target_line_idx: the index of the target line
        are_lines_valid: a list of boolean values to check if the line is valid or not

        Return: the index of the nearest valid line
    """
    nearest_valid_line_idx = -1
    
    for i in range(target_line_idx - 1, -1, -1):
        if are_lines_valid[i]:
            nearest_valid_line_idx = i
            break
        else:
            pass
    if nearest_valid_line_idx == -1:
        for i in range(target_line_idx + 1, len(are_lines_valid)):
            if are_lines_valid[i]:
                nearest_valid_line_idx = i
                break
            else:
                pass
    else:
        pass

    return nearest_valid_line_idx

def fill_missing_by_line(
    musecoco_tokens_list: list[tuple[str, int]]
):
    are_lines_valid = []
    for line in musecoco_tokens_list:
        are_lines_valid.append(is_valid_token_list(line))

    for i in range(len(are_lines_valid)):
        if not are_lines_valid[i]:
            nearest_valid_line_idx = nearest_valid_line(i, are_lines_valid)
            print(f"Fill line {i} by line {nearest_valid_line_idx}")

fill_missing_by_line(token_list_lines)

[('t', 41), ('t', 42), ('p', 48), ('d', 2), ('v', 20)]
[('t', 41), ('p', 42), ('p', 48), ('d', 6), ('v', 20), ('p', 52), ('d', 2), ('v', 20)]
[('t', 41), ('i', 0), ('p', 62), ('d', 11), ('v', 20)]
[('t', 41), ('i', 42), ('p', 48), ('d', 6), ('v', 20), ('b', 1)]
False
False
True
True
Fill line 0 by line 2
Fill line 1 by line 2
