In [1]:
# CHANGE YOUR PATH HERE
# Only `midi_root_dir` normally needs editing: the three paths below are derived from it.
midi_root_dir = "/Users/4rr311/Documents/VectorA/KHTN/Nam4/HKII/Thesis/Brainstorming/MIDI"

# Directory appended to sys.path before `const_lib` is imported.
midi_lib_path = f"{midi_root_dir}/midi_lib"

output_dir = f"{midi_root_dir}/Ideas/data/output"

log_dir = f"{midi_root_dir}/Ideas/hooktheory/output/logs/model_result_log"

In [2]:
import sys

# Make the directory in `midi_lib_path` importable. The membership check keeps
# the cell idempotent: re-running it does not add the same entry again.
if midi_lib_path not in sys.path:
    sys.path.append(midi_lib_path)

In [3]:
from datetime import datetime
import json

from const_lib import musecoco_const as mcc

In [4]:
# Show every key together with the list of keys allowed to follow it.
for k in mcc.next_acceptable_keys:
    v = mcc.next_acceptable_keys[k]
    print(f"{k}: {v}")

s: ['b', 'o']
o: ['t']
t: ['i']
i: ['p']
p: ['d']
d: ['v']
v: ['i', 'b', 'p', 'o']
b: ['s']


In [5]:
def all_available_structure(next_acceptable_keys):
    '''
        Enumerate the simple paths of a directed graph by depth-first search.

        A search is started from every key of the adjacency list. A path is
        recorded (and that branch stops) as soon as the next node to visit is
        already on the path, i.e. when ANY node would be repeated, not only the
        starting one. A path is also recorded when it reaches a node that has
        no successors, or a node that is not a key of the adjacency list.

        The same path can be recorded more than once (once per successor that
        closes a cycle); use remove_duplicate_structures to de-duplicate.

        next_acceptable_keys: an adjacency list of all possible next keys. Example:
            {
                's': ['b', 'o'],
                'o': ['t'],
                't': ['i'],
                'i': ['p'],
                'p': ['d'],
                'd': ['v'],
                'v': ['i', 'b', 'p', 'o'],
                'b': ['s']
            }

        Return: a list of paths, each path being a list of keys. Example:
            [
                ['s', 'b'],
                ['s', 'o', 't', 'i', 'p', 'd', 'v'],
                ...
            ]
    '''
    paths = []

    def dfs(node: str, path: list):
        if node in path:
            # Visiting `node` would repeat a key: the path is complete.
            # Store a copy so that recorded paths never share a list object.
            paths.append(list(path))
            return

        extended_path = path + [node]

        # A node missing from the adjacency list is treated as having no successors.
        successors = next_acceptable_keys.get(node, [])
        if not successors:
            # Dead end: record the path instead of silently dropping it.
            paths.append(extended_path)
            return

        for next_node in successors:
            dfs(next_node, extended_path)

    for start_node in next_acceptable_keys:
        dfs(start_node, [])

    return paths

In [6]:
# Enumerate every path through the key graph and list them one per line.
structures = all_available_structure(next_acceptable_keys=mcc.next_acceptable_keys)

for path in structures:
    print(path)

['s', 'b']
['s', 'o', 't', 'i', 'p', 'd', 'v']
['s', 'o', 't', 'i', 'p', 'd', 'v', 'b']
['s', 'o', 't', 'i', 'p', 'd', 'v']
['s', 'o', 't', 'i', 'p', 'd', 'v']
['o', 't', 'i', 'p', 'd', 'v']
['o', 't', 'i', 'p', 'd', 'v', 'b', 's']
['o', 't', 'i', 'p', 'd', 'v', 'b', 's']
['o', 't', 'i', 'p', 'd', 'v']
['o', 't', 'i', 'p', 'd', 'v']
['t', 'i', 'p', 'd', 'v']
['t', 'i', 'p', 'd', 'v', 'b', 's']
['t', 'i', 'p', 'd', 'v', 'b', 's', 'o']
['t', 'i', 'p', 'd', 'v']
['t', 'i', 'p', 'd', 'v', 'o']
['i', 'p', 'd', 'v']
['i', 'p', 'd', 'v', 'b', 's']
['i', 'p', 'd', 'v', 'b', 's', 'o', 't']
['i', 'p', 'd', 'v']
['i', 'p', 'd', 'v', 'o', 't']
['p', 'd', 'v', 'i']
['p', 'd', 'v', 'b', 's']
['p', 'd', 'v', 'b', 's', 'o', 't', 'i']
['p', 'd', 'v']
['p', 'd', 'v', 'o', 't', 'i']
['d', 'v', 'i', 'p']
['d', 'v', 'b', 's']
['d', 'v', 'b', 's', 'o', 't', 'i', 'p']
['d', 'v', 'p']
['d', 'v', 'o', 't', 'i', 'p']
['v', 'i', 'p', 'd']
['v', 'b', 's']
['v', 'b', 's', 'o', 't', 'i', 'p', 'd']
['v', 'p', 'd']
[

In [7]:
def remove_duplicate_structures(
    structures, 
    should_sort_by_length=True, 
    descending_length=True
):
    '''
        Remove duplicate structures from a list of structures.

        The result is deterministic: duplicates are dropped while keeping the
        first occurrence of each structure in input order, and the optional
        sort by length is stable, so structures of equal length keep that
        first-seen order.

        structures: a list of structures (lists of hashable keys). Example:
            [
                ['s', 'b'],
                ['s', 'b'],
                ['s', 'o', 't', 'i', 'p', 'd', 'v'],
                ...
            ]

        should_sort_by_length: whether to sort the unique structures by length. Default is True.

        descending_length: when sorting, put the longest structures first. Default is True.

        Return: a new list of unique structures (each one a new list). Example:
            [
                ['s', 'o', 't', 'i', 'p', 'd', 'v'],
                ['s', 'b'],
                ...
            ]
    '''
    # dict keys keep insertion order, unlike a set whose iteration order for
    # tuples of strings depends on per-process string hashing.
    unique_structures = [
        list(structure)
        for structure in dict.fromkeys(tuple(structure) for structure in structures)
    ]

    if should_sort_by_length:
        unique_structures.sort(key=len, reverse=descending_length)

    return unique_structures

| **Thuộc tính** | **Tên đầy đủ** | **Mô tả** | **Thuộc tính liền sau** |
|----------------|----------------|-----------|-------------------------|
| s              | Time Signature | Nhịp của bản nhạc | b, o.                  |
| o              | Position       | Thời điểm xuất hiện của sự kiện | t.       |
| t              | Tempo          | Tốc độ bản nhạc | i.                        |
| i              | Instrument     | Nhạc cụ | p.                           |
| p              | Pitch          | Cao độ nốt nhạc | d.                        |
| d              | Duration       | Độ dài nốt nhạc | v.                        |
| v              | Velocity       | Tốc độ nhấn/Lực nhấn phím đàn | i, b, p, o. |
| b              | Bar            | Vạch nhịp | s.                         |


In [8]:
# Keys a structure must not start with (compared with structure[0] by the filtering loop).
inappropriate_head_keys: list[str] = ['d', 'v']

# Keys a structure must not end with (compared with structure[-1] by the filtering loop).
inappropriate_tail_keys: list[str] = ['p', 'd']

In [9]:
# De-duplicate the enumerated paths (default options: sorted by length, longest first).
unique_structures = remove_duplicate_structures(structures)

appropriate_structures = []

# Keep only the structures whose first and last keys are both acceptable.
for new_structure in unique_structures:
    if (
        new_structure[0] in inappropriate_head_keys
        or new_structure[-1] in inappropriate_tail_keys
    ):
        print(f"Ignore {new_structure}")
        continue

    appropriate_structures.append(new_structure)

print("\nAppropriate structures:")
for path in appropriate_structures:
    print(path)

Ignore ['v', 'b', 's', 'o', 't', 'i', 'p', 'd']
Ignore ['d', 'v', 'b', 's', 'o', 't', 'i', 'p']
Ignore ['v', 'o', 't', 'i', 'p', 'd']
Ignore ['d', 'v', 'o', 't', 'i', 'p']
Ignore ['v', 'i', 'p', 'd']
Ignore ['d', 'v', 'i', 'p']
Ignore ['d', 'v', 'b', 's']
Ignore ['v', 'p', 'd']
Ignore ['v', 'b', 's']
Ignore ['d', 'v', 'p']

Appropriate structures:
['i', 'p', 'd', 'v', 'b', 's', 'o', 't']
['s', 'o', 't', 'i', 'p', 'd', 'v', 'b']
['o', 't', 'i', 'p', 'd', 'v', 'b', 's']
['p', 'd', 'v', 'b', 's', 'o', 't', 'i']
['t', 'i', 'p', 'd', 'v', 'b', 's', 'o']
['b', 's', 'o', 't', 'i', 'p', 'd', 'v']
['t', 'i', 'p', 'd', 'v', 'b', 's']
['s', 'o', 't', 'i', 'p', 'd', 'v']
['o', 't', 'i', 'p', 'd', 'v']
['i', 'p', 'd', 'v', 'b', 's']
['t', 'i', 'p', 'd', 'v', 'o']
['p', 'd', 'v', 'o', 't', 'i']
['i', 'p', 'd', 'v', 'o', 't']
['p', 'd', 'v', 'b', 's']
['t', 'i', 'p', 'd', 'v']
['i', 'p', 'd', 'v']
['p', 'd', 'v', 'i']
['p', 'd', 'v']
['s', 'b']
['b', 's']


In [10]:
# The "golden" structure is the shortest appropriate structure that starts
# with `golden_head` and ends with `golden_tail`.
golden_head = 's'
golden_tail = 'v'

golden_structures: list[list[str]] = []

for new_structure in appropriate_structures:
    if (new_structure[0], new_structure[-1]) == (golden_head, golden_tail):
        golden_structures.append(new_structure)

# min() returns the first of several equally short candidates.
golden_structure = min(golden_structures, key=len)

print(f"Golden Structure: {golden_structure}")

Golden Structure: ['s', 'o', 't', 'i', 'p', 'd', 'v']


In [11]:
# One-letter token keys mapped to the attribute names they abbreviate.
abbreviations = dict(
    b="bar",
    o="position",
    s="time_signature",
    t="tempo",
    i="instrument",
    p="pitch",
    d="duration",
    v="velocity",
    n="pitch_name",
    c="pitch_octave",
    f="family",
    e="special",
)

In [12]:
# Inverse lookup: attribute name -> one-letter key.
reversed_abbrs = dict(zip(abbreviations.values(), abbreviations.keys()))
print(reversed_abbrs)

{'bar': 'b', 'position': 'o', 'time_signature': 's', 'tempo': 't', 'instrument': 'i', 'pitch': 'p', 'duration': 'd', 'velocity': 'v', 'pitch_name': 'n', 'pitch_octave': 'c', 'family': 'f', 'special': 'e'}


In [13]:
musecoco_str = "d-6 v-20 o-48 t-25 p-68 d-27 t-36 i-40 p-72 i-24 p-64 d-6 v-20 o-48 t-39 i-25 p-57 p-67 d-6 v-20 o-36 i-0 p-62 d-6 v-20 t-29 d-6 v-20 t-38 i-90 i-22 i-52 p-69 d-3 v-20 t-25 i-16 d-6 v-20 t-36 i-0 p-57 d-12 v-20 t-39 i-52 t-36 i-16 p-69 d-6 v-20 t-26 p-62 d-27 v-17 o-48 t-33 i-40 p-68 d-6 v-20 t-36 i-3 p-60 v-17 o-48 t-33 i-29 p-64 d-6 v-20 t-38 t-30 i-2 p-74 t-29 p-71 d-6 v-20 t-31 d-6 v-20 t-41 i-24 p-58 i-22 p-62 d-6 v-20 t-35 i-41 p-67 d-18 v-17 o-0 t-35 i-24 p-53 d-16 i-3 p-65 d-3 v-20 t-28 i-0 i-0 p-62 d-27 v-17 t-39 i-0 p-71 d-9 v-20 t-36 i-0 p-27 d-27 v-17 o-0 t-29 i-41 p-55 i-16 d-27 v-17 o-96 d-12 v-17 t-36 i-27 t-36 i-52 d-18 v-17 t-30 i-25 p-62 p-62 d-18 v-20 t-36 i-40 p-35 d-27 v-17 t-35 i-24 p-60 d-6 v-20 t-38 d-18 v-17 t-36 i-2 p-70 p-69 d-6 v-20 t-36 i-0 p-57 d-18 i-24 p-51 d-27 v-17 t-36 i-61 p-62 d-6 v-17 t-41 i-3 d-6 v-20 t-38 i-21 p-74 v-17 t-36 i-2 p-72 v-20 t-41 d-6 v-20 t-38 i-16 p-70 d-3 v-20 t-38 i-27 p-70 t-33 i-90 p-28 d-6 v-20 t-41 i-25 i-16 p-66 p-55 d-18 v-17 t-36 i-61 p-62 d-6 v-17 o-0 t-35 p-59 p-60 v-17 t-36 i-52 t-38 d-21 v-17 t-29 i-16 i-53 p-55 d-27 v-17 t-31 d-27 v-17 o-69 t-35 d-15 v-17 t-38 p-62 d-15 d-27 v-17 t-48 i-61 p-71 v-17 t-35 i-41 p-71 d-6 v-17 t-36 i-3 d-12 v-17 d-12 d-27 v-17 t-29 i-8 p-62 d-27 v-17 t-27 p-60 d-27 v-17 t-34 i-0 p-65 d-18 v-17 o-72 t-36 i-4 p-67 d-27 v-17 t-38 i-90 d-6 v-17 t-36 i-3 i-41 p-66 d-18 v-17 t-36 i-3 p-67 d-27 v-17 t-37 i-53 p-62 d-12 v-17 t-42 i-90 p-59 d-18 v-17 t-35 i-8 t-34 i-61 i-0 p-68 d-18 v-17 t-40 i-61 d-6 v-17 t-39 p-65 d-27 v-17 t-35 i-0 v-17 t-36 i-41 p-55 d-27 v-17 t-35 i-90 p-26 v-17 t-35 p-64 d-18 v-17 t-42 d-15 v-17 t-38 p-67 d-18 v-17 t-39 i-90 p-34 d-27 v-17 t-38 i-90 d-9 v-20 t-24 i-21 p-55 i-16 p-67 d-21 v-17 t-32 i-90 p-69 d-24 p-61 d-18 v-17 t-24 i-90 d-18 v-17 t-35 i-61 p-48 d-12 v-20 t-35 i-0 p-31 d-27 v-17 t-35 i-8 p-60 d-27 v-17 t-29 p-77 p-63 p-63 d-6 v-17 t-33 i-21 p-55 v-17 t-23 v-17 t-36 i-0 p-60 d-27 v-17 t-36 i-2 p-59 d-12 v-17 d-27 v-17 d-12 
v-17 t-32 i-4 p-56 d-27 v-17 t-33 p-74 d-18 v-17 t-33 d-27 v-17 t-36 i-2 t-35 i-90 p-59 d-27 v-17 t-36 i-8 p-29 d-18 v-17 t-36 i-90 p-55 d-27 v-17 t-35 i-8 p-58 p-55 d-27 v-17 t-36 i-61 p-62 d-18 v-17 t-38 i-21 i-21 p-37 d-27 v-17 t-36 i-0 p-59 d-12 v-17 t-36 i-61 p-52 d-27 v-17 t-35 i-41 p-32 v-17 t-35 i-3 p-59 d-18 v-17 t-36 i-3 d-12 v-17 d-27 v-17 t-38 i-90 d-18 i-90 i-90 p-60 d-18 v-17 t-35 d-18 v-17 t-35 i-2 d-27 v-17 t-36 i-90 p-68 d-27 v-17 t-35 i-4 v-17 t-36 i-41 p-57 v-17 t-33 d-27 v-17 t-32 i-90 d-27 v-17 t-32 i-90 p-51 d-27 v-17 t-33 d-18 v-17 t-35 i-8 p-60 d-27 v-17 t-37 v-17 t-36 i-3 p-67 d-18 i-4 d-18 v-17 t-35 i-4 p-65 d-27 v-17 t-40 i-4 d-27 v-17 t-36 i-0 p-61 d-18 v-17 t-35 d-15 v-17 t-33 i-61 p-63 d-18 v-17 d-27 v-17 t-33 i-8 p-70 d-12 v-17 t-39 d-27 v-17 t-33 d-37 v-17 t-36 i-21 p-52 d-18 v-17 t-35 i-4 i-21 p-67 d-27 v-17 t-36 d-27 v-17 t-33 d-18 v-17 t-33 i-3 i-0 v-17 t-36 v-17 t-43 d-18 v-17 t-29 i-41 i-8 d-6 v-17 t-36 i-41 p-62 v-17 t-35 i-21 i-3 i-41 p-63 d-27 v-17 t-35 i-41 p-74 v-17 t-36 i-61 p-59 d-12 v-17 t-33 v-17 t-41 i-41 t-37 i-21 p-62 d-27 v-17 t-32 i-8 p-65 i-21 i-21 p-71 d-18 i-3 t-30 i-2 i-61 p-57 p-57 d-18 v-17 t-34 i-2 d-27 v-17 t-33 i-3 d-18 v-17 d-27 v-17 t-36 i-3 p-65 d-18 v-17 t-41 d-18 v-17 t-33 i-90 p-61 i-3 p-70 d-27 v-17 t-37 i-3 p-31 i-8 p-36 d-27 v-17 t-36 p-76 d-27 v-17 t-35 d-18 v-17 t-37 i-3 p-59 p-59 p-33 d-27 v-17 t-37 i-41 p-56 d-18 v-17 t-35 i-16 d-27 v-17 t-36 i-41 p-61 d-18 v-17 i-61 p-30 v-17 d-27 v-17 t-33 i-21 p-67 d-27 v-17 t-35 i-8 p-68 d-27 v-17 t-36 v-17 t-36 i-8 p-61 d-15 v-17 t-36 i-0 p-67 d-18 v-17 t-33 i-3 p-57 d-18 v-17 t-38 d-18 v-17 t-38 i-3 p-38 d-18 v-17 t-36 i-16 p-71 d-27 v-17 t-43 d-18 v-17 t-35 v-17 v-17 t-24 i-8 i-8 p-59 d-27 v-17 t-41 d-18 v-17 t-33 p-31 p-66 d-12 v-17 i-61 d-27 v-17 t-36 i-90 v-17 t-33 i-61 v-17 t-36 i-41 d-18 v-17 i-61 d-12 v-17 t-33 v-17 t-33 i-61 p-65 v-17 t-36 i-61 d-12 v-17 t-33 i-3 i-2 p-62 d-27 v-17 t-35 i-2 i-2 p-67 d-27 v-17 t-38 i-2 p-60 d-18 v-17 t-41 i-2 d-27 
v-17 t-23 i-3 p-76 d-27 v-17 t-33 d-27 v-17 t-35 i-2 p-28 d-37 v-17 t-33 d-27 v-17 t-43 i-16 p-35 d-27 v-17 t-43 d-18 v-17 t-33 i-8 d-27 v-17 t-33 v-17 t-36 i-61 p-35 d-27 v-17 t-36 i-41 p-33 d-18 v-17 v-17 t-29 i-2 p-31 d-6 v-20 t-36 i-41 p-65 d-27 v-17 i-4 d-27 v-17 d-27 v-17 t-30 i-16 p-68 d-12 v-17 t-23 v-17 t-41 i-4 p-39 p-63 p-60 d-27 v-17 t-37 i-8 p-36 d-18 v-17 t-33 i-90 p-59 d-27 v-17 t-36 i-3 p-52 d-27 v-17 t-30 v-17 t-36 i-3 d-27 v-17 t-36 i-3 v-17 t-33 i-61 v-17 t-35 i-4 p-59 d-27 v-17 v-17 t-33 d-18 v-17 t-35 i-3 p-55 d-18 v-17 t-33 i-8 d-27 v-17 t-36 i-90 p-60 d-27 v-17 t-30 i-61 i-8 p-67 d-18 v-17 t-33 i-90 p-33 d-27 v-17 t-37 i-8 p-62 d-18 v-17 t-36 i-4 i-90 d-18 v-17 t-37 v-17 t-41 p-64 d-27 v-17 t-35 i-2 p-60 d-18 v-17 t-36 i-16 d-12 v-17 t-33 i-61 v-17 t-29 v-17 t-38 v-17 t-33 i-3 d-12 v-20 t-28 i-2 p-37 t-37 i-41 p-33 d-18 v-17 d-27 v-17 t-33 d-27 v-17 t-36 i-0 i-21 p-55 d-27 v-17 t-33 d-25 v-17 t-33 d-18 v-17 t-35 i-41 d-18 v-17 t-33 v-17 t-35 i-21 p-53 p-65 d-27 v-17 t-33 p-28 i-41 v-17 v-17 t-36 i-2 v-17 t-33 i-90 p-38 d-18 v-17 t-40 i-21 d-18 v-17 t-33 i-61 v-17 t-42 d-18 v-17 t-35 i-4 p-57 d-27 v-17 t-36 d-27 v-17 d-15 d-27 v-17 t-36 i-3 p-69 d-18 v-17 t-33 v-17 t-33 v-17 t-36 i-61 p-34 d-27 v-17 t-33 v-17 t-33 v-17 t-33 i-61 p-31 d-27 v-17 t-34 i-21 d-18 d-18 v-17 t-33 d-18 v-17 t-41 i-16 p-69 d-27 v-17 t-35 i-8 p-38 d-18 v-17 t-33 i-21 p-38 v-17 t-33 i-3 p-53 d-12 v-17 t-33 i-8 p-31 d-27 v-17 i-41 p-66 d-27 v-17 t-37 i-3 d-27 v-17 t-33 i-41 p-65 d-27 v-17 d-18 v-17 t-33 i-21 i-21 d-27 v-17 d-18 v-17 t-36 i-16 p-35 i-3 v-17 t-29 d-18 v-17 t-33 i-21 d-18 v-17 t-31 v-17 t-33 d-6 v-17 t-33 i-3 p-27 v-17 t-38 i-41 v-17 i-8 p-60 v-17 t-36 i-41 p-70 d-18 v-17 d-27 v-17 t-33 i-41 p-69 d-18 v-17 t-33 d-18 v-17 t-30 i-21 p-38 d-18 v-17 t-36 i-4 d-27 v-17 t-36 i-21 p-63 i-21 v-17 t-41 i-3 d-27 v-17 t-33 i-8 d-27 v-17 t-28 i-3 p-63 d-15 v-17 t-32 i-3 p-68 d-27 v-17 t-36 i-3 p-28 i-4 p-62 v-17 t-42 i-3 p-55 d-12 v-17 t-33 i-61 v-17 t-35 i-41 i-41 d-27 
v-17 t-36 i-2 v-17 t-33 i-16 p-56 v-17 t-36 i-2 p-62 d-18 v-17 t-36 i-41 d-27 v-17 t-33 v-17 i-61 p-39 i-90 v-17 t-41 d-18 v-17 i-61 d-27 v-17 t-23 v-17 t-33 d-18 v-17 t-30 i-21 p-59 d-18 v-17 t-33 i-90 d-18 v-17 t-42 i-41 p-57 v-17 t-35 i-21 p-57 d-18 v-17 t-29 d-27 v-17 t-33 i-90 d-27 v-17 t-36 i-61 v-17 t-36 i-2 v-17 t-36 i-4 p-66 i-90 d-18 v-17 v-17 t-33 d-18 v-17 t-33 i-4 p-68 d-27 v-17 t-41 i-8 d-27 v-17 t-36 i-3 p-73 i-3 p-65 d-27 v-17 t-33 i-90 p-67 i-2 i-2 p-60 d-18 v-17 t-33 i-21 d-6 p-77 v-17 t-33 i-2 i-2 t-33 i-90 p-61 d-18 v-17 t-43 v-17 t-42 d-27 v-17 i-4 p-57 d-18 v-17 t-33 i-61 v-17 t-33 i-21 d-27 v-17 t-33 i-3 i-3 d-18 v-17 i-21 p-36 d-27 v-17 t-36 i-8 p-33 d-27 v-17 t-33 i-8 p-59 d-27 v-17 t-33 i-90 d-18 v-17 t-33 d-18 v-17 t-35 i-3 p-57 d-12 v-17 i-90 p-69 d-18 d-18 p-64 d-18 v-17 t-36 d-27 v-17 t-33 i-90 d-18 d-18 v-17 t-33 i-4 p-31 d-18 i-3 i-21 p-34 v-17 i-2 p-36 v-17 t-31 d-15 v-17 t-36 i-90 d-27 v-17 d-27 v-17 t-39 i-2 v-17 t-33 d-18 v-17 t-41 i-21 p-31 v-17 t-31 i-90 d-27 v-17 t-36 i-21 i-61 d-27 v-17 t-32 i-21 p-33 d-18 v-17 t-36 i-3 t-33 i-90 p-60 d-18 v-17 t-29 i-16 p-34 d-18 v-17 t-45 d-27 v-17 t-36 i-21 p-62 d-27 v-17 t-33 v-17 t-36 i-0 v-17 t-36 d-27 v-17 d-27 v-17 d-12 v-17 t-35 i-8 p-57 i-4 v-17 t-25 i-3 p-61 d-12 v-17 t-42 i-21 i-4 v-17 t-29 d-27 v-17 t-33 d-27 v-17 t-39 i-3 p-57 d-18 v-17 i-21 p-57 d-27 v-17 t-33 i-3 i-21 p-31 d-27 v-17 d-27 v-17 t-36 i-2 v-17 t-33 i-21 d-27 v-17 t-33 d-18 v-17 t-33 i-3 d-27 v-17 t-32 d-18 v-17 t-38 v-17 t-33 i-3 p-70 d-18 v-17 t-36 i-3 p-67 d-27 v-17 d-27 v-17 t-36 i-4 p-64 d-27 v-17 d-27 v-17 t-33 v-17 t-33 i-4 p-32 d-18 v-17 t-36 v-17 t-33 i-3 p-67 d-27 v-17 t-35 i-41 d-27 v-17 t-33 v-17 i-0 p-42 d-18 v-17 t-33 i-2 p-52 p-55 d-12 v-17 t-36 i-4 p-39 d-18 v-17 t-30 p-62 d-18 v-17 d-27 v-17 t-35 i-3 p-66 d-6 v-17 t-33 d-18 v-17 t-39 i-90 p-64 d-18 v-17 t-33 d-18 p-60 d-27 v-17 t-33"
# musecoco_str = "s-0 s-1 o-8 t-8 i-5 p-2 d-0 v-10 s-5 s-8 k-9"

# Parse the whitespace-separated "<key>-<value>" tokens into (key, int value) pairs.
# - split() without an argument tolerates repeated, leading or trailing
#   whitespace (and an empty string), where split(" ") would yield empty tokens.
# - partition("-") splits each token once, at the first dash only, so a
#   negative value such as "p--1" parses as ('p', -1).
token_list: list[tuple[str, int]] = [
    (key, int(raw_value))
    for key, _, raw_value in (token.partition("-") for token in musecoco_str.split())
]

print(token_list)

[('d', 6), ('v', 20), ('o', 48), ('t', 25), ('p', 68), ('d', 27), ('t', 36), ('i', 40), ('p', 72), ('i', 24), ('p', 64), ('d', 6), ('v', 20), ('o', 48), ('t', 39), ('i', 25), ('p', 57), ('p', 67), ('d', 6), ('v', 20), ('o', 36), ('i', 0), ('p', 62), ('d', 6), ('v', 20), ('t', 29), ('d', 6), ('v', 20), ('t', 38), ('i', 90), ('i', 22), ('i', 52), ('p', 69), ('d', 3), ('v', 20), ('t', 25), ('i', 16), ('d', 6), ('v', 20), ('t', 36), ('i', 0), ('p', 57), ('d', 12), ('v', 20), ('t', 39), ('i', 52), ('t', 36), ('i', 16), ('p', 69), ('d', 6), ('v', 20), ('t', 26), ('p', 62), ('d', 27), ('v', 17), ('o', 48), ('t', 33), ('i', 40), ('p', 68), ('d', 6), ('v', 20), ('t', 36), ('i', 3), ('p', 60), ('v', 17), ('o', 48), ('t', 33), ('i', 29), ('p', 64), ('d', 6), ('v', 20), ('t', 38), ('t', 30), ('i', 2), ('p', 74), ('t', 29), ('p', 71), ('d', 6), ('v', 20), ('t', 31), ('d', 6), ('v', 20), ('t', 41), ('i', 24), ('p', 58), ('i', 22), ('p', 62), ('d', 6), ('v', 20), ('t', 35), ('i', 41), ('p', 67), ('d'

In [14]:
# Print the location of vocab_manager_dict.json (the same file is named in a
# comment inside fix_error_for_first_token_line, next to the fallback value).
print("/Users/4rr311/Documents/VectorA/KHTN/Nam4/HKII/Thesis/Brainstorming/MIDI/Ideas/vocab_manager_dict.json")

/Users/4rr311/Documents/VectorA/KHTN/Nam4/HKII/Thesis/Brainstorming/MIDI/Ideas/vocab_manager_dict.json


In [15]:
# Alias (same list object, not a copy) of the parsed token list; it is the
# input of the line-splitting and repair code at the end of this cell.
model_result_tokens_list = token_list

import const_lib.musecoco_const as mcc
import random

def musecoco_tokens_list_to_token_list_lines_converter(
    musecoco_tokens_list: list[tuple[str, int]],
    line_separators: list[str] = ['position', 'time_signature']
):
    """
        Split a flat list of musecoco tokens into lines.

        A new line starts at every token whose key is a line separator; the
        separator token is kept as the first token of the line it opens. The
        first token always opens the first line, whatever its key is.

        musecoco_tokens_list: a list of (key, value) tokens.
            Example: [('o', 0), ('t', 25), ('i', 40), ('o', 48), ('p', 68)].

        line_separators: full attribute names (keys of the module-level
            `reversed_abbrs` mapping) whose tokens start a new line.
            Default is ['position', 'time_signature'].

        Return: a list of lines, each line being a list of tokens. For the
            example above: [[('o', 0), ('t', 25), ('i', 40)], [('o', 48), ('p', 68)]].
            An empty input gives an empty list.

        Raises: KeyError if a name in line_separators is not in `reversed_abbrs`.
    """
    if not musecoco_tokens_list:
        return []

    # Translate the full attribute names into the one-letter keys used by tokens.
    # The argument is honoured here (it used to be overwritten by the default).
    separator_keys = {reversed_abbrs[name] for name in line_separators}

    lines = [[musecoco_tokens_list[0]]]

    for token in musecoco_tokens_list[1:]:
        if token[0] in separator_keys:
            lines.append([token])
        else:
            lines[-1].append(token)

    return lines

def is_token_list_valid(
    token_list: list[tuple[str, int]],
    next_acceptable_keys: dict[str, list[str]] = mcc.next_acceptable_keys,
):
    """
        Check that every pair of consecutive tokens respects the adjacency list.

        For each token that has a successor, the successor's key must be listed
        in next_acceptable_keys for the token's key. A token followed by
        another token whose own key has no entry in next_acceptable_keys makes
        the list invalid (it does not raise KeyError). Only keys are checked,
        never values. Lists with zero or one token are always valid.

        token_list: a list of tokens. Example: [('i', 40), ('p', 68), ('d', 6), ('v', 20)]

        next_acceptable_keys: an adjacency list of all possible next keys.
            Example: {'s': ['b', 'o'], 'o': ['t'], ...}

        Return: True if every transition is allowed, False otherwise.
    """
    for current_token, following_token in zip(token_list, token_list[1:]):
        allowed_next_keys = next_acceptable_keys.get(current_token[0], ())

        if following_token[0] not in allowed_next_keys:
            return False

    return True

def nearest_valid_line(
    target_line_idx, 
    are_lines_valid: list[bool]
):
    """
        Locate the closest valid line around a target line.

        Lines before the target are scanned first, from the closest one back to
        index 0; only if none of them is valid are the lines after the target
        scanned, from the closest one to the end. The target line itself is
        never considered.

        target_line_idx: index of the line a valid neighbour is wanted for
        are_lines_valid: one boolean per line, True when that line is valid

        Return: the index of the nearest valid line, or None if there is none
    """
    earlier_lines = range(target_line_idx - 1, -1, -1)
    later_lines = range(target_line_idx + 1, len(are_lines_valid))

    for candidate_indices in (earlier_lines, later_lines):
        for candidate_idx in candidate_indices:
            if are_lines_valid[candidate_idx]:
                # A negative index can only show up for a negative target;
                # such a hit is reported as "not found".
                return candidate_idx if candidate_idx >= 0 else None

    return None

def copy_line_a_structure_to_line_b(
    musecoco_tokens_list_a: list[tuple[str, int]],
    musecoco_tokens_list_b: list[tuple[str, int]]
):
    """
        Build a new line that has the keys of line a, in line a's order, with
        values taken from line b wherever line b contains the same key.

        If a key occurs several times in line b, its last value is used. Keys
        of line a that are absent from line b keep line a's token unchanged.
        Neither input list is modified.

        Return: the new line, as a list with the same length as line a.
    """
    # Last occurrence wins when line b repeats a key.
    line_b_values = {key: value for key, value in musecoco_tokens_list_b}

    restructured_line = []
    for token in musecoco_tokens_list_a:
        key, _ = token
        if key in line_b_values:
            restructured_line.append((key, line_b_values[key]))
        else:
            # Line b has no value for this key: keep line a's token.
            restructured_line.append(token)

    return restructured_line

def are_all_list_elements_equal_to_value(
    list_to_check: list,
    value
):
    """
        Return True when no element of list_to_check differs from value
        (an empty list therefore gives True), False otherwise.
    """
    return not any(element != value for element in list_to_check)

def token_list_to_key_only_str(token_list: list[tuple[str, int]]):
    """
        Concatenate the keys (first item of every token) into one string.

        Example: [('i', 40), ('p', 68), ('d', 6)] -> "ipd"
    """
    keys_only = (token[0] for token in token_list)
    return "".join(keys_only)

def fix_error_for_first_token_line(
    musecoco_token_lines: list[list[tuple[str, int]]],
    new_structure: list[str] = golden_structure,
    inappropriate_head_keys: list[str] = ['d', 'v'],
):
    """
        Make sure the first line is usable, rebuilding it when necessary.

        The first line is rebuilt when it is empty, when it starts with one of
        inappropriate_head_keys, or when is_token_list_valid rejects it. The
        rebuilt line has exactly the keys of new_structure, in order. The value
        for each key is picked at random among the values that key takes
        anywhere in musecoco_token_lines; a key that never occurs gets a fixed
        fallback value and a message is printed.

        musecoco_token_lines: a list of lines, each a list of tokens. Example:

            [
                [('i', 40), ('p', 68)],

                [('d', 6), ('v', 20)]
            ]

        new_structure: the keys of the rebuilt first line. Default is the golden structure.

        inappropriate_head_keys: keys the first line must not start with. Default is ['d', 'v'].

        Return: musecoco_token_lines itself. The list is modified in place when
            the first line is rebuilt; an empty list is returned unchanged.
    """
    if not musecoco_token_lines:
        # No first line to repair.
        return musecoco_token_lines

    first_line = musecoco_token_lines[0]

    needs_rebuild = (
        not first_line
        or first_line[0][0] in inappropriate_head_keys
        or not is_token_list_valid(first_line)
    )
    if not needs_rebuild:
        return musecoco_token_lines

    # Every value observed for each key of the target structure.
    values_for_each_key = {key: set() for key in new_structure}

    for line in musecoco_token_lines:
        for key, value in line:
            # Tokens whose key is not part of new_structure (e.g. 'b' with the
            # golden structure) are irrelevant here; they used to raise KeyError.
            if key in values_for_each_key:
                values_for_each_key[key].add(value)

    # Brainstorming/MIDI/Ideas/vocab_manager_dict.json
    # NOTE(review): going by its name, this is the id of the 4/4 time signature
    # in that file. It is the fallback for EVERY key without an observed value,
    # not only for 's' -- confirm that this is intended.
    four_four_time_signature = 9

    new_first_line: list[tuple[str, int]] = []

    for key in new_structure:
        candidate_values = list(values_for_each_key[key])

        if not candidate_values:
            # The message reports the value that is actually used.
            print(f"Key {key} has no value. Fill with {four_four_time_signature}.")
            candidate_values = [four_four_time_signature]

        new_first_line.append((key, random.choice(candidate_values)))

    musecoco_token_lines[0] = new_first_line

    return musecoco_token_lines

def nearest_line_fill(
    musecoco_token_lines: list[list[tuple[str, int]]]
):
    """
        Repair invalid lines by giving them the structure of the nearest valid line.

        Validity is decided by is_token_list_valid on the lines as received.
        - If every line is valid, the input is returned untouched.
        - If no line is valid, a message is printed and None is returned.
        - Otherwise the first line is repaired by fix_error_for_first_token_line,
          then every invalid line is replaced by the key structure of its
          nearest valid line (previous lines are preferred, see
          nearest_valid_line), keeping the invalid line's own values for the
          keys it has (see copy_line_a_structure_to_line_b).

        musecoco_token_lines: a list of lines, each a list of tokens.
            Example: [[('i', 40), ('p', 68)], [('d', 6), ('v', 20)]]

        Return: musecoco_token_lines (modified in place) or None when no line is valid.

        Raises: ValueError if an invalid line has no valid line to copy from.
    """
    are_lines_valid = [is_token_list_valid(line) for line in musecoco_token_lines]

    if all(are_lines_valid):
        return musecoco_token_lines

    if not any(are_lines_valid):
        print("All lines are invalid. Cannot apply by_line_nearest_fill.")
        return None

    # Fix the first line if it is invalid
    musecoco_token_lines = fix_error_for_first_token_line(musecoco_token_lines)

    # The first line may just have been rebuilt, so its flag is re-evaluated.
    # With a stale False flag the rebuilt line would be overwritten by a later
    # line's structure in the loop below, and it could never serve as the
    # nearest valid line for the lines that follow it.
    are_lines_valid[0] = is_token_list_valid(musecoco_token_lines[0])

    for i, is_line_valid in enumerate(are_lines_valid):
        if is_line_valid:
            continue

        nearest_valid_line_idx = nearest_valid_line(i, are_lines_valid)

        if nearest_valid_line_idx is None:
            raise ValueError(f"fill_missing_by_line: Cannot find any valid line to fill line {i}")

        print(f"Fill line {i} by line {nearest_valid_line_idx}'s structure")
        musecoco_token_lines[i] = copy_line_a_structure_to_line_b(
            musecoco_token_lines[nearest_valid_line_idx],
            musecoco_token_lines[i]
        )

        # Validate the line's tail key (if it is invalid, choose a suitable structure to fill)

    return musecoco_token_lines

# Split the flat token list into lines, report the validity of each line,
# then repair the invalid ones and show the result.
token_list_lines = musecoco_tokens_list_to_token_list_lines_converter(
    model_result_tokens_list
)

print(f"Original token list: {model_result_tokens_list}")

print(f"Token list lines: {token_list_lines}")

# Validity is printed before the repair because nearest_line_fill modifies
# token_list_lines in place.
for i in range(len(token_list_lines)):
    print(f"Line {i}: {is_token_list_valid(token_list_lines[i])} {token_list_lines[i]}")

fill_result = nearest_line_fill(token_list_lines)

print()
print("Result:")

if fill_result is None:
    # nearest_line_fill returns None (after printing the reason) when no line
    # is valid; iterating over None would raise TypeError.
    print("No repaired lines to show.")
else:
    for line in fill_result:
        print(line)

Original token list: [('d', 6), ('v', 20), ('o', 48), ('t', 25), ('p', 68), ('d', 27), ('t', 36), ('i', 40), ('p', 72), ('i', 24), ('p', 64), ('d', 6), ('v', 20), ('o', 48), ('t', 39), ('i', 25), ('p', 57), ('p', 67), ('d', 6), ('v', 20), ('o', 36), ('i', 0), ('p', 62), ('d', 6), ('v', 20), ('t', 29), ('d', 6), ('v', 20), ('t', 38), ('i', 90), ('i', 22), ('i', 52), ('p', 69), ('d', 3), ('v', 20), ('t', 25), ('i', 16), ('d', 6), ('v', 20), ('t', 36), ('i', 0), ('p', 57), ('d', 12), ('v', 20), ('t', 39), ('i', 52), ('t', 36), ('i', 16), ('p', 69), ('d', 6), ('v', 20), ('t', 26), ('p', 62), ('d', 27), ('v', 17), ('o', 48), ('t', 33), ('i', 40), ('p', 68), ('d', 6), ('v', 20), ('t', 36), ('i', 3), ('p', 60), ('v', 17), ('o', 48), ('t', 33), ('i', 29), ('p', 64), ('d', 6), ('v', 20), ('t', 38), ('t', 30), ('i', 2), ('p', 74), ('t', 29), ('p', 71), ('d', 6), ('v', 20), ('t', 31), ('d', 6), ('v', 20), ('t', 41), ('i', 24), ('p', 58), ('i', 22), ('p', 62), ('d', 6), ('v', 20), ('t', 35), ('i',