In [1]:
# Path of the JSON file holding the refined MuseCoco templates that this notebook verifies.
# NOTE(review): machine-specific absolute path; the notebook will not run elsewhere without editing it.
refined_template_from_musecoco_path = "/Users/4rr311/Documents/VectorA/KHTN/Nam4/HKII/Thesis/Brainstorming/Text/text_lib/Ideas/augmentation/data/musecoco_refined_template_en.json"   

# Directory that receives the verification outputs (acceptable / ignored template files).
verified_data_dir = "/Users/4rr311/Documents/VectorA/KHTN/Nam4/HKII/Thesis/Brainstorming/Text/text_lib/Ideas/augmentation/data/verified"

In [2]:
# Directory appended to sys.path so that the `const_lib` package can be imported.
# NOTE(review): machine-specific absolute path.
midi_lib_path = "/Users/4rr311/Documents/VectorA/KHTN/Nam4/HKII/Thesis/Brainstorming/MIDI/midi_lib"

In [3]:
import sys

sys.path.append(midi_lib_path)

import const_lib.musecoco_const as mcc

In [4]:
import json

In [5]:
def load_json_file(file_path):
    """
        Load and parse a JSON file.

        Args:
        - file_path: path of the JSON file to read

        Returns:
        - the deserialized JSON content (whatever ``json.load`` produces for the file)

        Raises:
        - OSError: if the file cannot be opened
        - json.JSONDecodeError: if the file content is not valid JSON
    """
    # JSON text is UTF-8 by convention; without an explicit encoding, open()
    # falls back to the platform locale codec, which can mis-decode or fail on
    # non-ASCII characters on systems whose default is not UTF-8.
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    return data

In [6]:
musecoco_refined_template: list = load_json_file(refined_template_from_musecoco_path)

print(f"File path: {refined_template_from_musecoco_path}")
print(f"Total templates: {len(musecoco_refined_template)}")

File path: /Users/4rr311/Documents/VectorA/KHTN/Nam4/HKII/Thesis/Brainstorming/Text/text_lib/Ideas/augmentation/data/musecoco_refined_template_en.json
Total templates: 4856


In [7]:
def does_response_contain_required_substring_for_attribute(
    response: str, 
    attribute: str
) -> bool:
    """
        Check if the response contains the required substring(s) for the attribute.

        Matching is case-insensitive. The rules are:
        - "I4_0" (main instrument is not specified) is always accepted.
        - The response must contain at least one of the substrings listed for
          the attribute in ``mcc.required_substring_for_attributes``. An
          attribute that is not listed there is rejected.
        - If the attribute is also listed in
          ``mcc.required_substring_for_positive_and_negative_attributes``, the
          response must additionally contain at least one substring from that
          list.

        Args:
        - response (str): the response to check
        - attribute (str): the attribute to check

        Returns:
        - bool: True if the response contains the required substring for the attribute, False otherwise
    """
    # Main instrument is not specified
    if attribute == "I4_0":
        return True

    def contains_any(substrings) -> bool:
        # Lower the response once per check instead of once per candidate substring.
        lowered_response = response.lower()
        return any(substring.lower() in lowered_response for substring in substrings)

    result = False

    if attribute in mcc.required_substring_for_attributes:
        result = contains_any(mcc.required_substring_for_attributes[attribute])

    if attribute in mcc.required_substring_for_positive_and_negative_attributes:
        # Previously this branch executed `result = True and result`, which never
        # changed `result`, so this requirement was silently ignored. Both
        # requirements must now hold for the attribute to be accepted.
        result = result and contains_any(
            mcc.required_substring_for_positive_and_negative_attributes[attribute]
        )

    return result

In [8]:
def is_gpt_refined_template_acceptable(gpt_refined_template):
    """
        Decide whether a refined template satisfies every one of its attributes.

        Each entry of the template's "attributes" is checked against the
        template's "response". On the first attribute that fails the check, a
        diagnostic line is printed and the template is rejected.

        Args:
        - gpt_refined_template: mapping with the keys "attributes" and "response"

        Returns:
        - bool: True if every attribute passes the check, False otherwise
    """
    attribute_codes = gpt_refined_template["attributes"]
    template_text = gpt_refined_template["response"]

    for attribute_code in attribute_codes:
        if does_response_contain_required_substring_for_attribute(template_text, attribute_code):
            continue

        # Stop at the first failing attribute; later attributes are not checked.
        print(f"Response does not contain required substring for attribute: {attribute_code}, response: {template_text}")
        return False

    return True

In [9]:
def filter_acceptable_gpt_refined_templates(
    gpt_refined_templates: list
) -> dict[str, list]:
    """
        Split templates into acceptable and ignored ones.

        Args:
        - gpt_refined_templates (list): templates to classify, in input order

        Returns:
        - dict[str, list]: {"acceptable_templates": [...], "ignored_templates": [...]},
          each list keeping the relative order of the input
    """
    buckets: dict[str, list] = {
        "acceptable_templates": [],
        "ignored_templates": [],
    }

    for template in gpt_refined_templates:
        bucket_name = (
            "acceptable_templates"
            if is_gpt_refined_template_acceptable(template)
            else "ignored_templates"
        )
        buckets[bucket_name].append(template)

    return buckets

| **Ký hiệu** | **Mô tả**              |
|-------------|------------------------|
| I1s2        | Instrument             |
| I4          | Main Instrument        |
| R3          | Rhythm Intensity       |
| B1s1        | Bar                    |
| TS1s1       | Time Signature         |
| K1          | Key                    |
| T1s1        | Tempo                  |
| P4          | Pitch Range            |
| TM1         | Time (duration in sec) |


In [10]:
# {
#     "I1_1": "[INSTRUMENTS] should be included in the music.", 
#     "I1_0": "[INSTRUMENTS] are not featured in this song.", 
#     "P4_1": "Its pitch range is within [RANGE] octaves.", 
#     "C1_0": "This is a song that has a bright feeling from the beginning to the end.", 
#     "C1_1": "This is a song that has a very gloomy feeling from the beginning to the end.", 
#     "C1_2": "The song begins bright and then turns dark.", 
#     "C1_3": "The song begins dark and then brightens up.", 
#     "R1_1": "This music is suitable for dancing.", 
#     "R1_0": "This music is not suitable for dancing.", 
#     "R3_1": "The beat of this song is extremely strong.", 
#     "R3_0": "This song has a very peaceful beat.", 
#     "R3_2": "This song has a moderate beat.", 
#     "S4_1": "The song belongs to the [GENRE] genre.", 
#     "S4_0": "The song does not fit into the conventions of [GENRE] style.", 
#     "S2_1": "The music is in the vein of [ARTIST].",
#     "S2_0": "The song does not conform to [ARTIST]'s typical sound.", 
#     "B1_1": "The song spans approximately [NUM_BARS] bars.", 
#     "TS1_1": "The music is in [TIME_SIGNATURE].", 
#     "TS1_o": "The time signature of this song is not commonly used.", 
#     "K1_1": "This music is composed in the [KEY] key.", 
#     "T1_0": "The tempo of this song is rapid.", 
#     "T1_1": "The tempo of this song is slow.", 
#     "T1_2": "The tempo of this song is moderate.", 
#     "EM1_1": "The music conveys [EMOTION].", 
#     "TM1_1": "This song has a duration of [TM1] seconds."
# }

In [11]:
# Output files, both placed inside verified_data_dir.
acceptable_templates_file_path = f"{verified_data_dir}/musecoco_acceptable_templates_en.json"
ignored_templates_file_path = f"{verified_data_dir}/musecoco_ignored_templates_en.json"

# Classify every loaded template; one diagnostic line is printed per rejected template.
filtered_gpt_refined_templates = filter_acceptable_gpt_refined_templates(musecoco_refined_template)

Response does not contain required substring for attribute: TM1_1, response: The key of this music gives it a special emotional quality that is further enhanced by its atypical time signature [TIME_SIGNATURE]. The track has a moderate tempo and an easy-going rhythm that is supported by the featured instruments. The music is imbued with [EMOTION] and is structured with roughly [NUM_BARS] bars. Overall, this song is a unique blend of key, rhythm, tempo, and emotion that creates a captivating musical experience.
Response does not contain required substring for attribute: TM1_1, response: With a pitch range spanning [RANGE] octaves, this music offers a diverse and dynamic listening experience. Its choice of [KEY] key results in a captivating and memorable experience, complemented by a moderate tempo. The incorporation of [INSTRUMENTS] adds depth and richness to the musical composition. Although the time signature of this song is not standard, its quick pace enhances the overall energy. Thr

In [12]:
# Unpack the two buckets returned by the filter and report their sizes.
acceptable_templates = filtered_gpt_refined_templates["acceptable_templates"]
print(f"Accept {len(acceptable_templates)} templates")

ignored_templates = filtered_gpt_refined_templates["ignored_templates"]
print(f"Ignore {len(ignored_templates)} templates")

Accept 4815 templates
Ignore 41 templates


In [13]:
# De-duplicate the acceptable templates by their exact "response" text,
# keeping the first occurrence and preserving the original order.
already_existing_responses: set = set()
acceptable_templates_with_unique_responses: list = []

for acceptable_template in acceptable_templates:
    response = acceptable_template["response"]

    if response in already_existing_responses:
        # Duplicates are only reported; they are not added to ignored_templates.
        print(f"Response already exists: {response}")
        continue

    already_existing_responses.add(response)
    acceptable_templates_with_unique_responses.append(acceptable_template)

# From here on, acceptable_templates refers to the de-duplicated list.
acceptable_templates = acceptable_templates_with_unique_responses

print(f"Total templates: {len(musecoco_refined_template)}")
print("Total acceptable templates with unique responses: ", len(acceptable_templates))

Total templates: 4856
Total acceptable templates with unique responses:  4815


In [14]:
# Persist the de-duplicated acceptable templates as indented JSON,
# overwriting any existing file at that path.
with open(acceptable_templates_file_path, "w") as f:
    json.dump(acceptable_templates, f, indent=4)

print(f"All acceptable templates: {len(acceptable_templates)}")

print(f"File path: {acceptable_templates_file_path}")

All acceptable templates: 4815
File path: /Users/4rr311/Documents/VectorA/KHTN/Nam4/HKII/Thesis/Brainstorming/Text/text_lib/Ideas/augmentation/data/verified/musecoco_acceptable_templates_en.json


In [15]:

# Persist the templates rejected by the attribute check as indented JSON,
# overwriting any existing file at that path. Duplicate responses dropped by the
# de-duplication step are not part of ignored_templates.
with open(ignored_templates_file_path, "w") as f:
    json.dump(ignored_templates, f, indent=4)

print(f"Ignored templates (except duplicated): {len(ignored_templates)}")

print(f"File path: {ignored_templates_file_path}")

Ignored templates (except duplicated): 41
File path: /Users/4rr311/Documents/VectorA/KHTN/Nam4/HKII/Thesis/Brainstorming/Text/text_lib/Ideas/augmentation/data/verified/musecoco_ignored_templates_en.json
