In [1]:
refined_template_from_gpt_path = "/Users/4rr311/Documents/VectorA/KHTN/Nam4/HKII/Thesis/Brainstorming/Text/text_lib/Ideas/augmentation/data/gpt_refined_template_en.json"   

verified_data_dir = "/Users/4rr311/Documents/VectorA/KHTN/Nam4/HKII/Thesis/Brainstorming/Text/text_lib/Ideas/augmentation/data/verified"

In [2]:
midi_lib_path = "/Users/4rr311/Documents/VectorA/KHTN/Nam4/HKII/Thesis/Brainstorming/MIDI/midi_lib"

In [3]:
import sys

sys.path.append(midi_lib_path)

import const_lib.musecoco_const as mcc

In [4]:
import json

In [5]:
def load_json_file(file_path):
    with open(file_path, "r") as f:
        data = json.load(f)
    return data

In [6]:
gpt_refined_template = load_json_file(refined_template_from_gpt_path)

print(f"Total templates: {len(gpt_refined_template)}")

Total templates: 4300


In [7]:
def does_response_contain_required_substring_for_attribute(
    response: str, 
    attribute: str
) -> bool:
    """
        Check if the response contains the required substring for the attribute

        Args:
        - response (str): the response to check
        - attribute (str): the attribute to check

        Returns:
        - bool: True if the response contains the required substring for the attribute, False otherwise
    """
    result = False

    if attribute in mcc.required_substring_for_attributes:
        for substring in mcc.required_substring_for_attributes[attribute]:
            if substring.lower() in response.lower():
                result = True
                break

    if attribute in mcc.required_substring_for_positive_and_negative_attributes:
        for substring in mcc.required_substring_for_positive_and_negative_attributes[attribute]:
            if substring.lower() in response.lower():
                result = True and result
                break

    return result

In [8]:
def is_gpt_refined_template_acceptable(gpt_refined_template):
    attrs = gpt_refined_template["attributes"]  

    response = gpt_refined_template["response"]

    for attr in attrs:
        if not does_response_contain_required_substring_for_attribute(response, attr):
            print(f"Response does not contain required substring for attribute: {attr}, response: {response}")
            return False
        
    return True

In [9]:
def filter_acceptable_gpt_refined_templates(
    gpt_refined_templates: list
) -> dict[str, list]:
    acceptable_templates: list = []

    ignored_templates: list = []

    for gpt_refined_template in gpt_refined_templates:
        if is_gpt_refined_template_acceptable(gpt_refined_template):
            acceptable_templates.append(gpt_refined_template)
        else:
            ignored_templates.append(gpt_refined_template)

    return {
        "acceptable_templates": acceptable_templates,
        "ignored_templates": ignored_templates
    }

In [10]:
acceptable_templates_file_path = f"{verified_data_dir}/gpt_acceptable_templates_en.json"
ignored_templates_file_path = f"{verified_data_dir}/gpt_ignored_templates_en.json"

filtered_gpt_refined_templates = filter_acceptable_gpt_refined_templates(gpt_refined_template)

Response does not contain required substring for attribute: S2_1, response: The music resembles the style of The Beatles. The music is in 6/8 time signature. This music is perfect for dancing. Its pitch range covers 4 octaves.
Response does not contain required substring for attribute: S4_0, response: The song doesn't fit the typical jazz style. The song doesn't include guitar and drums. The song doesn't match Mozart's usual sound. The song begins dark and eventually brightens up.
Response does not contain required substring for attribute: I1_0, response: The song has a moderate tempo. The song doesn't include guitar and drums. This song carries a gloomy vibe from beginning to end. The music evokes happiness in the listener. This music isn't suited for dancing.
Response does not contain required substring for attribute: B1_1, response: The song spans around 62 bars. This song maintains a bright feeling from start to finish. This song carries a gloomy vibe from beginning to end. The bea

In [11]:
acceptable_templates = filtered_gpt_refined_templates["acceptable_templates"]
print(f"Accept {len(acceptable_templates)} templates")

ignored_templates = filtered_gpt_refined_templates["ignored_templates"]
print(f"Ignore {len(ignored_templates)} templates")

Accept 4099 templates
Ignore 201 templates


In [12]:
acceptable_templates_with_unique_responses: list = []

already_existing_responses: set = set()

for acceptable_template in acceptable_templates:
    response = acceptable_template["response"]

    if response not in already_existing_responses:
        already_existing_responses.add(response)

        acceptable_templates_with_unique_responses.append(acceptable_template)

acceptable_templates = acceptable_templates_with_unique_responses

print("Total acceptable templates with unique responses: ", len(acceptable_templates))

Total acceptable templates with unique responses:  3121


In [13]:
with open(acceptable_templates_file_path, "w") as f:
    json.dump(acceptable_templates, f, indent=4)

print(f"All acceptable templates: {len(acceptable_templates)}")

print(f"File path: {acceptable_templates_file_path}")

All acceptable templates: 3121
File path: /Users/4rr311/Documents/VectorA/KHTN/Nam4/HKII/Thesis/Brainstorming/Text/text_lib/Ideas/augmentation/data/verified/gpt_acceptable_templates_en.json


In [14]:

with open(ignored_templates_file_path, "w") as f:
    json.dump(ignored_templates, f, indent=4)

print(f"All ignored templates: {len(ignored_templates)}")

print(f"File path: {ignored_templates_file_path}")

All ignored templates: 201
File path: /Users/4rr311/Documents/VectorA/KHTN/Nam4/HKII/Thesis/Brainstorming/Text/text_lib/Ideas/augmentation/data/verified/gpt_ignored_templates_en.json
