In [None]:
import re

def strings_to_be_replaced(input_string: str, regex: str = r"\[\w+\]") -> list:
    """Return the distinct matches of ``regex`` found in ``input_string``.

    Matches are de-duplicated while keeping the order of first appearance, so
    the result is the same on every run. (Round-tripping through a ``set``
    would order strings by their per-process randomized hash.)

    Args:
        input_string: Text to scan.
        regex: Pattern handed to ``re.findall``. The default matches
            square-bracketed word tokens such as ``[KEY]``.

    Returns:
        A list of the unique matches, in first-occurrence order.
    """
    # A dict keeps insertion order, which makes it an ordered de-duplicator.
    return list(dict.fromkeys(re.findall(regex, input_string)))

In [None]:
def read_file(file_path: str, encoding: str = "utf-8") -> str:
    """Read a whole text file and return its contents as one string.

    The encoding is passed explicitly instead of relying on the platform's
    locale default, so files with non-ASCII text are decoded the same way on
    every machine.

    Args:
        file_path: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8.

    Returns:
        The full decoded contents of the file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the bytes are not valid in ``encoding``.
    """
    with open(file_path, "r", encoding=encoding) as file:
        return file.read()

In [None]:
# Load the template file as raw text (it is not parsed here); the next cell
# scans this string for placeholder tokens.
json_str = read_file("../refined_template_vn.json")

# Echo the loaded text for visual inspection.
print(json_str)

In [None]:
# Collect the distinct tokens that the default pattern of
# strings_to_be_replaced (r"\[\w+\]") finds in the template text.
strs_to_be_replaced = strings_to_be_replaced(json_str)

print(strs_to_be_replaced)

In [None]:
# Genre label -> display text. The values look like Vietnamese renderings
# (some left in English) -- TODO confirm intended use; this cell only reads
# the keys.
genres = {
    'New Age': "new age",
    'Electronic': "điện tử",
    'Rap': 'rap',
    'Religious': 'tôn giáo',
    'International': 'quốc tế',
    'Easy_Listening': 'dễ nghe',
    'Avant_Garde': 'avant-garde',
    'RnB': 'RnB',
    'Latin': 'Latin',
    'Children': 'trẻ em',
    'Jazz': 'jazz',
    'Classical': 'cổ điển',
    'Comedy_Spoken': 'hài kịch',
    'Pop_Rock': 'pop',
    'Reggae': 'reggae',
    'Stage': 'stage',
    'Folk': 'dân ca',
    'Blues': 'blues',
    'Vocal': 'vocal',
    'Holiday': 'holiday',
    'Country': 'đồng quê',
    "Symphony": 'giao hưởng'
}

# Queue the genre labels for replacement as well. They are appended in the
# dict's declaration order (not via a set, whose order changes between runs),
# and labels already present are skipped so re-running this cell does not
# append them a second time. The list is still extended in place.
strs_to_be_replaced.extend(
    [label for label in genres if label not in strs_to_be_replaced]
)

print(strs_to_be_replaced)

In [None]:
import itertools

# Map every collected token to an obfuscated stand-in: each character is
# followed by the last digit of its index within the original token, square
# brackets inside the token are dropped, and the result is wrapped in one new
# pair of brackets (e.g. "[KEY]" -> "[K1E2Y3]", "Rap" -> "[R0a1p2]").
replacements = {
    token: "[" + "".join(
        f"{char}{position % 10}"
        for position, char in enumerate(token)
        if char not in ("[", "]")
    ) + "]"
    for token in strs_to_be_replaced
}

# Printing both sizes shows whether the token list contained duplicates.
print(len(strs_to_be_replaced))
print(len(replacements))
print(replacements)

In [None]:
def replace_by_mapper(text: str, replacement_list: list[tuple]) -> str:
    """Apply a sequence of literal substitutions to ``text``, in order.

    Each pair is applied to the output of the previous one, so a later pair
    can match text that an earlier pair introduced. Every substitution is
    announced on stdout before it is applied.

    Args:
        text: The string to rewrite.
        replacement_list: ``(old, new)`` pairs, applied first to last.

    Returns:
        The text after all substitutions.
    """
    result = text
    for pair in replacement_list:
        old, new = pair
        print(f"Replacing {old} with {new}")
        result = result.replace(old, new)
    return result

In [None]:
import csv
import json

def json_str_to_csv_file(json_str: str, csv_file_path: str, func_to_apply = None, func_kwargs=None):
    """Convert a JSON string holding a list of objects into a CSV file.

    The JSON text may first be rewritten by ``func_to_apply``; the result is
    parsed and each object becomes one CSV row under a single header row.

    Args:
        json_str: JSON text whose top level is a list of objects.
        csv_file_path: Destination path; the file is created or overwritten.
        func_to_apply: Optional callable invoked as
            ``func_to_apply(json_str, **func_kwargs)``; its return value is
            the text that gets parsed.
        func_kwargs: Optional keyword arguments for ``func_to_apply``.
            ``None`` means "no extra arguments".

    Raises:
        json.JSONDecodeError: If the (possibly rewritten) text is not JSON.
        AttributeError: If an element of the parsed data has no ``keys()``.
        OSError: If the CSV file cannot be opened for writing.
    """
    if func_to_apply is not None:
        # Tolerate a missing kwargs dict instead of failing on ``**None``.
        json_str = func_to_apply(json_str, **(func_kwargs or {}))

    records = json.loads(json_str)

    # Column order: keys of the first record, followed by any key that first
    # shows up in a later record. Writing by column name keeps every value
    # under its own header even when records differ in keys or key order.
    fieldnames = list(dict.fromkeys(
        key for record in records for key in record.keys()
    ))

    # newline="" lets the csv module control line endings (no blank rows on
    # Windows); UTF-8 keeps non-ASCII text independent of the locale.
    with open(csv_file_path, "w", newline="", encoding="utf-8") as csv_file:
        if not records:
            # Nothing to write: leave an empty file, without a header row.
            return

        csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames, restval="")
        csv_writer.writeheader()
        csv_writer.writerows(records)

In [None]:
# Placeholder tokens that are combined with a surrounding term before being
# encoded. academic_translation_encoder reads "prefix", "attribute" and
# "postfix" from each entry; "vietnamese" is not read by any code visible in
# this notebook.
attributes_with_academic_term = [
    {
        "attribute": "[KEY]",  # token looked up in `replacements`
        "prefix": "",  # text placed before the encoded token
        "postfix": "key",  # text placed after the encoded token
        "vietnamese": "giọng"
    },
    {
        "attribute": "[TIME_SIGNATURE]",
        "prefix": "",
        "postfix": "time signature",
        "vietnamese": "nhịp"
    }
]

In [None]:
def academic_translation_encoder(attribute_item, replacement_map=None, step=2):
    """Build the phrase for one attribute and its chunk-encoded stand-in.

    The phrase is ``"<prefix> <encoded attribute> <postfix>"`` with outer
    whitespace stripped, where the encoded attribute is looked up in
    ``replacement_map``. The stand-in is that phrase cut into ``step``-sized
    chunks, each followed by the last digit of its chunk index, wrapped in
    square brackets.

    Args:
        attribute_item: Mapping with the keys ``"prefix"``, ``"attribute"``
            and ``"postfix"``.
        replacement_map: Mapping from attribute token to its encoded form.
            When omitted, the notebook-level ``replacements`` dict is used,
            as before.
        step: Chunk length used for the encoding. Defaults to 2.

    Returns:
        A ``(phrase, encoded_phrase)`` tuple.

    Raises:
        KeyError: If the attribute token is missing from the mapping.
        ValueError: If ``step`` is zero (raised by ``range``).
    """
    if replacement_map is None:
        # Fall back to the mapping built in an earlier notebook cell.
        replacement_map = replacements

    phrase = " ".join([
        attribute_item["prefix"],
        replacement_map[attribute_item["attribute"]],
        attribute_item["postfix"],
    ]).strip()

    # The chunk counter is derived from ``step`` so it stays a running index
    # (0, 1, 2, ...) for any chunk length.
    chunks = [
        f"{phrase[offset:offset + step]}{(offset // step) % 10}"
        for offset in range(0, len(phrase), step)
    ]
    return phrase, f"[{''.join(chunks)}]"

In [None]:
# Second-stage mapping: each (phrase, encoded phrase) pair returned by
# academic_translation_encoder becomes one key/value entry.
academic_replacements = dict(
    academic_translation_encoder(attribute)
    for attribute in attributes_with_academic_term
)

# Same entries as (old, new) pairs. The variable name, including its
# spelling, is kept because the final cell refers to it.
adcademic_replacement_list = list(academic_replacements.items())

print(academic_replacements)
print(adcademic_replacement_list)

In [None]:
# Start again from the unmodified template text and pick the output path.
json_str = read_file("../refined_template_vn.json")
csv_file_path = "../refined_template_vn.csv"

# Token substitutions come first; the academic-phrase substitutions follow
# because their keys are built from the output of the token substitutions.
replacement_list = [*replacements.items(), *adcademic_replacement_list]

# Rewrite the JSON text with every substitution, then export it as CSV.
json_str_to_csv_file(
    json_str,
    csv_file_path,
    replace_by_mapper,
    func_kwargs={"replacement_list": replacement_list},
)