# Tests Notebook

Scratch notebook used to develop and test the main code.

In [78]:
#                                   Libraries
import os

# to use praat
import parselmouth
from parselmouth import Sound, praat, TextGrid

In [79]:
#                                   Constants
# Relative folder names (note the trailing slash).
corpus_folder = "corpus/"
auxiliar_folder = "auxiliar/"
# File name of a Praat script; judging by its name it saves labeled intervals as WAV files.
labeled_intervals_to_wav = "save_labeled_intervals_to_wav_sound_files.praat"

variable = "test" # test variable used to try out git push

# Paths whose existence is asserted in the following cell.
file_name = "auxiliar/manipular-pitch/12345.wav"
script_name = "auxiliar/save_labeled_intervals_to_wav_sound_files.praat"

In [80]:
# Stop right away if either the audio file or the Praat script path does not exist.
assert os.path.exists(file_name) and os.path.exists(script_name), "ERROR"

In [81]:
#                                                   Auxiliary Functions

def get_files_from_folder(folder, ext):
    '''
    List the entries directly inside a folder whose names end with a suffix

    :param folder: path of the folder to scan, with or without a trailing separator
    :param ext: suffix the entry name must end with (e.g. ".marks")
    :return: sorted list with the path (folder + name) of every matching entry

    '''
    # os.path.join only inserts a separator when `folder` lacks one, so
    # "dir/" still produces "dir/name" while "dir" no longer yields "dirname".
    # Sorting makes the result independent of the arbitrary os.listdir order.
    return sorted(
        os.path.join(folder, name)
        for name in os.listdir(folder)
        if name.endswith(ext)
    )

In [82]:
# Folder that is scanned for .marks files.
path_to_files = "Frases para Extracción de Difonos/"
marks_files = get_files_from_folder(path_to_files, ".marks")

# Praat script path (same value as in the constants cell) and the folder
# later passed to callPraatScript as the place to save the sound files.
script_name = "auxiliar/save_labeled_intervals_to_wav_sound_files.praat"
saving_folder = "diphones/"

# Bare expression: the notebook shows the collected list as the cell output.
marks_files

['Frases para Extracción de Difonos/Efre.marks',
 'Frases para Extracción de Difonos/emEkre.marks',
 'Frases para Extracción de Difonos/feketEs.marks',
 'Frases para Extracción de Difonos/frEk.marks',
 'Frases para Extracción de Difonos/ketrEm.marks',
 'Frases para Extracción de Difonos/krEref.marks',
 'Frases para Extracción de Difonos/meresE.marks',
 'Frases para Extracción de Difonos/sefEte.marks',
 'Frases para Extracción de Difonos/tefekE.marks',
 'Frases para Extracción de Difonos/trEt.marks']

In [88]:
def get_marks_content(file):
    '''
    Get the content of a mark file in a dictionary

    Every non-blank line is read as comma-separated key:value fields.
    Braces and double quotes are dropped, and surrounding whitespace is
    stripped from keys and values. Each value is appended, as a string,
    to the list stored under its key. Blank lines are skipped.

    :param file: filename with the path
    :return: file_content_dictionary, dictionary with the content of the filename
             (keys "time", "type", "start", "end" and "value", each a list of str)
    :raises KeyError: if a field uses a key other than the five listed above
    :raises IndexError: if a field does not contain a ":" separator

    '''

    # how we are going to split the data of the file
    file_content_dictionary = {
        "time": [],
        "type": [],
        "start": [],
        "end": [],
        "value": []
    }

    # characters that we don't want to keep: braces, double quotes, newline
    unwanted_characters = str.maketrans("", "", "{}\"\n")

    # the context manager guarantees the file is closed, even on errors
    with open(file, 'r') as marks_file:
        for raw_line in marks_file:
            cleaned_line = raw_line.translate(unwanted_characters).strip()

            # a blank (or trailing empty) line carries no fields
            if not cleaned_line:
                continue

            # split by each value
            for field in cleaned_line.split(","):
                # "time:125" -> key "time", value "125"; splitting only on the
                # first ":" keeps values that themselves contain a colon
                key_and_value = field.split(":", 1)
                content_key_from_dict = key_and_value[0].strip()
                content_value = key_and_value[1].strip()

                file_content_dictionary[content_key_from_dict].append(content_value)

    return file_content_dictionary


def get_parameters_from_dictionary(content_dictionary, line_to_start = 14):
    '''
    Collect the labels and time spans of consecutive non-"word" entries

    Entries are scanned from index line_to_start up to the second-to-last
    one (the end of an entry is the "time" of the entry after it, so the
    last entry has no end). Scanning stops at the first entry whose "type"
    is "word".

    :param content_dictionary: dictionary of parallel lists with at least the
                               keys "time", "type" and "value"
    :param line_to_start: index of the first entry to read (default 14;
                          NOTE(review): the reason for 14 is not visible here)
    :return: (value, start_time, end_time) lists; times are the "time" values
             converted to int and divided by 1000 (presumably ms -> s, confirm)

    '''
    n_file_lines = len(content_dictionary["time"])

    value = []
    start_time = []
    end_time = []

    for line in range(line_to_start, n_file_lines - 1):
        # the first "word" entry ends the run we are interested in
        if content_dictionary["type"][line] == "word":
            break

        value.append(content_dictionary["value"][line])

        start_time.append(int(content_dictionary["time"][line]) / 1000)
        end_time.append(int(content_dictionary["time"][line + 1]) / 1000)

    return value, start_time, end_time

def callPraatScript(file_name, script_name, start_interval, end_interval, folder, saving_name):
    '''
    Run a Praat script on a sound file together with a newly created TextGrid

    :param file_name: path of the sound file to load
    :param script_name: path of the Praat script to run
    :param start_interval: start of the TextGrid; also passed as the form's "Start_from"
    :param end_interval: end of the TextGrid; also passed as the form's "End_at"
    :param folder: folder where the script should save the sound files
    :param saving_name: prefix for the names of the saved files

    NOTE(review): the form describes "Start_from"/"End_at" as (int) interval
    positions, while the same arguments are used here as float TextGrid
    bounds - confirm which meaning is intended.

    '''
    # Both the sound file and the script have to exist
    assert os.path.exists(file_name) and os.path.exists(script_name), "ERROR"

    # Objects that are selected when the script runs
    sound = Sound(file_name)
    text_grid = TextGrid(float(start_interval), float(end_interval), "Mary John bell", "bell")

    # Arguments for the script form, listed in the order they are passed
    form_arguments = (
        1,               # (int) Which IntervalTier in this TextGrid would you like to process?
        start_interval,  # (int) Starting at which interval?
        end_interval,    # (int) ending at which interval?
        True,            # (boolean) Exclude empty labels
        True,            # (boolean) Exclude intervals labeled as xxx
        True,            # (boolean) Exclude intervals starting with dot
        0.0001,          # (float) Give a small margin for the files if you like (seconds)
        folder,          # Give the folder where to save the sound files
        saving_name,     # Give an optional prefix for all filenames
        "",              # Give an optional suffix for all filenames (.wav will be added anyway)
    )

    # Everything before the script path is a selected object,
    # everything after it is an argument to the script form
    praat.run_file([text_grid, sound], script_name, *form_arguments)

In [90]:
# Only two of the .marks files are processed in this test run
f1 = "Frases para Extracción de Difonos/Efre.marks"
f2 = "Frases para Extracción de Difonos/emEkre.marks"
marks_files = [f1, f2]

for file in marks_files:

    file_content_dictionary = get_marks_content(file)

    # from here on `file` is the audio path: same name as the marks file, .mp3 extension
    file = file.replace(".marks", ".mp3")

    # base name without folders or extension, used as prefix of the saved files
    filename = os.path.splitext(os.path.basename(file))[0]

    value, start, end = get_parameters_from_dictionary(file_content_dictionary, 14)

    print(start, end)

    # NOTE(review): callPraatScript labels these two arguments as (int) interval
    # positions for the script form, but times in seconds are passed here -
    # confirm, since the recorded run ends in a PraatError about intervals.
    for phone, phone_start, phone_end in zip(value, start, end):
        saving_name = filename + "_" + phone

        print(phone, phone_start, phone_end)

        callPraatScript(file, script_name, phone_start, phone_end, saving_folder, saving_name)


[0.937, 1.0, 1.062, 1.137] [1.0, 1.062, 1.137, 1.25]
e 0.937 1.0
f 1.0 1.062
t 1.062 1.137


PraatError: There are not that many intervals in the IntervalTier!
Script not completed.

In [96]:
# Print label, start and end (time / 1000) of every non-"word" entry of the
# dictionary left over from the last processed marks file.
marks = file_content_dictionary
n_file_lines = len(marks["time"])

for line in range(n_file_lines - 1):
    # "word" entries are skipped
    if marks["type"][line] == "word":
        continue

    print(marks["value"][line])
    # an entry ends where the following entry starts
    start_time = int(marks["time"][line]) / 1000
    end_time = int(marks["time"][line + 1]) / 1000
    print(start_time, end_time)
    print("=======================")

p
0.125 0.187
a
0.187 0.25
J
0.25 0.312
a
0.312 0.375
t
0.375 0.5
a
0.5 0.562
i
0.562 0.625
t
0.625 0.687
e
0.687 0.812
a
0.812 0.937
e
0.937 1.0
f
1.0 1.062
t
1.062 1.137
e
1.137 1.25
u
1.25 1.312
p
1.312 1.325
p
1.325 1.375
o
1.375 1.487
k
1.487 1.562
o
1.562 1.675


PraatError: Cannot create file “C:\Users\albam\OneDrive - UNIVERSIDAD DE HUELVA\2. Universidad\4º Curso\1. 1º Cuatrimestre\Procesamiento del Habla, Visión e Interacción Multimodal\Proyecto\Sintetizador Concatenativo de Difonos\auxiliar\tests\test.wav”.
Sounds not concatenated and not saved to “C:\Users\albam\OneDrive - UNIVERSIDAD DE HUELVA\2. Universidad\4º Curso\1. 1º Cuatrimestre\Procesamiento del Habla, Visión e Interacción Multimodal\Proyecto\Sintetizador Concatenativo de Difonos\auxiliar\tests\test.wav”.
Script line 88 not performed or completed:
« Write to WAV file... 'intervalfile$' »
Script not completed.