In [2]:
import numpy as np

In [3]:
from ExtractData import *

In [4]:
# Relative location of the sample data; passed to coconet_harmony / get_cleaned_phrases below.
path = '../Data/DoodleSample'

In [5]:
# Load the raw phrases. Each phrase is later unpacked by sort_phrase as
# (pitches, note_start_step, note_end_step, instruments).
# NOTE(review): coconet_harmony is not defined in this notebook — presumably it comes from ExtractData.
all_four_part_harmonies = coconet_harmony(path, 1)

In [6]:
def sort_phrase(phrase, num_voices=4, num_steps=32):
    """
    A function that lays an unordered list of notes out as a voices-by-time-steps pitch grid.

    Args:
        - phrase (tuple): a tuple of lists of the form (pitches, note_start_step, note_end_step, instruments).
          The lists are parallel: entry i of each list describes note i. Instrument values are used
          directly as row indices, so they must already be zero-based (0 .. num_voices - 1).
          End steps are exclusive: a note occupies steps start .. end - 1.
        - num_voices (int): number of rows (voices) in the grid. Defaults to 4.
        - num_steps (int): number of columns (time steps) in the grid. Defaults to 32.

    Returns:
        - A NumPy integer array of shape (num_voices, num_steps) holding the pitch of each voice at
          each step. Steps in which a voice has no note stay 0.

    Raises:
        - IndexError: if an instrument value is not a valid row index.
    """
    # Assign the lists in the tuple
    pitch_list, note_start_list, note_end_list, instrument_list = phrase

    # Start from an all-zero grid; 0 marks "no note" for that voice and step
    phrase_array = np.zeros((num_voices, num_steps), dtype=int)

    # Walk the parallel lists note by note and paint each note onto its voice row
    for instrument, pitch, start_step, end_step in zip(
        instrument_list, pitch_list, note_start_list, note_end_list
    ):
        # The end step is exclusive (a full-length phrase ends at step 32 while the last
        # column is 31), so the slice must stop at end_step. Writing one step further would
        # bleed into a following rest and, for out-of-order notes, clobber the next onset.
        # Slices that run past the last column are clipped by NumPy.
        phrase_array[instrument, start_step:end_step] = pitch

    return phrase_array

In [7]:
# Preview: lay out the first raw phrase as a voice-by-step pitch grid.
sort_phrase(all_four_part_harmonies[0])

array([[71, 71, 69, 69, 74, 74, 74, 74, 72, 72, 71, 71, 67, 67, 71, 71,
        69, 69, 69, 69, 71, 71, 71, 71, 69, 69, 69, 69, 69, 69, 69, 69],
       [67, 67, 67, 67, 69, 69, 69, 69, 67, 67, 66, 66, 67, 67, 67, 64,
        64, 64, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67],
       [62, 62, 62, 62, 62, 62, 62, 62, 64, 64, 62, 62, 67, 67, 55, 67,
        57, 57, 57, 57, 62, 62, 62, 62, 64, 64, 64, 64, 57, 57, 62, 62],
       [43, 43, 43, 43, 55, 55, 54, 54, 52, 52, 50, 50, 50, 52, 50, 50,
        48, 48, 48, 48, 47, 47, 47, 47, 49, 49, 45, 45, 50, 50, 50, 50]])

In [8]:
def clean_sorted_phrases(all_harmonies):
    """
    A function to sort phrases into pitch grids and drop the unusable ones.
    We remove any duplicate phrases (same melody, i.e. same first row of the grid) and any
    phrases that are not the full length (last note must end at step 32).

    Args:
        - all_harmonies (list): a list of all phrases, each a tuple of lists of the form
          (pitches, note_start_step, note_end_step, instruments).

    Returns:
        - A list of arrays of sorted phrases, in their original order, keeping the first
          occurrence of each melody.
    """
    # A set gives constant-time duplicate checks instead of scanning a list per phrase
    seen_melodies = set()
    cleaned_and_sorted_phrases = []

    for phrase in all_harmonies:
        note_end_list = phrase[2]

        # A phrase without notes cannot be full length; skip it rather than fail on [-1].
        # Only the final entry is inspected, which assumes the last listed note is the one
        # that closes the phrase.
        if len(note_end_list) == 0 or note_end_list[-1] != 32:
            continue

        four_part_harmony = sort_phrase(phrase)

        # Row 0 of the grid identifies the phrase for de-duplication
        melody = tuple(four_part_harmony[0])
        if melody in seen_melodies:
            continue

        seen_melodies.add(melody)
        cleaned_and_sorted_phrases.append(four_part_harmony)

    return cleaned_and_sorted_phrases

In [9]:
# Filter and de-duplicate the raw phrases loaded above.
all_cleaned_phrases = clean_sorted_phrases(all_four_part_harmonies)

In [10]:
def get_cleaned_phrases(path, num_users):
    """
    Run the whole pipeline in one call: extract the phrases, then sort and clean them.

    Args:
        - path (str): Location of the data, forwarded unchanged to coconet_harmony.
        - num_users: Forwarded unchanged to coconet_harmony
          (presumably how many users' data to read — TODO confirm).

    Returns:
        - The list of phrase arrays produced by clean_sorted_phrases.
    """
    # Extraction feeds straight into cleaning; no intermediate state is kept
    return clean_sorted_phrases(coconet_harmony(path, num_users))

In [11]:
# Same extract-and-clean pipeline through the single entry point; displays the resulting arrays.
get_cleaned_phrases(path, 1)

[array([[71, 71, 69, 69, 74, 74, 74, 74, 72, 72, 71, 71, 67, 67, 71, 71,
         69, 69, 69, 69, 71, 71, 71, 71, 69, 69, 69, 69, 69, 69, 69, 69],
        [67, 67, 67, 67, 69, 69, 69, 69, 67, 67, 66, 66, 67, 67, 67, 64,
         64, 64, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67],
        [62, 62, 62, 62, 62, 62, 62, 62, 64, 64, 62, 62, 67, 67, 55, 67,
         57, 57, 57, 57, 62, 62, 62, 62, 64, 64, 64, 64, 57, 57, 62, 62],
        [43, 43, 43, 43, 55, 55, 54, 54, 52, 52, 50, 50, 50, 52, 50, 50,
         48, 48, 48, 48, 47, 47, 47, 47, 49, 49, 45, 45, 50, 50, 50, 50]]),
 array([[71, 71, 69, 69, 74, 74, 74, 74, 72, 72, 71, 71, 67, 67, 71, 71,
         69, 69, 69, 69, 72, 72, 76, 76, 69, 69, 74, 74, 67, 67, 72, 72],
        [67, 67, 67, 67, 69, 69, 69, 69, 67, 67, 65, 65, 67, 67, 64, 64,
         69, 69, 69, 69, 69, 69, 64, 64, 65, 65, 65, 65, 64, 64, 65, 65],
        [62, 62, 62, 62, 62, 62, 62, 62, 64, 64, 62, 62, 60, 60, 60, 60,
         60, 60, 60, 60, 65, 65, 59, 59, 60

In [12]:
# Keep the cleaned phrases for the cells below.
all_phrases = get_cleaned_phrases(path, 1)

In [13]:
# Take the first cleaned phrase and inspect it with index 0.
# NOTE(review): pitch_and_length is not defined in this notebook — presumably from ExtractData;
# the displayed result is a pair of lists (pitches, lengths). Confirm what the second argument selects.
sample_phrase = all_phrases[0]
pitch_and_length(sample_phrase, 0)

([71, 69, 74, 72, 71, 67, 71, 69, 71, 69],
 [1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 4.0])