In [1]:
import os
import numpy as np
import music21 as m21
import pandas as pd
import json
import matplotlib.pyplot as plt
from scipy import stats
import time

np.random.seed(777)
us = m21.environment.UserSettings()

# us['musescoreDirectPNGPath']="/home/sirivasv/Downloads/MuseScore-3.4.2-x86_64.AppImage"
us['musescoreDirectPNGPath']='/home/sirivasv/.local/bin/MuseScore-3.5.2.312125617-x86_64.AppImage'

# Define dataset paths
# MXML_PATH="/media/sirivasv/JASON/Saul/MCC/DATASETS/DATASUBSET/MTC-ANN-2.0.1/mid"
MXML_PATH="/media/sirivasv/DATAL/MCC/DATASUBSET/MTC-ANN-2.0.1/mid"

# METADATA_PATH="/media/sirivasv/JASON/Saul/MCC/DATASETS/DATASUBSET/MTC-ANN-2.0.1/metadata"
METADATA_PATH="/media/sirivasv/DATAL/MCC/DATASUBSET/MTC-ANN-2.0.1/metadata" 

## Data

In [2]:
# Read table of tune family
tune_family_filename = "MTC-ANN-tune-family-labels.csv"
tune_family_df = pd.read_csv(os.path.join(METADATA_PATH, tune_family_filename), header=None)
tune_family_df.head()

Unnamed: 0,0,1
0,NLB072587_01,Daar_ging_een_heer_1
1,NLB072587_02,Daar_ging_een_heer_1
2,NLB072774_02,Daar_ging_een_heer_1
3,NLB073046_01,Daar_ging_een_heer_1
4,NLB073588_01,Daar_ging_een_heer_1


In [3]:
# Traverse musicxml files and tune family
song_id_x_family = {}
family_x_songs = {}
for root, directories, files in os.walk(MXML_PATH):
    for file in files:
        song_id = file.split(".")[0]
        if (song_id not in song_id_x_family):
            family_name = tune_family_df[tune_family_df[0] == song_id].iloc[0][1]
            song_id_x_family[song_id] = (file, family_name)
            if (family_name not in family_x_songs):
                family_x_songs[family_name] = []
            family_x_songs[family_name].append(song_id)

In [4]:
# Remove the incomplete anotated tunes from the dataframe
reduced_tune_family_df = tune_family_df[tune_family_df[0].isin(list(song_id_x_family.keys()))]
reduced_tune_family_df.head()

Unnamed: 0,0,1
0,NLB072587_01,Daar_ging_een_heer_1
1,NLB072587_02,Daar_ging_een_heer_1
2,NLB072774_02,Daar_ging_een_heer_1
3,NLB073046_01,Daar_ging_een_heer_1
4,NLB073588_01,Daar_ging_een_heer_1


## Functions

In [5]:
DIV_CONST = 4

In [6]:
def getSongKey(song):
    key = song.analyze("key")
    return key

In [7]:
def getSongKeyFromMelody_W_Times(melody_w_times_in_k):
    sc_test = m21.stream.Score()
    p0_test = m21.stream.Part()
    p0_test.id = 'part0'
    for pitch_i in melody_w_times_in_k:
        n_i = m21.note.Note(pitch_i[4])
        p0_test.append(n_i)
    sc_test.insert(0, p0_test)
    return getSongKey(sc_test)

In [8]:
# Function to retrieve a list of midi pitch events and its timestamp
def getMelodyDeltaTimes(eventsintrack):
    
    # Initialize array
    DeltaTimes = []
    
    # Initialize cumulative sum
    cum_sum = 0
    
    # Initialize variable to track the time delta
    prev_deltatime = 0
    
    # Traverse the events
    for ev in eventsintrack:
        
        # If a note starts
        if (ev.isNoteOn()):
            
            # Get the pitch name and save it with the cumulative sum, midi pitch and name
            pitch_in_time = m21.pitch.Pitch(ev.pitch)
            DeltaTimes.append((cum_sum, prev_deltatime, pitch_in_time.midi, pitch_in_time.spanish, pitch_in_time))
            
            # Restart the delta time
            prev_deltatime = 0
        
        # Else if there is a delta time
        elif(str(ev.type) == "DeltaTime"):
            
            # We sum the time
            cum_sum += ev.time
            
            # We sum it to the current delta time
            prev_deltatime += ev.time
    
    # Return the array
    return DeltaTimes

In [9]:
def get_SCLM_v100(melody_w_times_A, melody_w_times_B):
    
    # We use a Dynamic Programming approach
    max_len = max(len(melody_w_times_A), len(melody_w_times_B)) + 1
    
    # memoization array
    memo = np.full(shape=(max_len,max_len), fill_value=-1)
    
    # Get the limits for each melody
    lim_A = len(melody_w_times_A)
    lim_B = len(melody_w_times_B)
    
    # Actual DP implementation
    for i in range(lim_A, -1, -1):
        for j in range(lim_B, -1, -1):
            
            # If we are at the limits the solution is 0
            if i == lim_A or  j == lim_B:
                memo[i][j] = 0
                continue
            
            # If there is a match a possible solution is the previous plus one
            curr_value = 0
            
            tot_delta_time = (float(melody_w_times_A[i][1]) + float(melody_w_times_B[j][1])) / float(DIV_CONST)
            tot_diff_time = np.abs(float(melody_w_times_A[i][1]) - float(melody_w_times_B[j][1]))
            
            
            if (melody_w_times_A[i][3] == melody_w_times_B[j][3]) and (tot_diff_time <= tot_delta_time):
                curr_value = memo[i + 1][j + 1] + 1
                
            # The actual solution is the maximum between the one if there is a match, or skip on the melody A or melody B
            curr_value = max(curr_value, max(memo[i + 1][j], memo[i][j + 1]))
            
            # Save the solution
            memo[i][j] = curr_value
    
    # With the memoization table we can retrieve the actual melody
    i = 0
    j = 0
    SCLM = []
    while i != lim_A and j != lim_B:
    
        if ((memo[i + 1][j + 1] + 1) == memo[i][j]):
            SCLM.append((i, j))
            i += 1
            j += 1
        elif (memo[i + 1][j] == memo[i][j]):
            i += 1
        elif (memo[i][j + 1] == memo[i][j]):
            j += 1
    
    return SCLM

In [10]:
def get_max_timestamp_dif(melody_w_times_A, melody_w_times_B):
    return max(
        melody_w_times_A[len(melody_w_times_A) - 1][0] - melody_w_times_A[0][0],
        melody_w_times_B[len(melody_w_times_B) - 1][0] - melody_w_times_B[0][0]
    )

In [11]:
def getDifSCLM(melody_w_times_A, melody_w_times_B, sclm):
    
    # If there is no sclm or it is just one return max possible value
    if (len(sclm) <= 1):
        return get_max_timestamp_dif(melody_w_times_A, melody_w_times_B)
    
    
    # Initialize the arrays
    T_A = np.zeros(shape=(len(sclm) - 1))
    T_B = np.zeros(shape=(len(sclm) - 1))
    T_C = np.zeros(shape=(len(sclm) - 1))
    Dif_ = np.zeros(shape=(len(sclm) - 1))
    
    for i in range(1, len(sclm)):
        T_A[i - 1] = melody_w_times_A[sclm[i][0]][0] - melody_w_times_A[sclm[i-1][0]][0]
        T_B[i - 1] = melody_w_times_B[sclm[i][1]][0] - melody_w_times_B[sclm[i-1][1]][0]
        T_C[i - 1] = np.abs(T_A[i - 1] - T_B[i - 1])
    
    T_C_mean = np.mean(T_C)
    
    for i in range(0, len(T_B)):
        T_B[i] += T_C_mean
        Dif_[i] = T_A[i] - T_B[i]
    
    return T_C_mean
    

In [12]:
def get_MTRC_v100_from_melody_w_times(melody_w_times_A, melody_w_times_B):
    
    # Assert at least one element for each melody
    if (len(melody_w_times_A) == 0 or len(melody_w_times_B) == 0):
        return 1
    
    # Initialize result variable
    result_value = 0
    
    # Get Keys
    key_A = getSongKeyFromMelody_W_Times(melody_w_times_A)
    key_B = getSongKeyFromMelody_W_Times(melody_w_times_B)
    
    # D1: Scale  
    scale_dif1 = 0
    if (key_A.name != key_B.name):
        scale_dif1 = W1
    result_value += scale_dif1
    
    # D2: Mode  
    mode_dif2 = 0
    if (key_A.mode != key_B.mode):
        mode_dif2 = W2
    result_value += mode_dif2
    
    # Get SCLM v100
    sclm = get_SCLM_v100(melody_w_times_A, melody_w_times_B)
    
    # Get max len
    max_len = max(len(melody_w_times_A), len(melody_w_times_B))
    
    # D3: SCLM Length
    sclmlen_dif3 = ((max_len - len(sclm)) / max_len) * W3
    result_value += sclmlen_dif3
    
    # Get the Diff on temporal spacing in the SCLM
    dif_sclm = getDifSCLM(melody_w_times_A, melody_w_times_B, sclm)
    
    # D4: dif in sclm
    max_timestamp_dif = get_max_timestamp_dif(melody_w_times_A, melody_w_times_B)
    sclmdif_dif4 = (dif_sclm / max_timestamp_dif) * W4
    result_value += sclmdif_dif4
    
    return result_value

In [13]:
# Read Files 
song_m21_streams = {}

# We traverse the reduced table
for query_row in reduced_tune_family_df.iterrows():
    tune_family_query = query_row[1][1]
    song_id_A = query_row[1][0]
    
    song_stream_A = m21.converter.parseFile(os.path.join(MXML_PATH, song_id_x_family[song_id_A][0]))
    midi_tracks_A = m21.midi.translate.streamToMidiFile(song_stream_A)
    melody_w_times_A = getMelodyDeltaTimes(midi_tracks_A.tracks[0].events)
    
    song_m21_streams[song_id_A] = {
        "song_stream": song_stream_A,
        "midi_tracks": midi_tracks_A,
        "melody_w_times": melody_w_times_A
    }

## Noises

### Type 1. Random Pitch

In [14]:
def get_random_pitch():
    
    new_pitch_class = np.random.randint(0, 12)
    new_pitch_octave = np.random.randint(1, 9)
    
    return m21.pitch.Pitch(octave=new_pitch_octave, pitchClass=new_pitch_class)

In [15]:
# Define apply Transformation type 1: Ruido en notas
def apply_note_noise(melody_w_times_in, percentage=50):
    
    # Track modified notes 
    modified_notes = {}
    
    # Store the length of the melody
    len_melody = len(melody_w_times_in)
    
    # According to the desired percentage of noise we get the number of notes to be modified
    many_notes = int((len_melody * percentage)//100)
    
    for noise_i in range(many_notes):
        
        # Select a random position that we haven't seen yet
        note_to_change = np.random.randint(0, len_melody)
        while (note_to_change in modified_notes):
            note_to_change = np.random.randint(0, len_melody)
        modified_notes[note_to_change] = 1
        
        # Creating a new pitch note
        previous_pitch = melody_w_times_in[note_to_change][3]
        p_new = get_random_pitch()
        while (p_new.spanish == previous_pitch):
            p_new = get_random_pitch()
        
        
        # Replace the data 
        melody_w_times_in[note_to_change] = (
            melody_w_times_in[note_to_change][0],
            melody_w_times_in[note_to_change][1],
            p_new.midi,
            p_new.spanish,
            p_new)
    
    # Return the modified melody
    return melody_w_times_in

### Type 2. Random DeltaTime

In [16]:
def recalculate_timestamps(melody_w_times_in):
    
    # Store the length of the melody
    len_melody = len(melody_w_times_in)
    
    # Define current start time
    current_start_time = 0
    
    # Traverse the melody
    for note_i in range(len_melody):
        current_start_time += melody_w_times_in[note_i][1]
        melody_w_times_in[note_i] = (
            current_start_time,
            melody_w_times_in[note_i][1],
            melody_w_times_in[note_i][2],
            melody_w_times_in[note_i][3],
            melody_w_times_in[note_i][4])
    
    # Return the recalculated melody
    return melody_w_times_in

In [17]:
def get_random_deltatime():
    return np.random.randint(0, 4097)

In [18]:
# Define apply Transformation type 2: Ruido en tiempos
def apply_deltatime_noise(melody_w_times_in, percentage=50):
    
    # Track modified notes 
    modified_notes = {}
    
    # Store the length of the melody
    len_melody = len(melody_w_times_in)
    
    # According to the desired percentage of noise we get the number of notes to be modified
    many_notes = int((len_melody * percentage)//100)
    
    for noise_i in range(many_notes):
        
        # Select a random position that we haven't seen yet
        note_to_change = np.random.randint(0, len_melody)
        while (note_to_change in modified_notes):
            note_to_change = np.random.randint(0, len_melody)
        modified_notes[note_to_change] = 1
        
        # Creating a new deltatime
        previous_deltatime = melody_w_times_in[note_to_change][1]
        deltatime_new = get_random_deltatime()
        while (deltatime_new == previous_deltatime):
            deltatime_new = get_random_deltatime()
        
        # ratio_of_change = np.abs((deltatime_new - previous_deltatime))
        # if previous_deltatime != 0:
        #     ratio_of_change /= previous_deltatime
        # else:
        #     ratio_of_change = -1
        # print("AAA", ratio_of_change)
        
        # Replace the data 
        melody_w_times_in[note_to_change] = (
            melody_w_times_in[note_to_change][0],
            deltatime_new,
            melody_w_times_in[note_to_change][2],
            melody_w_times_in[note_to_change][3],
            melody_w_times_in[note_to_change][4])
        
        # Recalculate timestamps due to the modification in deltatimes
        melody_w_times_in = recalculate_timestamps(melody_w_times_in)
    
    # Return the modified melody
    return melody_w_times_in

### Type 3. Noise in Pitch and Deltatime

In [19]:
# Define apply Transformation type 3: Ruido en tiempos y notas (reemplazo)
def apply_deltatime_and_note_noise(melody_w_times_in, percentage=50):
    
    # Track modified notes 
    modified_notes = {}
    
    # Store the length of the melody
    len_melody = len(melody_w_times_in)
    
    # According to the desired percentage of noise we get the number of notes to be modified
    many_notes = int((len_melody * percentage)//100)
    
    for noise_i in range(many_notes):
        
        # Select a random position that we haven't seen yet
        note_to_change = np.random.randint(0, len_melody)
        while (note_to_change in modified_notes):
            note_to_change = np.random.randint(0, len_melody)
        modified_notes[note_to_change] = 1
        
        # Creating a new deltatime
        previous_deltatime = melody_w_times_in[note_to_change][1]
        deltatime_new = get_random_deltatime()
        while (deltatime_new == previous_deltatime):
            deltatime_new = get_random_deltatime()
        
        # Creating a new pitch note
        previous_pitch = melody_w_times_in[note_to_change][3]
        p_new = get_random_pitch()
        while (p_new.spanish == previous_pitch):
            p_new = get_random_pitch()
            
        # Replace the data 
        melody_w_times_in[note_to_change] = (
            melody_w_times_in[note_to_change][0],
            deltatime_new,
            p_new.midi,
            p_new.spanish,
            p_new)
        
        # Recalculate timestamps due to the modification in deltatimes
        melody_w_times_in = recalculate_timestamps(melody_w_times_in)
    
    # Return the modified melody
    return melody_w_times_in

### Type 4. Removing notes

In [20]:
# Define apply Transformation type 4: Noise by removing events
def apply_removing_noise(melody_w_times_in, percentage=50):
    
    # Store the length of the melody
    len_melody = len(melody_w_times_in)
    
    # According to the desired percentage of noise we get the number of notes to be modified
    many_notes = int((len_melody * percentage)//100)
    
    for noise_i in range(many_notes):
        
        # Select a random position to remove
        note_to_remove = np.random.randint(0, len(melody_w_times_in))
        
        # Remove element
        melody_w_times_in.pop(note_to_remove)
        
        # Recalculate timestamps due to the modification in deltatimes continuity
        melody_w_times_in = recalculate_timestamps(melody_w_times_in)
    
    # Return the modified melody
    return melody_w_times_in

### Type 5. Inserting new notes

In [21]:
# Define apply Transformation type 5: Noise by Inserting events
def apply_inserting_noise(melody_w_times_in, percentage=50):
    
    # Assert only percentages p <= 100 and p > 0
    if percentage >= 100 or percentage < 0:
        percentage = 99
    
    # Store the length of the melody
    len_melody = len(melody_w_times_in)
    
    # According to the desired percentage of noise we get the number of notes to be modified
    new_len = int(len_melody / (1 - (percentage / 100)))
    many_notes = new_len - len_melody
    
    for noise_i in range(many_notes):
        
        # Create new Event
        # Creating a new deltatime
        deltatime_new = get_random_deltatime()
        
        # Creating a new pitch note
        p_new = get_random_pitch()
            
        # Replace the data 
        new_midi_event = (
            0,
            deltatime_new,
            p_new.midi,
            p_new.spanish,
            p_new)
        
        # Select a random position to insert
        pos_to_insert = np.random.randint(0, len(melody_w_times_in))
        
        # Insert element
        melody_w_times_in.insert(pos_to_insert, new_midi_event)
        
        # Recalculate timestamps due to the modification in deltatimes continuity
        melody_w_times_in = recalculate_timestamps(melody_w_times_in)
    
    # Return the modified melody
    return melody_w_times_in

### Noise Controller

In [22]:
def apply_ith_noise(noise_type, melody_w_times_in, percentage=50):
    
    if (noise_type == 1):
        return apply_note_noise(melody_w_times_in, percentage)
    if (noise_type == 2):
        return apply_deltatime_noise(melody_w_times_in, percentage)
    if (noise_type == 3):
        return apply_deltatime_and_note_noise(melody_w_times_in, percentage)
    if (noise_type == 4):
        return apply_removing_noise(melody_w_times_in, percentage)
    
    return apply_inserting_noise(melody_w_times_in, percentage)

## Experiments

In [23]:
def apply_threshold(val_x):
    val_x *= 100
    val_x = int(val_x)
    val_x = round(val_x / 10)
    return val_x / 10.0

In [24]:
def get_metric_range_to_100(val_x):
    return (val_x * 1.0) / (0.7)

In [25]:
def get_mean_distance_for_song(song_id_query, with_threshold=False):
    
    # We initialize the mean difference across noises
    diff_x_noises = []
    
    # Metric Values
    metric_values = []
    
    # Difference per percentage
    differences_per_percentage = []
    
    # We traverse the noises
    for noise_type_i in noise_types:
        # if (noise_type_i == 2):
            # continue
        
        # We initialize the mean distance in the current noise
        sorted_distance_mean_current_noise = 0
        
        # We save the similarities differences with the percentage
        similarity_differences_for_mean = []
        
        # We save metric values per noise type
        metric_values_per_noise = []
        
        # We traverse the percentages
        for noise_percentage_i in noise_percentages:
            
            melody_w_times_query = getMelodyDeltaTimes(
                song_m21_streams[song_id_query]["midi_tracks"].tracks[0].events)
            melody_w_times_test = getMelodyDeltaTimes(
                song_m21_streams[song_id_query]["midi_tracks"].tracks[0].events)

            melody_w_times_test = apply_ith_noise(noise_type_i, melody_w_times_test, noise_percentage_i)
             
            current_similarity = get_MTRC_v100_from_melody_w_times(
                    melody_w_times_query,
                    melody_w_times_test)
            
            # current_similarity = get_metric_range_to_100(current_similarity)
            
            if (with_threshold):
                current_similarity = apply_threshold(current_similarity)
            
            metric_values_per_noise.append(current_similarity)
            similarity_differences_for_mean.append(np.abs(current_similarity - float(noise_percentage_i/100.0)))
        
        # Get the mean of the 
        similarity_difference_mean_current_noise = np.mean(similarity_differences_for_mean)
        
        # Save the differences per percentage
        differences_per_percentage.append(similarity_differences_for_mean)
        
        # Save the metric values per percentage
        metric_values.append(metric_values_per_noise)
        
        # We sum the distance for this noise
        diff_x_noises.append(similarity_difference_mean_current_noise)
    
    # We return the mean distance per noise
    return metric_values, differences_per_percentage, diff_x_noises

In [26]:
def get_weights_from_encoded_conf(encoded_conf):
    return tuple(map(lambda x: x, map(float, encoded_conf.split(","))))

## No Threshold

In [27]:
%%time
# Define noise type array
noise_types = [1, 2, 3, 4, 5]
len_noise_types = len(noise_types)

# Define the percentages of noise
noise_percentages = list(map(int, np.linspace(10, 100, 10)))

# Weights
#W1 = 0.25
#W2 = 0.25
#W3 = 0.25
#W4 = 0.25

W1 = 0.0
W2 = 0.0
W3 = 1.0
W4 = 0.0

# TEST
heuristic_raw_results = {}
heuristic_raw_results['0.0,0.0,1.0,0.0'] = get_mean_distance_for_song('NLB072967_01', False)

CPU times: user 7.98 s, sys: 48 ms, total: 8.03 s
Wall time: 8.05 s


In [28]:
print(np.mean(heuristic_raw_results['0.0,0.0,1.0,0.0'][2]))

0.07116279261758142


In [29]:
ts = time.time()
with open('./HEURISTIC_TEMPOMETRIC_1_{0}_NOISE_RAW_v106_{1}.json'.format(DIV_CONST, str(ts)), 'w') as outfile:
    json.dump(heuristic_raw_results, outfile)

In [31]:
print("1_2", 0.11794245363453057)
print("1_4", 0.07116279261758142)
print("1_8", 0.04743397905825938)
print("1_32", 0.027433979058259384)
print("1_64", 0.022688216346394972)
print("1_128", 0.01997635193961531)
print("1_512", 0.018959402787072935)
print("1_2048", 0.018620419736225476)

1_2 0.11794245363453057
1_4 0.07116279261758142
1_8 0.04743397905825938
1_32 0.027433979058259384
1_64 0.022688216346394972
1_128 0.01997635193961531
1_512 0.018959402787072935
1_2048 0.018620419736225476
