# LIBRARY

In [81]:
import numpy as np
import mido
import os

# M

In [82]:
def return_melody(file):
    M = []
    for i, track in enumerate(file.tracks):
        for msg in track:
            if msg.type == 'note_off' or (msg.type == 'note_on' and msg.velocity == 0): 
                M.append(msg.note)
    return M

# Vm

In [83]:
def return_delta(M):
    Vm = []
    for i in range(len(M)-1):
            delta = M[i+1] - M[i]
            Vm.append(delta)
    return Vm

# Rm

In [84]:
# Function to convert ticks to time in seconds, TEST2
def tick_to_time_test2(ticks, tempo, ticks_per_quarter_note):
    
    duration_per_tick = tempo / ticks_per_quarter_note
    
    note_duration = duration_per_tick * ticks
    
    note_duration_seconds = note_duration / 1000000
    
    
    return note_duration_seconds

In [85]:
def loop_duration_note(mid):

    # Get ticks per beat from the MIDI file
    ticks_per_beat = mid.ticks_per_beat

    #list of note duration
    note_duration_test2 = []

    for i, track in enumerate(mid.tracks): 
        for msg in track:
            
            if msg.type == 'set_tempo': #take tempo of ticks -> track 0
                current_tempo = msg.tempo

            elif msg.type == 'note_off' or (msg.type == 'note_on' and msg.velocity == 0):
                # Note off: calculate the duration
                note_duration_test2.append(tick_to_time_test2(msg.time, current_tempo, ticks_per_beat)) 

    return note_duration_test2

# PATH SONG

In [86]:
def name_all_song(path):

    lista_nomi = []
    dir_list = os.listdir(path)
    

    for i in range(len(dir_list)): 
        song_2 = path + '\\' + dir_list[i] 
        lista_nomi.append(song_2) #save name song
        
    return lista_nomi

# DEFINITION OF K

In [87]:
def find_k (song1, song2):
    
    min_M = len(song2["M"])
    if len(song1["M"]) < len(song2["M"]):
        min_M = len(song1["M"])
    min_Vm = len(song2["Vm"])
    if(len(song1["Vm"]) < len(song2["Vm"])):
        min_Vm = len(song1["Vm"])

    if(min_M < min_Vm):
        k = np.random.randint(low=min_M, high=min_Vm)
    else:
        k = np.random.randint(low = min_Vm, high=min_M)

    return k

# CREATION OF SUBSET (Sk)

In [88]:
def return_subset_of_Vm(Vm, k):
    return Vm[0:k]

In [89]:
def return_subset_of_Rm(Rm, k):
    return Rm[0:k]

# COMPUTE THE WEIGHTED MELODIC/RHYTMIC VECTORIAL DISTANCE BETWEEN SUBSETS

In [90]:
#s1 : sub_Vm_1
#s2 : sub_Vm_2
def melodic_vector_distance(s1, s2, list_song):
    
    distance = 0
    
    #calculate the eeuclidean distance
    for i in range(len(s1)):
        
        punto1 = s1[i] * (i+1) 
        punto2 = s2[i] * (i+1)
        
        absolute_value = abs(punto1 - punto2)
        sqrt_abs_value = np.power(absolute_value, 2)
        distance += sqrt_abs_value

    distance_sub_Vm = np.sqrt(distance)
    
    return distance_sub_Vm

# JACCARD SIMILARITY

In [91]:
# for Vm and Rm
def jaccard_similarity(vector_a, vector_b):
    
    num = vector_a.intersection(vector_b)
    den = vector_a.union(vector_b)
    jaccard_result = float(len(num))/float(len(den))

    return round(jaccard_result, 4)

# SUBSET SHINGLES

In [92]:
def return_subset_shingles(subset_Vm, k_shingles):

    list_shin = []

    for i in range(len(subset_Vm)- k_shingles +1):
        list_shin.append(subset_Vm[i:i+k_shingles])
    
    return list_shin

# AVERAGE JACCARD SIMILARITY

In [93]:
#Vm
def average_jaccard_similarity(list_song, k, k_shingles):
    
    #1) creation of subset Vm1 and subset Vm2
    subset_Vm1 = return_subset_of_Vm(list_song[0]["Vm"], k) #Sk(Vm1)
    subset_Vm2 = return_subset_of_Vm(list_song[1]["Vm"], k) #Sk(Vm2)

    #2)divide subset based on len k_shingles
    k_shingles_x = return_subset_shingles(subset_Vm1, k_shingles)
    k_shingles_y = return_subset_shingles(subset_Vm2, k_shingles)

    #3)Compute local jaccard similarity / initialize average jaccard similarity

    #total
    similarities = []

    for x in k_shingles_x:

        set_x = set(x)

        local_similarities = []
        
        for y in k_shingles_y:

            set_y = set(y)
            similarity = jaccard_similarity(set_x, set_y)
            local_similarities.append(similarity)

        if local_similarities:
            
            max_similarity = max(local_similarities) 
            similarities.append(max_similarity)
    
    #4) compute the average similaities
    if similarities:
        return sum(similarities) / len(similarities)
    else:
        return 0




In [94]:
#Rm
def average_jaccard_similarity_Rm(list_song, k, k_shingles):
    
    #1) creation of subset Vm1 and subset Vm2
    subset_Rm1 = return_subset_of_Rm(list_song[0]["Rm"], k) #Sk(Vm1)
    subset_Rm2 = return_subset_of_Rm(list_song[1]["Rm"], k) #Sk(Vm2)

    #2)divide subset based on len k_shingles
    k_shingles_x = return_subset_shingles(subset_Rm1, k_shingles)
    k_shingles_y = return_subset_shingles(subset_Rm2, k_shingles)

    #3)Compute local jaccard similarity / initialize average jaccard similarity

    #total
    similarities = []

    for x in k_shingles_x:

        set_x = set(x)

        local_similarities = []
        
        for y in k_shingles_y:

            set_y = set(y)

            similarity = jaccard_similarity(set_x, set_y)
            local_similarities.append(similarity)

        if local_similarities:
            
            max_similarity = max(local_similarities) 
            similarities.append(max_similarity)
    
    #4) compute the average similaities
    if similarities:
        return sum(similarities) / len(similarities)
    else:
        return 0




# PART OF FUZZY DEEP

In [95]:
def all_lambda(k_shingles_x, k_shingles_y):

    #list lambda
    list_lambdas = []

    for s_y in k_shingles_y:
        
        #save all the distance before product
        distances = []

        for s_x in k_shingles_x:  
            distance_norm = np.linalg.norm(np.array(s_y) - np.array(s_x)) / max(np.linalg.norm(s_y), np.linalg.norm(s_x)) #d(sx,sy)
            distances.append(distance_norm)
        
        one_minus_distance = [1- d for d in distances] # (1-d(sy,sx))
        product_dist = np.prod(one_minus_distance)
        lambda_sy_Mx = 1 - product_dist
        list_lambdas.append(lambda_sy_Mx)

    return list_lambdas


# MAIN

In [96]:
dict = {
    "name_song": "",
    "M": [],
    "Vm": [],
    "Rm": [],
}

#list dict for each song
list_song_ori = []
list_song_plag = []

#final ajs
list_finals = []

#list_real_song
list_name_ori = name_all_song(path = "BMMDet_MPDSet-master\BMMDet_MPDSet-master\data\midi\dataset_real_ori")
list_name_plag = name_all_song(path = "BMMDet_MPDSet-master\BMMDet_MPDSet-master\data\midi\dataset_real_plag")

#scorro le canzoni: -> update folder
for i in range(len(list_name_ori)):

    #opne file midi
    mid_ori = mido.MidiFile(list_name_ori[i])
    mid_plag = mido.MidiFile(list_name_plag[i])

    #CREO M
    M_ori = return_melody(mid_ori)
    M_plag = return_melody(mid_plag)

    #CREO Vm
    Vm_ori = return_delta(M_ori)
    Vm_plag = return_delta(M_plag)

    #CREO Rm
    #list to store note duration test 2
    Rm_ori = loop_duration_note(mid_ori)
    Rm_plag = loop_duration_note(mid_plag)

    dict_ori = {"name_song": mid_ori.filename, "M":M_ori, "Vm":Vm_ori, "Rm":Rm_ori}
    list_song_ori.append(dict_ori)
    dict_plag = {"name_song": mid_plag.filename, "M":M_plag, "Vm":Vm_plag, "Rm":Rm_plag}
    list_song_plag.append(dict_plag)

for i in range(len(list_song_plag)):

    #creo kappa 
    k = find_k(list_song_ori[i], list_song_plag[i])

    #find subset of Vm for the 2 songs based on kappa
    sub_Vm_1 = return_subset_of_Vm(list_song_ori[i]["Vm"], k) #sk (My)
    sub_Vm_2 = return_subset_of_Vm(list_song_plag[i]["Vm"], k) #sk (Mx)

    #calulate the distance for Vm used s1, s2 (subsets)
    dist_sub_Vm = melodic_vector_distance(sub_Vm_1, sub_Vm_2, [])

    #find the subset of Rm for the 2 songs based on kappa
    sub_Rm_1 = return_subset_of_Rm(list_song_ori[i]["Rm"], k) #s1
    sub_Rm_2 = return_subset_of_Rm(list_song_plag[i]["Rm"], k) #s2

    ##calulate the distance for Rm used z1, z2 (subsets)
    dist_sub_Rm = melodic_vector_distance(sub_Rm_1, sub_Rm_2, [])

    #jaccard similarity
    js_vm = jaccard_similarity(set(sub_Vm_1), set(sub_Vm_2))
    js_rm = jaccard_similarity(set(sub_Rm_1), set(sub_Rm_2))

    #k_shingles test
    k_shingles = 5

    #ajs ckeck list
    list_ajs = [list_song_ori[i], list_song_plag[i]]

    #average jaccard similarity
    ajs_vm = average_jaccard_similarity(list_ajs, k, k_shingles)
    ajs_rm = average_jaccard_similarity_Rm(list_ajs, k, k_shingles)

    #save
    check = {"plag_song": list_song_plag[i]["name_song"], "original_song": list_song_ori[i]["name_song"], "ajs_vm": ajs_vm, "ajs_rm": ajs_rm}
    list_finals.append(check)

#THRESHOLD
list_threshold = []

list_threshold_ori = []
list_threshold_plag = []

for i in list_finals:
    if i["ajs_vm"] > 0.2 and i["ajs_rm"] > 0.15 :
        list_threshold.append(i)
        for y,z in zip(list_song_ori, list_song_plag):
            if i["original_song"] == y["name_song"]:
                list_threshold_ori.append(y)
            if i["plag_song"] == z["name_song"]:
                list_threshold_plag.append(z)

# FUZZY
for i in range(len(list_threshold)): 

    k = find_k(list_threshold_ori[i], list_threshold_plag[i])

    #creation Sk(Vm)
    sub_Vm_y = return_subset_of_Vm(list_threshold_ori[i]["Vm"], k) #sk (My)
    sub_Vm_x = return_subset_of_Vm(list_threshold_plag[i]["Vm"], k) #sk (Mx)

    #k -> shingles
    shi = 5

    #2)divide subset based on len k_shingles
    k_shingles_y = return_subset_shingles(sub_Vm_y, shi) # sy E sk(My)
    k_shingles_x = return_subset_shingles(sub_Vm_x, shi) # sx E sk(Mx)

    #compute lambdas
    lambda_factors = all_lambda(k_shingles_x, k_shingles_y)
    #compute fuzzy
    F_My_Mx = sum(lambda_factors) / len(list_threshold_ori[i]["Vm"])

    list_threshold[i]["F_My_Mx"] = F_My_Mx
    
#FINAL

alpha = 0.75

final = []

for i in list_threshold:
    if i["F_My_Mx"] >= alpha:
        final.append(i)


  distance += sqrt_abs_value
  distance_sub_Vm = np.sqrt(distance)
  distance_norm = np.linalg.norm(np.array(s_y) - np.array(s_x)) / max(np.linalg.norm(s_y), np.linalg.norm(s_x)) #d(sx,sy)


In [97]:
final

[{'plag_song': 'BMMDet_MPDSet-master\\BMMDet_MPDSet-master\\data\\midi\\dataset_real_plag\\case10_Schenkt uns Dummheit, kein Niveau-FreiWild.mid',
  'original_song': 'BMMDet_MPDSet-master\\BMMDet_MPDSet-master\\data\\midi\\dataset_real_ori\\case10_Anftrag Deutsches Reich-Stahlgewitter.mid',
  'ajs_vm': 0.8059197368421053,
  'ajs_rm': 0.9561447368421053,
  'F_My_Mx': 0.95},
 {'plag_song': 'BMMDet_MPDSet-master\\BMMDet_MPDSet-master\\data\\midi\\dataset_real_plag\\case17_perhaps.mid',
  'original_song': 'BMMDet_MPDSet-master\\BMMDet_MPDSet-master\\data\\midi\\dataset_real_ori\\case17_ma este meg.mid',
  'ajs_vm': 0.8352557692307696,
  'ajs_rm': 0.678251923076923,
  'F_My_Mx': 0.8},
 {'plag_song': 'BMMDet_MPDSet-master\\BMMDet_MPDSet-master\\data\\midi\\dataset_real_plag\\case19_Till you.mid',
  'original_song': 'BMMDet_MPDSet-master\\BMMDet_MPDSet-master\\data\\midi\\dataset_real_ori\\case19_phantom Song.mid',
  'ajs_vm': 0.5745243243243243,
  'ajs_rm': 0.3790378378378379,
  'F_My_Mx': 0