In [29]:
from crim_intervals import *
import pandas as pd
import ast
import matplotlib
from itertools import tee, combinations
import numpy as np

In [58]:
# THIS IS DEV COPY for use with CLOSE/EXACT Matches

import numpy as np

# Converts lists to tuples

def lists_to_tuples_a(el):
    if isinstance(el, list):
        return tuple(el)
    else:
        return el

# Filters for the length of the Presentation Type in the Classifier

def limit_offset_size(array, limit):
    under_limit = np.cumsum(array) <= limit
    return array[: sum(under_limit)]

# Gets the the list of offset differences for each group 

def get_offset_difference_list_a(group):
    # if we do sort values as part of the func call, then we don't need this first line
    group = group.sort_values("start_offset")
    group["next_offset"] = group.start_offset.shift(-1)
    offset_difference_list = (group.next_offset - group.start_offset).dropna().tolist()
    return offset_difference_list

# The classifications are done here
# be sure to have the offset difference limit set here and matched in gap check below  80 = ten bars

def classify_offsets_a(offset_difference_list):
    """
    Put logic for classifying an offset list here
    """
    # 
    offset_difference_list = limit_offset_size(offset_difference_list, 500)
    
    alt_list = offset_difference_list[::2]
    
    if len(set(offset_difference_list)) == 1 and len(offset_difference_list) > 1:
        return ("PEN", offset_difference_list)
    # elif (len(offset_difference_list) %2 != 0) and (len(set(alt_list)) == 1):
    elif (len(offset_difference_list) %2 != 0) and (len(set(alt_list)) == 1) and (len(offset_difference_list) >= 3):
        return ("ID", offset_difference_list)
    elif len(offset_difference_list) >= 1:
        return ("Fuga", offset_difference_list)
    else: 
        return ("Singleton", offset_difference_list)
    
# adds predicted type, offsets and entry numbers to the results

def predict_type_a(group):
    offset_differences = get_offset_difference_list_a(group)
    predicted_type, offsets = classify_offsets_a(offset_differences)

    group["predicted_type"] = [predicted_type for i in range(len(group))]
    group["offset_diffs"] = [offsets for i in range(len(group))]
    group["entry_number"] = [i + 1 for i in range(len(group))]

    return group

In [59]:
# read CSV output of CRIM Intervals Search.

df = pd.read_csv(('A_Test_Files/Mass_0005_1_Exact_4.csv'), index_col=0)
df = df.drop(columns=["ema", "ema_url", "end_measure", "end_beat"])
df.head()


Unnamed: 0,pattern_generating_match,pattern_matched,piece_title,part,start_measure,start_beat,start_offset,end_offset,note_durations
0,"[4, 1, 2, 2, -2]","[4, 1, 2, 2, -2]",Missa Ave Maria: Kyrie,Sup[erius],3,3.0,20.0,46.0,"[4.0, 8.0, 4.0, 4.0, 6.0, 1.0]"
1,"[4, 1, 2, 2, -2]","[4, 1, 2, 2, -2]",Missa Ave Maria: Kyrie,Altus,1,1.0,0.0,26.0,"[4.0, 8.0, 4.0, 4.0, 6.0, 1.0]"
2,"[4, 1, 2, 2, -2]","[4, 1, 2, 2, -2]",Missa Ave Maria: Kyrie,Bassus,10,1.0,72.0,95.0,"[4.0, 8.0, 4.0, 4.0, 3.0, 1.0]"
3,"[1, 2, 2, -2, -2]","[1, 2, 2, -2, -2]",Missa Ave Maria: Kyrie,Sup[erius],4,1.0,24.0,47.0,"[8.0, 4.0, 4.0, 6.0, 1.0, 1.0]"
4,"[1, 2, 2, -2, -2]","[1, 2, 2, -2, -2]",Missa Ave Maria: Kyrie,Altus,1,3.0,4.0,27.0,"[8.0, 4.0, 4.0, 6.0, 1.0, 1.0]"


In [60]:
# lists for views.  These are used when calling the df below

simple_view = ["piece_title", "part", "pattern_generating_match", "pattern_matched", "start_offset"]

offset_details = ["start_measure", "start_beat", "end_offset", "note_durations", "prev_entry_off", "next_entry_off"]

drop_cols = ["pattern_matched", "part", "pattern_generating_match", "piece_title", "start_measure", "start_beat", "end_offset", "note_durations", "prev_entry_off", "next_entry_off"]

ready_classify = ["pattern_generating_match", "pattern_matched", "piece_title", "part", "start_measure", "start_beat", "start_offset", "sub_group_id"]

In [61]:
df['note_durations'] = df.note_durations.apply(ast.literal_eval)
df["sum_durs"] = df.note_durations.apply(sum)

In [62]:
# now process the original match data df by:
# sorting by start offset, then group by pattern generating match and applying the function above for prev entry
# also give each group a number
# sort by group number and start offset
# so that we can find PARALLEL (=0) Forward Gaps (the distance to NEXT entry), and Backward Gaps (distance to PREVIOUS Entry)
# All Proximate Matches are part of same sub Group (and go to classifier)
# Parallels are part of Sub Group but NOT part of Classified.  We will filter them OUT before classification
# Forward ONLY means the NEXT entry is a GAP.  No problem for Forward ONLY, since these are part of the previous set.
# Backward ONLY means the LAST entry was a GAP.  So these are NEW subgroups, since a new Presentation Type begins
# Forward AND Backward Gaps are SINGLETONS:  We will filter then OUT before Classification
# If an entry is the LAST in a Group and also has a BACKWARD gap it is also a SINGLETON

df2 = df
# df2 = df.sort_values("start_offset")

# Make Groups, Sort By Group and Offset, then and Add Previous/Next
df2["group_number"] = df2.groupby('pattern_generating_match').ngroup()
df2 = df2.sort_values(['group_number', 'start_offset'])
df2["prev_entry_off"] = df2["start_offset"].shift(1)
df2["next_entry_off"] = df2["start_offset"].shift(-1)


first_of_group = df2.drop_duplicates(subset=["pattern_generating_match"], keep='first').index
df2["is_first"] = df2.index.isin(first_of_group)
last_of_group = df2.drop_duplicates(subset=["pattern_generating_match"], keep='last').index
df2["is_last"] = df2.index.isin(last_of_group)

# Check Differences between Next and Last Offset

df2["last_off_diff"] = df2["start_offset"] - df2["prev_entry_off"]
df2["next_off_diff"] = df2["next_entry_off"] - df2["start_offset"]

# Find Parallel Entries 
df2["parallel"] = df2["last_off_diff"] == 0

# Set Gap Limits and Check Gaps Forward and Back
df2["forward_gapped"] = df2["next_off_diff"] >= 50
df2["back_gapped"] = df2["last_off_diff"] >= 50

# Find Singletons and Split Groups with Gaps
df2["singleton"] = ((df2['forward_gapped'] == True) & (df2['back_gapped'] == True) | (df2['back_gapped'] == True) & (df2["is_last"]))
df2["split_group"] = (df2['forward_gapped'] == False) & (df2['back_gapped'] == True)

#Mask Out Parallels and Singletons
df2 = df2[df2["parallel"] != True]
df2 = df2[df2["singleton"] != True]
df2["next_off_diff"] = df2["next_off_diff"].abs()
df2["last_off_diff"] = df2["last_off_diff"].abs()

# Find Final Groups
df2["combined_group"] = (df2.split_group | df2.is_first)
df2.loc[(df2["combined_group"]), "sub_group_id"] = range(df2.combined_group.sum())
df2["sub_group_id"] = df2["sub_group_id"].ffill()


df2.head()

Unnamed: 0,pattern_generating_match,pattern_matched,piece_title,part,start_measure,start_beat,start_offset,end_offset,note_durations,sum_durs,...,is_last,last_off_diff,next_off_diff,parallel,forward_gapped,back_gapped,singleton,split_group,combined_group,sub_group_id
277,"[-2, -2, -2, -2, -2]","[-2, -2, -2, -2, -2]",Missa Ave Maria: Kyrie,Bassus,31,1.0,240.0,250.0,"[3.0, 1.0, 2.0, 2.0, 2.0, 2.0]",12.0,...,False,,3.0,False,False,False,False,False,True,0.0
278,"[-2, -2, -2, -2, -2]","[-2, -2, -2, -2, -2]",Missa Ave Maria: Kyrie,Bassus,31,2.5,243.0,252.0,"[1.0, 2.0, 2.0, 2.0, 2.0, 2.0]",11.0,...,False,3.0,351.0,False,True,False,False,False,False,0.0
274,"[-2, -2, -2, -2, -2]","[-2, -2, -2, -2, -2]",Missa Ave Maria: Kyrie,Altus,73,2.0,594.0,602.0,"[4.0, 1.0, 1.0, 1.0, 1.0, 1.0]",9.0,...,False,351.0,4.0,False,False,True,False,True,True,1.0
275,"[-2, -2, -2, -2, -2]","[-2, -2, -2, -2, -2]",Missa Ave Maria: Kyrie,Altus,73,4.0,598.0,603.0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0]",6.0,...,False,4.0,1.0,False,False,False,False,False,False,1.0
276,"[-2, -2, -2, -2, -2]","[-2, -2, -2, -2, -2]",Missa Ave Maria: Kyrie,Altus,73,4.5,599.0,604.0,"[1.0, 1.0, 1.0, 1.0, 1.0, 4.0]",9.0,...,False,1.0,75.0,False,True,False,False,False,False,1.0


In [63]:
# compact view for inspection of relevant columns


df3 = df2[ready_classify]
df3.head()

Unnamed: 0,pattern_generating_match,pattern_matched,piece_title,part,start_measure,start_beat,start_offset,sub_group_id
277,"[-2, -2, -2, -2, -2]","[-2, -2, -2, -2, -2]",Missa Ave Maria: Kyrie,Bassus,31,1.0,240.0,0.0
278,"[-2, -2, -2, -2, -2]","[-2, -2, -2, -2, -2]",Missa Ave Maria: Kyrie,Bassus,31,2.5,243.0,0.0
274,"[-2, -2, -2, -2, -2]","[-2, -2, -2, -2, -2]",Missa Ave Maria: Kyrie,Altus,73,2.0,594.0,1.0
275,"[-2, -2, -2, -2, -2]","[-2, -2, -2, -2, -2]",Missa Ave Maria: Kyrie,Altus,73,4.0,598.0,1.0
276,"[-2, -2, -2, -2, -2]","[-2, -2, -2, -2, -2]",Missa Ave Maria: Kyrie,Altus,73,4.5,599.0,1.0


In [65]:
# run the classifier on the newly curated list of groups

classified2 = df3.applymap(lists_to_tuples_a).groupby("sub_group_id").apply(predict_type_a)

# drop the new singletons

classified2.drop(classified2[classified2['predicted_type'] == "Singleton"].index, inplace = True)

# use the following to filter for particular types--PEN, ID, Fuga

# classified2 = classified2[classified2["predicted_type"] == "ID"]

classified2.head(50)

# output to csv
# classified2.to_csv('Mass_0005_1_Classified.csv')


Unnamed: 0,pattern_generating_match,pattern_matched,piece_title,part,start_measure,start_beat,start_offset,sub_group_id,predicted_type,offset_diffs,entry_number
17,"[-2, 2, -2, -2, -2]","[-2, 2, -2, -2, -2]",Missa Ave Maria: Kyrie,Altus,4,3.0,28.0,24.0,ID,"[20.0, 48.0, 20.0]",1
12,"[-2, 2, -2, -2, -2]","[-2, 2, -2, -2, -2]",Missa Ave Maria: Kyrie,Sup[erius],7,1.0,48.0,24.0,ID,"[20.0, 48.0, 20.0]",2
25,"[-2, 2, -2, -2, -2]","[-2, 2, -2, -2, -2]",Missa Ave Maria: Kyrie,Bassus,13,1.0,96.0,24.0,ID,"[20.0, 48.0, 20.0]",3
26,"[-2, 2, -2, -2, -2]","[-2, 2, -2, -2, -2]",Missa Ave Maria: Kyrie,Bassus,15,3.0,116.0,24.0,ID,"[20.0, 48.0, 20.0]",4
