#### Presentation Types with Hidden Types

* March 2022 version

    * Uses getDistance to identify `close matches` with side-by-side comparison of soggetti.  With a distance of "1", the soggetti `4, 1, 2, 3`, and `5, 1, 2, 3` will count as the same.  These are reported as "flexed entries" in a separate column.

    * Labels Fuga, PEN, and ID according to the time intervals between entries.  
    * If two entries are separated by more than 70 offsets (a little under 9 bars; this is the default `max_diff` of `split_by_threshold`), the tool resets to a new pattern
    * Finds time intervals between entries (expressed as offsets, like `8.0, 4.0, 8.0`)
    * Finds melodic intervals between first note of successive entries in each pattern (like `P-5, P-8`)
    * Counts number of entries
    * Provides offset and measure/beat locations
    * Sorts all presentation types by the order in which they appear in the piece
    * Reports voice names of the entries, in order of their appearance
    * Omits singleton soggetti (just one entry of a given motive in isolation)
    
    ALSO
    
    * Finds "hidden" types within a longer Fuga.  That is, if a 5-voice Fuga also contains a PEN, it will label both of these as separate presentation types, along with all the relevant data noted above.

In [1]:
import intervals
from intervals import * 
from intervals import main_objs
import intervals.visualizations as viz
import pandas as pd
import re
import altair as alt 
from ipywidgets import interact
from pandas.io.json import json_normalize
from pyvis.network import Network
from IPython.display import display
import requests
import os
import numpy
import itertools
MYDIR = "saved_csv"
CHECK_FOLDER = os.path.isdir(MYDIR)

# Make sure the folder named by MYDIR is present, and report which case applied.
if CHECK_FOLDER:
    print(MYDIR, "folder already exists.")
else:
    os.makedirs(MYDIR)
    print("created folder : ", MYDIR)

saved_csv folder already exists.


#### The following are special functions used by the classifier.  Don't change them.

In [2]:
def find_entry_int_distance(coordinates, piece: intervals.main_objs.ImportedPiece):
    """
    Return the melodic intervals that separate the first notes of
    consecutive entries in one presentation type.

    Each item of `coordinates` is used as a `.loc` key into the table
    returned by `piece.getNoteRest()` (the caller in this notebook passes
    (offset, voice) pairs).  The value found there is turned into a note,
    and every pair of neighbouring notes yields one interval, reported by
    its directed name (quality and direction), for example
    P-4, m3, P5, P5, M-9, P-4, P4.

    The result is a list of strings; it is empty when fewer than two
    coordinates are given.
    """
    notes_and_rests = piece.getNoteRest()

    # Look up the value stored at every coordinate, keeping the given order.
    entry_tones = [notes_and_rests.loc[coordinate] for coordinate in coordinates]
    entry_notes = [note.Note(tone_name) for tone_name in entry_tones]

    # Pair each note with its successor to measure the step between entries.
    steps = [
        interval.Interval(earlier, later)
        for earlier, later in zip(entry_notes, entry_notes[1:])
    ]

    return [step.directedName for step in steps]

def split_by_threshold(seq, max_diff=70):
    """
    Split a sequence of offsets into runs wherever the gap between two
    neighbouring values is larger than `max_diff`.

    This is a generator: it yields one list per run, in the original order.
    A new run starts whenever `current - previous > max_diff`; a gap that is
    exactly `max_diff` does not split.  The threshold is 70 offsets by
    default.

    An empty `seq` yields nothing at all.

    Example: list(split_by_threshold([0, 10, 100, 110])) gives
    [[0, 10], [100, 110]].
    """
    it = iter(seq)
    try:
        last = next(it)
    except StopIteration:
        # Nothing to split.  Without this guard the StopIteration would
        # escape the generator body and be re-raised as RuntimeError (PEP 479).
        return

    part = [last]
    for curr in it:
        if curr - last > max_diff:
            # Gap too wide: close the current run and start a fresh one.
            yield part
            part = []

        part.append(curr)
        last = curr

    yield part
    

def classify_by_offset(offset_diffs):
    """
    Predict the Presentation Type from the time gaps between the first
    offsets of successive melodic entries.

    PEN:   there are at least two gaps and they are all identical.
    ID:    there is an odd number of gaps (three or more) and every other
           gap, starting with the first, is identical -- entries 1-2 are
           spaced like entries 3-4, and so on.
    FUGA:  anything else with at least one gap.

    Returns None when `offset_diffs` is empty.
    """
    every_other = offset_diffs[::2]
    n_diffs = len(offset_diffs)
    all_identical = len(set(offset_diffs)) == 1

    if all_identical and n_diffs > 1:
        return 'PEN'

    odd_count = n_diffs % 2 != 0
    if odd_count and len(set(every_other)) == 1 and n_diffs >= 3:
        return 'ID'

    if n_diffs >= 1:
        return 'FUGA'

    # No gaps at all (a single entry, or none): nothing to classify.
    return None

    

def temp_dict_of_details(slist, entry_array, det, matches, piece=None):
    """
    Assemble the features of one group of entries into a dictionary, which
    becomes one row of the presentation-type table.

    slist:        the starting offsets of the entries to describe
    entry_array:  DataFrame of the related entries, indexed by offset, with
                  a 'voice' column
    det:          DataFrame with an 'Offset' index level that exposes
                  'Measure' and 'Beat' columns after reset_index()
    matches:      the soggetti treated as one motive; stored unchanged
                  under 'Soggetti'
    piece:        the piece the entries belong to.  When omitted, the
                  notebook-level global named `piece` is used, which is how
                  this function behaved before the parameter existed.

    Returns a dict with the keys First_Offset, Offsets, Measures_Beats,
    Soggetti, Voices, Time_Entry_Intervals and Melodic_Entry_Intervals.
    Raises IndexError when none of the offsets in `slist` occurs in
    `entry_array`.
    """
    if piece is None:
        # Backward compatibility for callers that pass only four arguments.
        try:
            piece = globals()["piece"]
        except KeyError:
            raise NameError("name 'piece' is not defined") from None

    array = entry_array[entry_array.index.get_level_values(0).isin(slist)]
    short_offset_list = array.index.to_list()
    time_ints = numpy.diff(array.index).tolist()
    voice_list = array['voice'].to_list()

    # (offset, voice) pairs locate the first note of every entry.
    tone_coordinates = list(zip(short_offset_list, voice_list))
    mel_ints = find_entry_int_distance(tone_coordinates, piece)
    first_offset = short_offset_list[0]

    # Render each entry's location as "measure/beat".
    meas_beat = det[det.index.get_level_values('Offset').isin(short_offset_list)]
    mb2 = meas_beat.reset_index()
    meas_beat_list = (mb2["Measure"].astype(str) + "/" + mb2["Beat"].astype(str)).to_list()

    return {'First_Offset': first_offset,
            'Offsets': short_offset_list,
            'Measures_Beats': meas_beat_list,
            "Soggetti": matches,
            'Voices': voice_list,
            'Time_Entry_Intervals': time_ints,
            'Melodic_Entry_Intervals': mel_ints}


def _keep_first_per_offset(rows):
    """Keep rows with more than one offset, and only the first row seen for each First_Offset."""
    seen_offsets = set()
    kept = []
    for row in rows:
        if len(row['Offsets']) > 1 and row['First_Offset'] not in seen_offsets:
            seen_offsets.add(row['First_Offset'])
            kept.append(row)
    return kept


def classify_entries_as_presentation_types(piece):
    """
    Classify the melodic entries of `piece` as presentation types.

    For every soggetto, the entries of that soggetto and of its close
    matches (distance below 2) are gathered, and the first run of entries
    (see split_by_threshold) is labelled PEN, ID or FUGA by
    classify_by_offset.  Every smaller subset of three or more of those
    entries is then tested too, and the subsets that form a PEN or an ID
    are reported as "hidden" types.

    The result is a DataFrame sorted by First_Offset with the columns
    First_Offset, Measures_Beats, Melodic_Entry_Intervals, Offsets,
    Soggetti, Time_Entry_Intervals, Voices, Presentation_Type,
    Flexed_Entries (True when more than one soggetto is involved) and
    Number_Entries.  Singleton entries are omitted.  Among the main
    patterns only the first one starting at a given offset is kept, and
    likewise among the hidden ones.
    """
    nr = piece.getNoteRest()
    det = piece.detailIndex(nr, offset=True)

    # ngrams of melodic entries
    # for chromatic, use:
    # piece.getMelodicEntries(interval_settings=('c', True, True), n=5)
    mel = piece.getMelodicEntries(n=4)
    mels_stacked = mel.stack().to_frame()
    mels_stacked.rename(columns={0: "pattern"}, inplace=True)

    # edit distance, based on side-by-side comparison of melodic ngrams
    # gets flexed and other similar soggetti
    dist = piece.getDistance(mel)
    dist_stack = dist.stack().to_frame()

    # filter distances to threshold.  <2 is good
    filtered_dist = dist_stack[dist_stack[0] < 2].reset_index()
    filtered_dist.rename(columns={'level_0': "source", 'level_1': 'match'}, inplace=True)

    # Group the filtered distanced patterns
    full_list_of_matches = filtered_dist.groupby('source')['match'].apply(list).reset_index()

    main_rows = []
    hidden_rows = []

    for matches in full_list_of_matches["match"]:
        related_entry_list = mels_stacked[mels_stacked['pattern'].isin(matches)]
        entry_array = related_entry_list.reset_index(level=1).rename(
            columns={'level_1': "voice", 0: "pattern"})
        offset_list = entry_array.index.to_list()
        if not offset_list:
            continue

        # NOTE(review): only the first run of entries is analysed; later
        # runs of the same soggetti are not examined here -- confirm that
        # this is intended.
        slist = list(split_by_threshold(offset_list))[0]

        main = temp_dict_of_details(slist, entry_array, det, matches, piece)
        main["Presentation_Type"] = classify_by_offset(main['Time_Entry_Intervals'])
        main_rows.append(main)

        # Hidden types: every proper subset of three or more entries.  The
        # loop variable must not reuse the name `slist`, otherwise larger
        # subset sizes are drawn from the previous subset and come up empty.
        # The number of subsets grows quickly with the size of the run.
        for r in range(3, len(slist)):
            for combo in itertools.combinations(slist, r):
                hidden = temp_dict_of_details(combo, entry_array, det, matches, piece)
                hidden["Presentation_Type"] = classify_by_offset(hidden['Time_Entry_Intervals'])
                if hidden["Presentation_Type"] in ('PEN', 'ID'):
                    hidden_rows.append(hidden)

    # Main patterns first, then hidden ones; each list is de-duplicated on
    # its own, so a main pattern and a hidden one may share a First_Offset.
    rows = _keep_first_per_offset(main_rows) + _keep_first_per_offset(hidden_rows)

    column_order = ['First_Offset', 'Measures_Beats', 'Melodic_Entry_Intervals',
                    'Offsets', 'Soggetti', 'Time_Entry_Intervals', 'Voices',
                    'Presentation_Type']
    points_combined = pd.DataFrame(rows, columns=column_order)

    # A stable sort keeps a main pattern ahead of hidden patterns that start
    # at the same offset.
    points_combined = points_combined.sort_values(
        "First_Offset", kind="mergesort").reset_index(drop=True)
    points_combined['Flexed_Entries'] = points_combined["Soggetti"].apply(len) > 1
    points_combined["Number_Entries"] = points_combined["Offsets"].apply(len)
    return points_combined

## Load the Piece Here

* Note that you can load from CRIM, or put a file in the **Music_Files** folder in the Notebook.

In [3]:
# Import the score to analyse.  Later cells refer to it by the name `piece`.
# Keep exactly one importScore line active; the commented-out lines load
# alternative files from the Music_Files folder.
# piece = importScore('Music_Files/Senfl_Ave_forCRIM.mei_msg.mei')
piece = importScore('https://crimproject.org/mei/CRIM_Mass_0013_3.mei')
# piece = importScore('Music_Files/CRIM_Mass_0007_4.mei')


Downloading remote score...
Successfully imported https://crimproject.org/mei/CRIM_Mass_0013_3.mei


## Run the Classifier Here.  



In [4]:
# Classify the entries of the loaded piece and display the resulting table.
classify_entries_as_presentation_types(piece)

Unnamed: 0,First_Offset,Measures_Beats,Melodic_Entry_Intervals,Offsets,Soggetti,Time_Entry_Intervals,Voices,Presentation_Type,Flexed_Entries,Number_Entries
0,0.0,"[1/1.0, 3/1.0, 8/3.0, 10/1.0]","[P-4, P-5, P-4]","[0.0, 16.0, 60.0, 72.0]","[5, -2, 2, 2, 4, -2, 2, 2]","[16.0, 44.0, 12.0]","[Superius, Contratenor, Tenor, Bassus]",FUGA,True,4
1,38.0,"[5/4.0, 7/2.0, 14/4.0, 15/4.0, 16/2.0, 18/2.0]","[P-5, M2, P-5, P-4, P11]","[38.0, 50.0, 110.0, 118.0, 122.0, 138.0]","[-3, 2, -2, -2, -3, 2, -2, -3]","[12.0, 60.0, 8.0, 4.0, 16.0]","[Superius, Contratenor, Superius, Tenor, Bassu...",FUGA,True,6
2,94.0,"[12/4.0, 13/4.0, 17/2.0]","[P4, M2]","[94.0, 102.0, 130.0]","[-3, 2, -2, 2, -3, 2, -3, 2]","[8.0, 28.0]","[Tenor, Contratenor, Contratenor]",FUGA,True,3
3,162.0,"[21/2.0, 22/4.0]",[P5],"[162.0, 174.0]","[1, 3, -2, -2]",[12.0],"[Contratenor, Superius]",FUGA,False,2
4,198.0,"[25/4.0, 27/2.0]",[P5],"[198.0, 210.0]","[3, -2, -2, -2, 1, -2, -2, -2, 2, -2, -2, -2]",[12.0],"[Bassus, Tenor]",FUGA,True,2
5,222.0,"[28/4.0, 30/4.0, 35/4.0, 36/4.0, 38/2.0]","[P4, P-8, P8, P-8]","[222.0, 238.0, 278.0, 286.0, 298.0]","[2, -3, 5, 1]","[16.0, 40.0, 8.0, 12.0]","[Contratenor, Superius, Contratenor, Superius,...",FUGA,False,5
6,258.0,"[33/2.0, 34/2.0]",[P8],"[258.0, 266.0]","[2, -3, 5, -5]",[8.0],"[Bassus, Tenor]",FUGA,False,2
7,314.0,"[40/2.0, 40/4.0, 41/4.0, 44/4.0]","[P4, P-8, P12]","[314.0, 318.0, 326.0, 350.0]","[1, 1, 2, 1, 1, 1, 2, 2]","[4.0, 8.0, 24.0]","[Tenor, Contratenor, Bassus, Superius]",FUGA,True,4
8,330.0,"[42/2.0, 43/4.0, 47/4.0, 49/2.0, 49/4.0]","[P-5, P8, P-4, P4]","[330.0, 342.0, 374.0, 386.0, 390.0]","[1, -2, -2, -2, 2, -2, -2, -2]","[12.0, 32.0, 12.0, 4.0]","[Tenor, Bassus, Tenor, Bassus, Contratenor]",FUGA,True,5
9,410.0,"[52/2.0, 53/2.0, 54/4.0, 55/4.0, 56/4.0, 57/4....","[P-5, P-4, P-5, P8, P1, P-5]","[410.0, 418.0, 430.0, 438.0, 446.0, 454.0, 462.0]","[4, 2, 2, -3, 5, 2, 2, -3]","[8.0, 12.0, 8.0, 8.0, 8.0, 8.0]","[Superius, Contratenor, Tenor, Bassus, Contrat...",FUGA,True,7


In [17]:
# Store the classifier's table under `output` so that it can be filtered.
output = classify_entries_as_presentation_types(piece)
output

Unnamed: 0,First_Offset,Measures_Beats,Melodic_Entry_Intervals,Offsets,Soggetti,Time_Entry_Intervals,Voices,Presentation_Type,Flexed_Entries,Number_Entries
0,0.0,"[1/1.0, 3/1.0, 8/3.0, 10/1.0]","[P-4, P-5, P-4]","[0.0, 16.0, 60.0, 72.0]","[5, -2, 2, 2, 4, -2, 2, 2]","[16.0, 44.0, 12.0]","[Superius, Contratenor, Tenor, Bassus]",FUGA,True,4
1,38.0,"[5/4.0, 7/2.0, 14/4.0, 15/4.0, 16/2.0, 18/2.0]","[P-5, M2, P-5, P-4, P11]","[38.0, 50.0, 110.0, 118.0, 122.0, 138.0]","[-3, 2, -2, -2, -3, 2, -2, -3]","[12.0, 60.0, 8.0, 4.0, 16.0]","[Superius, Contratenor, Superius, Tenor, Bassu...",FUGA,True,6
2,94.0,"[12/4.0, 13/4.0, 17/2.0]","[P4, M2]","[94.0, 102.0, 130.0]","[-3, 2, -2, 2, -3, 2, -3, 2]","[8.0, 28.0]","[Tenor, Contratenor, Contratenor]",FUGA,True,3
3,162.0,"[21/2.0, 22/4.0]",[P5],"[162.0, 174.0]","[1, 3, -2, -2]",[12.0],"[Contratenor, Superius]",FUGA,False,2
4,198.0,"[25/4.0, 27/2.0]",[P5],"[198.0, 210.0]","[3, -2, -2, -2, 1, -2, -2, -2, 2, -2, -2, -2]",[12.0],"[Bassus, Tenor]",FUGA,True,2
5,222.0,"[28/4.0, 30/4.0, 35/4.0, 36/4.0, 38/2.0]","[P4, P-8, P8, P-8]","[222.0, 238.0, 278.0, 286.0, 298.0]","[2, -3, 5, 1]","[16.0, 40.0, 8.0, 12.0]","[Contratenor, Superius, Contratenor, Superius,...",FUGA,False,5
6,258.0,"[33/2.0, 34/2.0]",[P8],"[258.0, 266.0]","[2, -3, 5, -5]",[8.0],"[Bassus, Tenor]",FUGA,False,2
7,314.0,"[40/2.0, 40/4.0, 41/4.0, 44/4.0]","[P4, P-8, P12]","[314.0, 318.0, 326.0, 350.0]","[1, 1, 2, 1, 1, 1, 2, 2]","[4.0, 8.0, 24.0]","[Tenor, Contratenor, Bassus, Superius]",FUGA,True,4
8,330.0,"[42/2.0, 43/4.0, 47/4.0, 49/2.0, 49/4.0]","[P-5, P8, P-4, P4]","[330.0, 342.0, 374.0, 386.0, 390.0]","[1, -2, -2, -2, 2, -2, -2, -2]","[12.0, 32.0, 12.0, 4.0]","[Tenor, Bassus, Tenor, Bassus, Contratenor]",FUGA,True,5
9,410.0,"[52/2.0, 53/2.0, 54/4.0, 55/4.0, 56/4.0, 57/4....","[P-5, P-4, P-5, P8, P1, P-5]","[410.0, 418.0, 430.0, 438.0, 446.0, 454.0, 462.0]","[4, 2, 2, -3, 5, 2, 2, -3]","[8.0, 12.0, 8.0, 8.0, 8.0, 8.0]","[Superius, Contratenor, Tenor, Bassus, Contrat...",FUGA,True,7


In [12]:
# Keep only the rows labelled PEN.
# NOTE: this rebinds `output`; re-run the cell that builds `output` before
# any step that needs the full table again.
output = output.loc[output['Presentation_Type'] == "PEN"] 
output

Unnamed: 0,First_Offset,Measures_Beats,Melodic_Entry_Intervals,Offsets,Soggetti,Time_Entry_Intervals,Voices,Presentation_Type,Flexed_Entries,Number_Entries
10,430.0,"[54/4.0, 55/4.0, 56/4.0]","[P-5, P8]","[430.0, 438.0, 446.0]","[4, 2, 2, -3, 5, 2, 2, -3]","[8.0, 8.0]","[Tenor, Bassus, Contratenor]",PEN,True,3
11,438.0,"[55/4.0, 56/4.0, 57/4.0]","[P8, P1]","[438.0, 446.0, 454.0]","[4, 2, 2, -3, 5, 2, 2, -3]","[8.0, 8.0]","[Bassus, Contratenor, Superius]",PEN,True,3
12,446.0,"[56/4.0, 57/4.0, 58/4.0]","[P1, P-5]","[446.0, 454.0, 462.0]","[4, 2, 2, -3, 5, 2, 2, -3]","[8.0, 8.0]","[Contratenor, Superius, Tenor]",PEN,True,3
15,526.0,"[66/4.0, 68/4.0, 70/4.0]","[P8, P-5]","[526.0, 542.0, 558.0]","[-2, 2, 2, 2]","[16.0, 16.0]","[Tenor, Superius, Contratenor]",PEN,False,3
16,542.0,"[68/4.0, 71/2.0, 73/4.0]","[P-12, P12]","[542.0, 562.0, 582.0]","[-2, 2, 2, 2]","[20.0, 20.0]","[Superius, Bassus, Superius]",PEN,False,3
17,558.0,"[70/4.0, 72/2.0, 73/4.0]","[P-4, P8]","[558.0, 570.0, 582.0]","[-2, 2, 2, 2]","[12.0, 12.0]","[Contratenor, Tenor, Superius]",PEN,False,3
28,1398.0,"[170/4.0, 172/2.0, 173/4.0, 175/2.0]","[P5, P5, P4]","[1398.0, 1410.0, 1422.0, 1434.0]","[1, 1, -5, 3]","[12.0, 12.0, 12.0]","[Bassus, Tenor, Contratenor, Superius]",PEN,False,4
29,1398.0,"[170/4.0, 172/2.0, 173/4.0]","[P5, P5]","[1398.0, 1410.0, 1422.0]","[1, 1, -5, 3]","[12.0, 12.0]","[Bassus, Tenor, Contratenor]",PEN,False,3
30,1410.0,"[172/2.0, 173/4.0, 175/2.0]","[P5, P4]","[1410.0, 1422.0, 1434.0]","[1, 1, -5, 3]","[12.0, 12.0]","[Tenor, Contratenor, Superius]",PEN,False,3
32,1506.0,"[184/2.0, 185/4.0, 187/2.0]","[P1, P5]","[1506.0, 1518.0, 1530.0]","[3, 3, 1, -2]","[12.0, 12.0]","[Tenor, Bassus, Contratenor]",PEN,False,3


In [20]:
# Keep only the patterns that have fewer than four entries.
filtered = output.loc[output['Number_Entries'] < 4] 
filtered

Unnamed: 0,First_Offset,Measures_Beats,Melodic_Entry_Intervals,Offsets,Soggetti,Time_Entry_Intervals,Voices,Presentation_Type,Flexed_Entries,Number_Entries
2,94.0,"[12/4.0, 13/4.0, 17/2.0]","[P4, M2]","[94.0, 102.0, 130.0]","[-3, 2, -2, 2, -3, 2, -3, 2]","[8.0, 28.0]","[Tenor, Contratenor, Contratenor]",FUGA,True,3
3,162.0,"[21/2.0, 22/4.0]",[P5],"[162.0, 174.0]","[1, 3, -2, -2]",[12.0],"[Contratenor, Superius]",FUGA,False,2
4,198.0,"[25/4.0, 27/2.0]",[P5],"[198.0, 210.0]","[3, -2, -2, -2, 1, -2, -2, -2, 2, -2, -2, -2]",[12.0],"[Bassus, Tenor]",FUGA,True,2
6,258.0,"[33/2.0, 34/2.0]",[P8],"[258.0, 266.0]","[2, -3, 5, -5]",[8.0],"[Bassus, Tenor]",FUGA,False,2
10,430.0,"[54/4.0, 55/4.0, 56/4.0]","[P-5, P8]","[430.0, 438.0, 446.0]","[4, 2, 2, -3, 5, 2, 2, -3]","[8.0, 8.0]","[Tenor, Bassus, Contratenor]",PEN,True,3
11,438.0,"[55/4.0, 56/4.0, 57/4.0]","[P8, P1]","[438.0, 446.0, 454.0]","[4, 2, 2, -3, 5, 2, 2, -3]","[8.0, 8.0]","[Bassus, Contratenor, Superius]",PEN,True,3
12,446.0,"[56/4.0, 57/4.0, 58/4.0]","[P1, P-5]","[446.0, 454.0, 462.0]","[4, 2, 2, -3, 5, 2, 2, -3]","[8.0, 8.0]","[Contratenor, Superius, Tenor]",PEN,True,3
15,526.0,"[66/4.0, 68/4.0, 70/4.0]","[P8, P-5]","[526.0, 542.0, 558.0]","[-2, 2, 2, 2]","[16.0, 16.0]","[Tenor, Superius, Contratenor]",PEN,False,3
16,542.0,"[68/4.0, 71/2.0, 73/4.0]","[P-12, P12]","[542.0, 562.0, 582.0]","[-2, 2, 2, 2]","[20.0, 20.0]","[Superius, Bassus, Superius]",PEN,False,3
17,558.0,"[70/4.0, 72/2.0, 73/4.0]","[P-4, P8]","[558.0, 570.0, 582.0]","[-2, 2, 2, 2]","[12.0, 12.0]","[Contratenor, Tenor, Superius]",PEN,False,3


#### Below is Development Work

In [None]:
# this works with ONE list of offsets

# Development cell: search ONE hard-coded list of offsets for hidden PEN / ID
# patterns and collect a detail row for each hit in `points2`.
#
# The cell is not self-contained: `entry_array`, `det` and `piece` are not
# defined here and must already exist in the notebook session.
# NOTE(review): `combinations` is used unqualified, while only `itertools`
# is imported by name in this notebook -- presumably it arrives through
# `from intervals import *`; confirm.
points2 = pd.DataFrame()
split_list = [90.0, 94.0, 102.0, 106.0, 134.0, 146.0, 162.0]

l = len(split_list)  
# r is the subset size: from 3 entries up to one fewer than the whole list.
for r in range(3, l):
    list_combinations = list(combinations(split_list, r))
#             combo_time_ints = []
    for combo in list_combinations:
        # Time gaps between the offsets of this subset.
        combo_time_ints = numpy.diff(combo).tolist()
        # Rows of entry_array whose offset belongs to the subset.
        combo_array = entry_array[entry_array.index.get_level_values(0).isin(combo)]
        combo_voice_list = combo_array['voice'].to_list()
        combo_patterns = combo_array['pattern']
        unique_combo_patterns = list(set(combo_patterns))
        tone_coordinates =  list(zip(combo, combo_voice_list))
# tone_coordinates.ffill(inplace=True)
        mel_ints = find_entry_int_distance(tone_coordinates, piece)
        hidden_type = classify_by_offset(combo_time_ints)

        # "measure/beat" label for each offset of the subset.
        meas_beat = det[det.index.get_level_values('Offset').isin(combo)]
        mb2 = meas_beat.reset_index()
        mb2['mb'] = mb2["Measure"].astype(str) + "/" + mb2["Beat"].astype(str)
        meas_beat_list = mb2['mb'].to_list()

        combo_temp = {'First_Offset': combo[0], 
            'Offsets': combo, 
            'Measures_Beats': meas_beat_list,
            'Presentation_Type': hidden_type,
            "Soggetti": unique_combo_patterns,
            'Voices': combo_voice_list, 
            'Time_Entry_Intervals': combo_time_ints, 
            'Melodic_Entry_Intervals': mel_ints}

        # Only PEN and ID subsets are recorded; FUGA subsets are ignored.
        if 'PEN' in hidden_type:
            points2 = points2.append(combo_temp, ignore_index=True).sort_values("First_Offset")
#             points2 = points2[points2['Offsets'].apply(len) > 1]
        if 'ID' in hidden_type:
            points2 = points2.append(combo_temp, ignore_index=True).sort_values("First_Offset")
#             points2 = points2[points2['Offsets'].apply(len) > 1]
        
        
# Names that were inspected while debugging:
# combo_time_ints
# combo_array
# # combo_voice_list
# # combo_patterns
# # unique_combo_patterns
# # tone_coordinates
# # mel_ints
# # combo_temp
points2

In [None]:
# this finds hidden fugas.  
# try to run each of the first set of results above ('points') through this tool, then append the 
# new results to the full DF, and sort again.  
# mark each long pattern with 'has hidden pattern' boolean?  or ?

# Development cell: print the PEN / ID subsets hidden inside one row of
# `points` and collect their time intervals in `hidden_pts`.
#
# `points` is not defined in this cell; it must already exist in the
# notebook session and hold an "Offsets" column with an entry labelled 4.
# NOTE(review): `combinations` is used unqualified, while only `itertools`
# is imported by name in this notebook -- confirm where it comes from.
sample_list = points["Offsets"][4]

hidden_pts = []
n = len(sample_list)
# `item` is the subset size: from 3 entries up to one fewer than the whole list.
for item in range(3, n):
    list_combinations = list(combinations(sample_list, item))
    for group in list_combinations:
        group_time_ints = numpy.diff(group).tolist()
        hidden_type = classify_by_offset(group_time_ints)
        # The two branches below do the same thing for PEN and for ID.
        if 'PEN' in hidden_type:
            print(group)
            print(group_time_ints)
            print(hidden_type)
            hidden_pts.append(group_time_ints)
        if 'ID' in hidden_type:
            print(group)
            print(group_time_ints)
            print(hidden_type)
            hidden_pts.append(group_time_ints)
        

# Displays the combinations of the last subset size that was tried.
list_combinations

In [None]:
def classify_entries_as_presentation_types(piece):
    """
    Development draft of the classifier that returns only the "hidden"
    presentation types (`points2`).

    NOTE(review): this definition has the same name as the classifier
    defined earlier in the notebook, so running this cell rebinds that name
    to this draft.
    """
    # Classifier with Functions
    points = pd.DataFrame()
    points2 = pd.DataFrame()
    # new_offset_list = []
    nr = piece.getNoteRest()
    det = piece.detailIndex(nr, offset=True)

    # durations and ngrams of durations
    # NOTE(review): dur and dur_ng are not used anywhere below.
    dur = piece.getDuration(df=nr)
    dur_ng = piece.getNgrams(df=dur, n=4)

    # ngrams of melodic entries
    # for chromatic, use:
    # piece.getMelodicEntries(interval_settings=('c', True, True), n=5)
    mel = piece.getMelodicEntries(n=4)
    mels_stacked = mel.stack().to_frame()
    mels_stacked.rename(columns =  {0:"pattern"}, inplace = True)

    # edit distance, based on side-by-side comparison of melodic ngrams
    # gets flexed and other similar soggetti
    dist = piece.getDistance(mel)
    dist_stack = dist.stack().to_frame()


    # filter distances to threshold.  <2 is good
    filtered_dist_stack = dist_stack[dist_stack[0] < 2]
    filtered_dist = filtered_dist_stack.reset_index()
    filtered_dist.rename(columns =  {'level_0':"source", 'level_1':'match'}, inplace = True)

    # Group the filtered distanced patterns
    full_list_of_matches = filtered_dist.groupby('source')['match'].apply(list).reset_index()

    for matches in full_list_of_matches["match"]:
        related_entry_list = mels_stacked[mels_stacked['pattern'].isin(matches)]
        entry_array = related_entry_list.reset_index(level=1).rename(columns = {'level_1': "voice", 0: "pattern"})
        offset_list = entry_array.index.to_list()
        split_list = list(split_by_threshold(offset_list))
        # here is the list of starting offsets of the original set of entries:  slist
        # Only the first run of entries (split_list[0]) is used.
        slist = split_list[0]
        temp = temp_dict_of_details(slist, entry_array, det, matches)

        # The main table is re-labelled, de-duplicated on First_Offset and
        # stripped of single-entry rows after every appended row.
        points = points.append(temp, ignore_index=True)
        points['Presentation_Type'] = points['Time_Entry_Intervals'].apply(classify_by_offset)
        points.drop_duplicates(subset=["First_Offset"], keep='first', inplace = True)
        points = points[points['Offsets'].apply(len) > 1]

        l = len(slist)
        if l > 2:
            for r in range(3, l):
    #             list_combinations = list(combinations(slist, r))
                list_combinations = list(combinations(slist, r))
                # NOTE(review): this loop rebinds `slist`.  On the next pass
                # of the `r` loop, combinations(slist, r) is taken from the
                # last, shorter subset and is therefore empty, so only
                # three-entry subsets are ever examined.
                for slist in list_combinations:

                    temp = temp_dict_of_details(slist, entry_array, det, matches)

                    temp["Presentation_Type"] = classify_by_offset(temp['Time_Entry_Intervals'])

                    if 'PEN' in temp["Presentation_Type"]:
                        points2 = points2.append(temp, ignore_index=True)#.sort_values("First_Offset")
    #                     points = points.append(combo_temp, ignore_index=True).sort_values("First_Offset")
                        points2 = points2[points2['Offsets'].apply(len) > 1]
                    if 'ID' in temp["Presentation_Type"]:
                        # NOTE(review): `combo_temp` is not defined inside
                        # this function; `temp` looks like the intended name.
                        points2 = points2.append(combo_temp, ignore_index=True)#.sort_values("First_Offset")
    #                     points = points.append(combo_temp, ignore_index=True).sort_values("First_Offset")
                # NOTE(review): the sorted frame is discarded; sort_values
                # returns a new frame unless inplace=True is given.
                points2.sort_values("First_Offset")
                points2.drop_duplicates(subset=["First_Offset"], keep='first', inplace = True)

    # NOTE(review): points_combined is built but never returned, and
    # Number_Entries is taken from `points` rather than from points_combined.
    points_combined = points.append(points2, ignore_index=True).sort_values("First_Offset").reset_index(drop=True)
    points_combined['Flexed_Entries'] = points_combined["Soggetti"].apply(len) > 1
    points_combined["Number_Entries"] = points["Offsets"].apply(len)     
    return points2


In [None]:
# Development cell: print the PEN / ID subsets hidden in `sample_list` and
# look up the matching entries for each subset.
#
# Depends on names left in the notebook session by other cells:
# `split_list` (indexed here as a list of offset runs), `sample_list`,
# `entry_array` and `hidden_pts`.
#
# As first written this cell could not be parsed: a `for item in group:`
# line had no body, and it also reused the name of the outer loop variable.
# The entries are now selected for the whole subset at once.

l = len(split_list[0])
for item in range(3, l):
    for group in itertools.combinations(sample_list, item):
        group_time_ints = numpy.diff(group).tolist()
        hidden_type = classify_by_offset(group_time_ints)

        # Rows of entry_array whose offset belongs to this subset.
        array = entry_array[entry_array.index.get_level_values(0).isin(group)]
        short_offset_list = array.index.to_list()
        time_ints = numpy.diff(array.index).tolist()
        voice_list = array['voice'].to_list()

        # Only PEN and ID subsets are reported; FUGA subsets are skipped.
        if hidden_type in ('PEN', 'ID'):
            print(group)
            print(group_time_ints)
            print(hidden_type)
            hidden_pts.append(group_time_ints)
# len(split_list[0])