In [1]:
from music21 import converter, note
import pandas as pd
import datetime
import time

In [2]:
# Parse the score file with music21; the resulting `piece` is what the
# extraction cell iterates over.
piece = converter.parse("../data/others/example1.mxl")

# Extracting the data

In [3]:
# Walk the flattened score and record three parallel lists, one entry per
# note or rest (every other element type is skipped):
#   notes[i]     - MIDI pitch number, or 128 for a rest
#   durations[i] - length in quarter notes (element.duration.quarterLength)
#   phrases[i]   - 1 if element i is the last one of its phrase, otherwise 0
# A phrase boundary is a change of `style.color` between two consecutive
# notes/rests; the final element always closes the last phrase.
notes = []
durations = []
phrases = []

previous_color = None
# flatten() replaces the deprecated `.flat` property.
for element in piece.flatten():
    if not isinstance(element, (note.Note, note.Rest)):
        continue

    # 128 is one past the highest MIDI pitch (127), so it cannot collide with a real note.
    notes.append(128 if element.isRest else element.pitch.midi)
    durations.append(element.duration.quarterLength)

    # Compare the string form so an unset colour (None) is an ordinary value.
    # "Is this the first element?" is decided by `phrases` being empty, not by
    # the colour being None; otherwise a score that opens with uncoloured notes
    # would lose the boundary at its first colour change.
    color = str(element.style.color)
    if phrases and color != previous_color:
        phrases[-1] = 1  # the previous element ended its phrase
    previous_color = color
    phrases.append(0)

# Close the last phrase; guarded so a score without notes/rests does not raise.
if phrases:
    phrases[-1] = 1

### Convert music data into a log file

In [4]:
# Turn the per-note lists (notes, durations, phrases) into the four columns of
# an event log, one row per note/rest:
#   cases      - phrase id ("ID000001", "ID000002", ...)
#   activities - note name after transposing the phrase so that its first
#                pitched note becomes C2, or "rest"
#   timestamps - running clock of the voice the phrase is assigned to
#   resources  - that voice's name

# cases
cases = []

# resources
resource_dict = { 1 : "voice1", 2 : "voice2", 3 : "voice3", 4 : "voice4" }
resources = []

# activity
activities = []

# timestamp
timestamps = []
# Read the clock once so all voices share exactly the same origin.
start_ts = int(time.time()) # epoch
voice_ts = dict.fromkeys(resource_dict, start_ts)

# Every phrase is transposed so that it begins on this pitch.
target_midi = note.Note("C2").pitch.midi

phrase_counter = 1
diff = None
for midi, quarter_length, is_phrase_end in zip(notes, durations, phrases):
    cases.append("ID{:06}".format(phrase_counter))

    # normalization
    if midi == 128: # rest
        activities.append("rest")
    else:
        # Take the transposition interval from the first *pitched* note of the
        # phrase; a leading rest (128) must not be used as the reference.
        if diff is None:
            diff = target_midi - midi
        activities.append(note.Note(midi + diff).nameWithOctave)

    # Phrases rotate through the voices: 1 -> voice2, 2 -> voice3,
    # 3 -> voice4, 4 -> voice1, ...
    voice = phrase_counter % 4 + 1
    # for example times 16 (it does not matter); float() because quarterLength
    # may be a Fraction (tuplets), which fromtimestamp() does not accept.
    voice_ts[voice] += float(quarter_length) * 16
    timestamps.append(datetime.datetime.fromtimestamp(voice_ts[voice]))
    resources.append(resource_dict[voice])

    if is_phrase_end == 1:
        phrase_counter += 1
        diff = None

In [5]:
# Assemble the event log: one row per note/rest, one column per list built above.
logs = pd.DataFrame(
    {
        "case:concept:name": cases,
        "time:timestamp": timestamps,
        "concept:name": activities,
        "org:resource": resources,
    }
)

In [6]:
# Write the event log to a CSV file in the current working directory.
# No options are passed, so pandas' defaults apply (the row index is written too).
logs.to_csv("ex1_music_log_data.csv")

### The phrases ID000003 and ID000015 are very similar (they should be identified as the same)

In [7]:
# Show every row of the log that belongs to phrase ID000003.
logs[logs["case:concept:name"].eq("ID000003")]

Unnamed: 0,case:concept:name,time:timestamp,concept:name,org:resource
13,ID000003,2023-05-03 16:58:32,C2,voice4
14,ID000003,2023-05-03 16:58:40,C2,voice4
15,ID000003,2023-05-03 16:58:48,E2,voice4
16,ID000003,2023-05-03 16:58:56,F2,voice4
17,ID000003,2023-05-03 16:59:04,A2,voice4
18,ID000003,2023-05-03 16:59:20,G2,voice4
19,ID000003,2023-05-03 16:59:28,E2,voice4


In [8]:
# Show every row of the log that belongs to phrase ID000015.
logs[logs["case:concept:name"].eq("ID000015")]

Unnamed: 0,case:concept:name,time:timestamp,concept:name,org:resource
102,ID000015,2023-05-03 17:01:52,C2,voice4
103,ID000015,2023-05-03 17:02:00,C2,voice4
104,ID000015,2023-05-03 17:02:08,E2,voice4
105,ID000015,2023-05-03 17:02:16,F2,voice4
106,ID000015,2023-05-03 17:02:24,A2,voice4
107,ID000015,2023-05-03 17:02:40,G2,voice4
108,ID000015,2023-05-03 17:02:48,E2,voice4
109,ID000015,2023-05-03 17:02:56,rest,voice4
