In [1]:
from music21 import *
import music21
import os
import glob
import re
import numpy as np
import math

In [2]:
#global functions
def cal_offset(e):
    """Return the absolute offset of ``e``.

    The result is the sum of the ``offset`` attribute of ``e`` and of every
    container reached by following ``activeSite`` until it is ``None``.
    ``cal_offset(None)`` is ``0``.
    """
    # Collect the element and all of its enclosing sites, innermost first.
    chain=[]
    node=e
    while node is not None:
        chain.append(node)
        node=node.activeSite

    # Fold from the outermost site inwards so the additions happen in the
    # order inner.offset + (outer.offset + (... + 0)).
    total=0
    for node in reversed(chain):
        total=node.offset+total
    return total

In [3]:
class Score:
    """Beat-indexed view of one piece.

    ``beat_list[i]`` is the ``Beat`` collecting everything that sounds during
    the i-th quarter-note beat (absolute offsets in ``[i, i+1)``).
    """
    def __init__(self):
        self.beat_list=[Beat()]

    def _ensure_beat(self,index):
        """Append empty beats until ``beat_list[index]`` exists."""
        while len(self.beat_list)<=index:
            self.beat_list.append(Beat())

    #extract info from note and add to corresponding beat(s)
    def add_note(self,note):
        """Distribute ``note`` over every beat it overlaps.

        The note (or chord; ``Beat.add_note`` iterates over all of its
        pitches) is cut at integer beat boundaries and each slice is passed
        to the matching beat together with the slice length (at most 1).
        Notes with zero ``quarterLength`` contribute nothing.
        """
        start=cal_offset(note)
        end=start+note.quarterLength
        #loop until the note is played to its end (small tolerance for float noise)
        while start<end-0.000000000001:
            beat_index=math.floor(start)
            self._ensure_beat(beat_index)
            #the slice stops at the next beat boundary or at the end of the note
            portion=min(beat_index+1-start,end-start)
            self.beat_list[beat_index].add_note(note,portion)
            start+=portion

    #add key to the beat in which the annotated note starts
    def add_key(self,note):
        """Record the key written in ``note.lyric`` (the text before ``'('``).

        The beat list is grown first, so an annotation on a note that
        ``add_note`` did not create a beat for (e.g. a zero-length note at
        the end of the piece) no longer raises ``IndexError``.
        """
        assert(note.lyric is not None and '(' in note.lyric)
        beat_index=math.floor(cal_offset(note))
        self._ensure_beat(beat_index)
        self.beat_list[beat_index].add_key(note.lyric.split('(')[0])

    #call after all notes and keys were added
    def infer_key(self):
        """Give every beat a key.

        Beats before the first annotation inherit the first key of the
        piece; every later un-annotated beat inherits the most recent key.
        When no beat has a key at all, nothing is changed.
        """
        current=None
        #backtrack: find the first key of the piece
        for beat in self.beat_list:
            if beat.key_full is not None:
                current=(beat.key_full,beat.key_in_num,beat.major)
                break
        if current is None:
            return
        #bring forward
        for beat in self.beat_list:
            if beat.key_full is None:
                beat.key_full,beat.key_in_num,beat.major=current
            else:
                current=(beat.key_full,beat.key_in_num,beat.major)
                
class Beat:
    """Pitch content and key annotation of a single quarter-note beat.

    The three ``(12,7)`` arrays are indexed ``[pitch class, octave index]``
    where pitch class 0 is C and octave index 0..6 stands for octaves 1..7
    (lower/higher octaves are clamped into that range).
    """
    #semitone distance of each natural note above C
    key_mapping={
        'C':0,
        'D':2,
        'E':4,
        'F':5,
        'G':7,
        'A':9,
        'B':11
    }
    def __init__(self):
        self.notes = np.zeros((12,7))  #1 where a pitch sounds, octaves 1 to 7
        self.total_duration = np.zeros((12,7))  #summed duration inside this beat
        self.notes_occurences_count= np.zeros((12,7))  #number of note slices seen
        self.key_full=None    #key string as annotated, e.g. 'GM'
        self.major=None       #True when the annotation contains 'M'
        self.key_in_num=None  #pitch class (0..11) of the tonic

    def key2num(self,k):
        """Convert a note/key name such as ``'C'``, ``'F#'``, ``'Bb'``, ``'E-'``
        or ``'CX'`` to its pitch class (0..11).

        The first character is the note letter; the remaining characters are
        accidentals whose kind is taken from the first one and whose amount
        is their count (``'X'`` counts double).

        Raises:
            KeyError: if the first character is not one of A-G.
            ValueError: if the accidental is not recognised. Previously this
                case returned ``None``, which NumPy then treated as
                ``newaxis`` when used as an index and silently overwrote an
                unrelated row.
        """
        name=k.upper()
        base=self.key_mapping[name[0]]
        count=len(name)-1
        if count==0:
            return base
        mark=name[1]
        if mark in ('#','♯'):
            return (base+count)%12
        #upper() turned a flat written as 'b' into 'B'
        if mark in ('B','-','♭'):
            return (base-count)%12
        if mark=='X':
            return (base+count*2)%12
        raise ValueError('unsupported accidental in note name: %r' % (k,))

    def add_note(self,note,duration):
        """Register every pitch of ``note`` as sounding for ``duration`` beats.

        ``duration`` is the part of the note that lies inside this beat and
        therefore must not exceed 1.
        """
        assert(duration<=1)
        for pitch in note.pitches:
            #use the structured attributes: slicing nameWithOctave misreads
            #negative octaves ('C-1' looks like C-flat) and two-digit octaves
            pitch_idx=self.key2num(pitch.name)
            octave=pitch.octave
            if octave is None:
                octave=pitch.implicitOctave
            octave_idx=min(max(int(octave)-1,0),6)
            self.notes[pitch_idx,octave_idx]=1
            self.total_duration[pitch_idx,octave_idx]+=duration
            self.notes_occurences_count[pitch_idx,octave_idx]+=1

    def add_key(self,k):
        """Store key annotation ``k``; its last character is the mode letter
        (a capital ``'M'`` anywhere in ``k`` marks the key as major)."""
        self.major = 'M' in k
        self.key_full=k
        self.key_in_num=self.key2num(k[:-1])

In [4]:
#load data beat by beat
def load_scores(pattern,label):
    """Parse every file matching ``pattern`` into a ``Score``.

    Args:
        pattern: glob pattern of the MusicXML files to load.
        label: text printed in front of each file name ('train' / 'test').

    Returns:
        ``(paths, scores)``: the matched paths in sorted order and the
        ``Score`` built from each of them, index-aligned.
    """
    paths=[]
    scores=[]
    #sorted: glob order is OS dependent, sorting keeps indices reproducible
    for piece in sorted(glob.glob(pattern)):
        print(label,piece)
        score=Score()
        flat_stream=converter.parse(piece).flat

        #extract notes; a lyric containing '(' carries a key annotation
        for element in flat_stream.notes:
            score.add_note(element)
            if element.lyric is not None and '(' in element.lyric:
                score.add_key(element)

        score.infer_key()
        paths.append(piece)
        scores.append(score)
    return paths,scores

files=["../musicxml(train)/*.mxl","../musicxml(test)/*.mxl"]
train_paths,all_score_train=load_scores(files[0],'train')
test_paths,all_score_test=load_scores(files[1],'test')
#file names of all pieces, train pieces first (same order as the score lists)
all_score=train_paths+test_paths

train ../musicxml(train)\Fugue_in_G_Minor.mxl
train ../musicxml(train)\G_minor.mxl
train ../musicxml(train)\Menuet_in_G_Minor.mxl
train ../musicxml(train)\Minuet_in_F.mxl
train ../musicxml(train)\Minuet_in_G_Major.mxl
train ../musicxml(train)\Moonlight_Sonata_1st_Movement.mxl
train ../musicxml(train)\Musette_in_D.mxl
train ../musicxml(train)\Nocturne_in_B_Major.mxl
train ../musicxml(train)\Nocturne_in_C#_Minor.mxl
train ../musicxml(train)\Nocturne_in_Eb_Major.mxl
train ../musicxml(train)\Nocturne_in_E_Minor.mxl
train ../musicxml(train)\Nocturne_in_F#_Major.mxl
train ../musicxml(train)\Nocturne_no._1.mxl
train ../musicxml(train)\Nocturne_No._20_in_C#_Minor.mxl
train ../musicxml(train)\Prélude_in_A_Major.mxl
train ../musicxml(train)\Prélude_in_B_Major.mxl
train ../musicxml(train)\Prélude_in_B_Minor.mxl
train ../musicxml(train)\Prélude_in_B_Minor_op104a.mxl
train ../musicxml(train)\Prélude_in_Db_Major.mxl
train ../musicxml(train)\Prélude_in_E_Minor.mxl
train ../musicxml(train)\Prélude_in_

In [5]:
#turns each beat to a vector
#one weight per octave column of the (12,7) beat arrays, lowest octave first
weight=[1.35,1.25,0.9,0.95,0.8,0.75,0.7]

def beat_features(beat):
    """Return the 12-element pitch-class profile of ``beat``.

    Duration times occurrence count is normalised, weighted per octave,
    summed over the octaves and normalised again. A silent beat gives a
    zero vector.
    """
    value=beat.total_duration*beat.notes_occurences_count
    total=np.sum(value)
    if total==0:
        return np.zeros((12))
    value=value/total
    value=value*weight
    profile=value.sum(axis=1)
    return profile/profile.sum()

def beat_target(beat):
    """Return the (13,1) target of ``beat``: one-hot tonic in rows 0..11 and
    the major flag (1/0) in the last row.

    Raises:
        ValueError: if the beat has no key (the piece had no key annotation).
    """
    if beat.key_in_num is None or beat.major is None:
        raise ValueError('beat has no key; was infer_key() run on an annotated piece?')
    target=np.zeros((13,1))
    target[-1]=beat.major*1
    target[beat.key_in_num]=1
    return target

def vectorize_scores(scores):
    """Return ``(X, Y)``: per piece, the list of beat profiles and the list
    of beat targets, in beat order."""
    all_x=[]
    all_y=[]
    for score in scores:
        piece_x=[beat_features(beat) for beat in score.beat_list]
        piece_y=[beat_target(beat) for beat in score.beat_list]
        assert(all(len(v)==12 for v in piece_x))
        assert(all(len(v)==13 for v in piece_y))
        all_x.append(piece_x)
        all_y.append(piece_y)
    return all_x,all_y

trainX,trainY=vectorize_scores(all_score_train)
testX,testY=vectorize_scores(all_score_test)

In [6]:
#generate training segments, ordered in score
look_forward=4  #number of beats BEFORE the current beat that are summed
look_after=4    #number of beats AFTER the current beat that are summed

def _summed_context(piece,positions):
    """Sum the beat vectors of ``piece`` at ``positions``; positions outside
    the piece count as zero vectors. An empty window gives a zero vector."""
    rows=[np.array(piece[p]) if 0<=p<len(piece) else np.zeros(12) for p in positions]
    if not rows:
        return np.zeros(12)
    return np.sum(np.array(rows),axis=0)

def build_segments(piecesX,piecesY,before=look_forward,after=look_after):
    """Turn per-beat vectors into three-step segments.

    For every beat the segment is ``[sum of the `before` preceding beats,
    the beat itself, sum of the `after` following beats]``.

    Returns:
        ``(segmentsX, segmentsY)``: per piece, the list of segments and the
        list of the matching targets taken from ``piecesY``.
    """
    segments_x=[]
    segments_y=[]
    for idx_p,piece in enumerate(piecesX):
        piece_notesX=[]
        piece_notesY=[]
        for idx_b in range(len(piece)):
            past=_summed_context(piece,range(idx_b-before,idx_b))
            future=_summed_context(piece,range(idx_b+1,idx_b+after+1))
            piece_notesX.append([past,piece[idx_b],future])
            piece_notesY.append(piecesY[idx_p][idx_b])
        segments_x.append(piece_notesX)
        segments_y.append(piece_notesY)
    return segments_x,segments_y

dataX,dataY=build_segments(trainX,trainY) #train
dataXX,dataYY=build_segments(testX,testY) #test

In [7]:
#turn the training/testing data (ordered by score) into a trainable format (ordered by segment)
def trainable(X,Y):
  """Flatten per-piece lists into arrays.

  ``X`` and ``Y`` hold one sequence per piece. The pieces are concatenated in
  order; the inputs are stacked into one array and the targets are reshaped
  to ``(number of segments, 13)``.
  """
  segments=[]
  for piece in X:
    segments.extend(piece)

  targets=[]
  for piece in Y:
    targets.extend(piece)

  return np.array(segments),np.array(targets).reshape((-1,13))

In [8]:
#LSTM model (Key prediction)

In [9]:
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence

In [10]:
from keras.layers import Flatten
from keras.layers import Input
from keras.models import Model
from keras.callbacks import EarlyStopping
from keras.layers import Bidirectional

In [11]:
#lstm model
def get_model():
  """Build and compile the two-headed key classifier.

  Input: segments of shape ``(3,12)``. Two stacked LSTMs feed two heads:
  ``keyPrediction`` (12-way softmax, categorical cross-entropy, weight 0.7)
  and ``majorPrediction`` (sigmoid, binary cross-entropy, weight 0.3).

  Returns:
      The compiled Keras ``Model`` with outputs ``[keyPrediction, majorPrediction]``.
  """
  in_data = Input(shape=(3,12))

  lstm = LSTM(128,return_sequences=True)(in_data)
  lstm = LSTM(12)(lstm)
  # kept so the graph matches the original; presumably a no-op here because
  # the second LSTM returns only its last step
  lstm = Flatten()(lstm)

  # NOTE(review): the original also built Dense(64, relu) on `lstm` but never
  # connected it to an output, so it was not part of the model; it is removed
  # here. Both heads read the LSTM features directly.
  output2=Dense(1,activation='sigmoid',name='majorPrediction')(lstm)
  output=Dense(12,activation='softmax',name='keyPrediction')(lstm)

  model = Model(inputs=in_data, outputs=[output,output2])

  losses ={
          'keyPrediction':'categorical_crossentropy',
          'majorPrediction':'binary_crossentropy'
        }

  lossWeights={
          'keyPrediction':0.7,
          'majorPrediction':0.3
        }

  model.compile(  loss=losses,
                loss_weights= lossWeights,
                optimizer='adam',
                metrics=['accuracy'])
  return model

# Early stopping shared by all folds: watches val_loss with a patience of 30
# epochs and restores the best weights when training stops.
callback=EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=30,
    verbose=2,
    mode='auto',
    baseline=None,
    restore_best_weights=True,
)

In [12]:
#HMM model(chord prediction)
import sys
sys.path.append('../modules')
import HMM
# Hidden states of the chord HMM: every state name is the mode ('Minor' or
# 'Major', five characters) followed by the chord label.
_minor_states=[
      'MinorI',
      'MinorI+',
      'MinorbII',
      'MinorII',
      'MinorIII',
      'MinorIV',
      'MinorIV+',
      'MinorV',
      'MinorV+',
      'MinorVI',
      'MinorGerVI',
      'MinorFreVI',
      'MinorItaVI',
      'MinorVII',
      'MinorDimVII',
]
_major_states=[
      'MajorI',
      'MajorbII',
      'MajorII',
      'MajorIII',
      'MajorIV',
      'MajorV',
      'MajorbVI',
      'MajorGerVI',
      'MajorFreVI',
      'MajorItaVI',
      'MajorVI',
      'MajorVII',
]
# minor states first, then major states
h_states=_minor_states+_major_states

def prepareHMM(K_fold_selection):
    """Build the chord-HMM data for the pieces selected by index.

    Args:
        K_fold_selection: indices into ``all_score`` (file paths).

    Returns:
        ``(HMM_notes, HMM_chord, HMM_beats)``, one list entry per piece:
        * notes: per chord segment, a dict pitch name -> normalised weight
          of the notes sounding from that chord label up to the next one;
        * chord: per segment, ``'<tonic><M|m>_<chord label>'``;
        * beats: per segment, the absolute offset at which the label occurs.

    Raises:
        ValueError: if a chord label appears in a piece before any label of
            the form ``key(chord)`` has established the key.
    """
    def _normalised(note_weights):
        #scale the weights to sum to 1; left untouched when there is nothing to scale
        total=sum(note_weights.values())
        if total:
            for name in note_weights:
                note_weights[name]=note_weights[name]/total
        return note_weights

    def _octave_weight(pitch):
        #same clamping as Beat.add_note: octaves 1..7 map to weight[0..6]
        octave=pitch.octave
        if octave is None:
            octave=pitch.implicitOctave
        return weight[min(max(int(octave)-1,0),len(weight)-1)]

    HMM_notes=[]
    HMM_chord=[]
    HMM_beats=[]
    for piece in (all_score[e] for e in K_fold_selection):
        #key/chord context belongs to one piece and must not leak into the next
        current_chord=None
        current_key=None
        data_chord=[]
        data_notes=[]
        data_beats=[]
        parsed=converter.parse(piece)
        all_notes=[]
        for el in parsed.recurse().notes:
            if el.lyric is not None:
                el.lyric=el.lyric.replace('♭','b')
            all_notes.append((el.lyric,el,cal_offset(el)))

        #sort by onset; at equal onsets labelled notes come first so the
        #label opens its segment before the notes sounding with it are added
        #(a string sentinel would place lowercase labels after unlabelled notes)
        ordered=sorted(all_notes,key=lambda item:(item[2],item[0] is None,item[0] or ""))
        data_note={}
        for lyric,el,offset in ordered:
            #notes before the first chord label belong to no segment
            if current_chord is None and lyric is None:
                continue
            if lyric is not None:
                if '(' in lyric:
                    current_key=lyric.split('(')[0]
                    current_chord=lyric.split('(')[1].split(')')[0]
                else:
                    current_chord=lyric
                if current_key is None:
                    raise ValueError('chord label %r at offset %s of %s precedes any key annotation'
                                     % (lyric,offset,piece))

                major='M' if 'M' in current_key else 'm'
                data_chord.append(current_key[:-1]+major+'_'+current_chord)
                data_beats.append(offset)
                #close the segment of the previous label
                data_notes.append(_normalised(data_note))
                data_note={}

            for pitch in el.pitches:
                contribution=el.quarterLength*_octave_weight(pitch)
                if pitch.name in data_note:
                    data_note[pitch.name]+=contribution
                    data_note[pitch.name]*=1.2 #reward for occurrence
                else:
                    data_note[pitch.name]=contribution

        #the first entry holds what preceded the first label: drop it, then
        #add the still-open last segment (normalised like all the others)
        data_notes=data_notes[1:]
        data_notes.append(_normalised(data_note))
        HMM_notes.append(data_notes)
        HMM_chord.append(data_chord)
        HMM_beats.append(data_beats)

    return HMM_notes,HMM_chord,HMM_beats

In [13]:
#tools for evaluation
from chordToNote import *
# Key names in list order; twelve entries.
chords=[
    'C','Db','D','Eb',
    'E','F','F#','G',
    'Ab','A','Bb','B',
]
# Mode names; two entries.
M=[
    'Minor',
    'Major',
]

In [None]:
#K-fold cross-validation
from sklearn.model_selection import KFold

num_folds=10
#seed of the fold assignment only; the Keras weight initialisation is not seeded
FOLD_SEED=42
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=FOLD_SEED)
fold_no = 1

acc_per_fold = []
loss_per_fold = []
acc2_per_fold=[]
acc3_per_fold=[]

#one entry per piece, train pieces first (same order as all_score); kept as
#plain lists because pieces differ in length and do not form a rectangular array
k_fold_x=dataX+dataXX
k_fold_y=dataY+dataYY

HMM_score=0
HMM_count=0

#spellings handed to ChordToNote instead of the ones on the left
PREDICTED_KEY_ALIASES={
    'gbMinor':'f#Minor',
    'dbMinor':'c#Minor',
    'abMinor':'g#Minor',
    'd#Minor':'ebMinor',
}
#NOTE(review): the letter case of the replacements is inconsistent
#('F#minor' vs 'g#minor'); kept as found - confirm against ChordToNote
GROUND_KEY_ALIASES={
    'Gbminor':'F#minor',
    'Dbminor':'C#minor',
    'Abminor':'g#minor',
    'D#minor':'ebminor',
}

def _key_sample_weights(targets):
    """Balanced weight per sample, total/(24*class count), where a class is
    the (tonic, mode) pair read from a row of ``targets``."""
    labels=[(int(np.argmax(row[:-1])),int(row[-1])) for row in targets]
    counts={}
    for label in labels:
        counts[label]=counts.get(label,0)+1
    total=len(labels)
    return np.array([total/(24*counts[label]) for label in labels])

def _score_key_predictions(targets,tonic_probs,mode_probs):
    """Count exact hits and near misses of the key prediction.

    Returns ``(correct, fifth, relative, parallel)``: exact match; right mode
    with the predicted tonic 7 semitones above the true one; opposite mode
    with tonics 3 semitones apart (relative key); same tonic, opposite mode.
    """
    correct=fifth=relative=parallel=0
    for truth,tonic_row,mode_row in zip(targets,tonic_probs,mode_probs):
        true_tonic=int(np.argmax(truth[:-1]))
        pred_tonic=int(np.argmax(tonic_row))
        pred_major=1 if mode_row[0]>=0.5 else 0
        same_mode=truth[-1]==pred_major
        if true_tonic==pred_tonic and same_mode:
            correct+=1
        elif (true_tonic+7)%12==pred_tonic and same_mode:
            fifth+=1
        elif true_tonic==pred_tonic:
            parallel+=1
        elif not same_mode:
            #relative major lies 3 semitones above its relative minor
            shift=3 if pred_major==0 else -3
            if true_tonic==(pred_tonic+shift)%12:
                relative+=1
    return correct,fifth,relative,parallel

def _predicted_key_name(tonic_row,mode_row):
    """Name of the predicted key, e.g. 'GMajor' or 'gMinor' (minor tonics are lower-cased)."""
    is_major=int(mode_row[0]>=0.5)
    tonic=chords[int(np.argmax(tonic_row))]
    return (tonic if is_major else tonic.lower())+M[is_major]

def _normalize_chord_label(label):
    """Drop sevenths and spell diminished chords as 'DimVII' (without
    turning an existing 'DimVII' into 'DimVIIVII')."""
    label=label.replace('7','')
    if 'DimVII' not in label:
        label=label.replace('Dim','DimVII')
    return label

for train, test in kfold.split(np.arange(len(k_fold_x))):

    model=get_model()

    processedX,processedY=trainable([k_fold_x[i] for i in train],[k_fold_y[i] for i in train])
    processedXX,processedYY=trainable([k_fold_x[i] for i in test],[k_fold_y[i] for i in test])

    sample_weights=_key_sample_weights(processedY)

    # start K-fold
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

    # Fit data to model
    # NOTE(review): the held-out fold is also the early-stopping validation
    # set, so the scores reported for it are optimistic.
    history = model.fit(processedX, [processedY[:,:-1],processedY[:,-1]],
                        validation_data=(processedXX, [processedYY[:,:-1],processedYY[:,-1]]),
                        verbose=0,
                        epochs=1000,
                        #one weight array per output: a single bare array is
                        #not accepted for multi-output models by every Keras version
                        sample_weight=[sample_weights,sample_weights],
                        callbacks=[callback],
                        batch_size=1024,
                        shuffle=True)

    # Generate generalization metrics
    scores = model.evaluate(processedXX, [processedYY[:,:-1],processedYY[:,-1]], verbose=0)
    print(f'Score for fold {fold_no}: {model.metrics_names[3]} of {scores[3]*100}%; {model.metrics_names[4]} of {scores[4]*100}%')

    # evaluate key prediction
    tonic_probs,mode_probs=model.predict(processedXX)
    key_count_all=len(processedYY)
    correct,fifth,relative,parallel=_score_key_predictions(processedYY,tonic_probs,mode_probs)
    weighted_acc=(correct+0.5*fifth+0.3*relative+0.2*parallel)/key_count_all
    print('Correct:',correct,'Fifth:',fifth,'Relative:',relative,'parallel:',parallel,'Wrong:',key_count_all-correct-fifth-relative-parallel)
    print('Acc:',correct/(key_count_all),'weighted Acc:',weighted_acc)

    #log acc of key_prediction
    acc_per_fold.append(weighted_acc* 100)
    acc2_per_fold.append(scores[3]* 100)
    acc3_per_fold.append(scores[4]* 100)
    loss_per_fold.append(scores[0])

    #prepare HMM model
    train_notes,train_chords,_=prepareHMM(train)
    hmm_model=HMM.HMM(len(h_states),2,h_states,["outside chord","inside chord"])
    hmm_model.train_supervisied(train_notes,train_chords)#initialize parameter

    #prepare HMM test data
    test_notes,test_chords,offsets=prepareHMM(test)
    key_name_scores=[]
    for test_id,chord_onsets in enumerate(offsets):
        piece_tonic,piece_mode=model.predict(np.array(k_fold_x[test[test_id]]))
        n_beats=len(piece_tonic)
        #segment i covers the beats from its onset up to the next onset;
        #the number of beats closes the last segment
        boundaries=list(chord_onsets)+[n_beats]
        key_name_score=[]
        trace_id=0
        for i in range(len(boundaries)-1):
            bag=[]
            #add predicted key of every beat of the segment to the bag
            while trace_id<boundaries[i+1] and trace_id<n_beats:
                bag.append(_predicted_key_name(piece_tonic[trace_id],piece_mode[trace_id]))
                trace_id+=1
            if bag:
                #majority vote from bag; ties go to the key seen first
                key_name_score.append(max(dict.fromkeys(bag), key = bag.count))
            elif key_name_score:
                #follow previous pred
                key_name_score.append(key_name_score[-1])
            else:
                #no beat and no previous segment: use the nearest beat
                nearest=min(trace_id,n_beats-1)
                key_name_score.append(_predicted_key_name(piece_tonic[nearest],piece_mode[nearest]))
        key_name_scores.append(key_name_score)

    #predict on HMM + evaluation
    for test_i in range(len(test_notes)):
        prediction=hmm_model.predict(test_notes[test_i],key_name_scores[test_i])
        prediction=[h_states[i] for i in prediction]

        predict_result=[]
        for pKey,state in zip(key_name_scores[test_i],prediction):
            pKey=PREDICTED_KEY_ALIASES.get(pKey,pKey)
            #state[5:] strips the 'Minor'/'Major' prefix of the state name
            predict_result.append(ChordToNote(pKey, _normalize_chord_label(state[5:])))

        ground_result=[]
        for labelled in test_chords[test_i]:
            mode_name,chord_label=labelled.split('_',1)
            mode_name+='ajor' if 'M' in mode_name else 'inor'
            mode_name=GROUND_KEY_ALIASES.get(mode_name,mode_name)
            ground_result.append(ChordToNote(mode_name, _normalize_chord_label(chord_label)))

        pairs=list(zip(predict_result,ground_result))
        if not pairs:
            #a piece without chord labels cannot be scored
            print(all_score[test[test_i]],'no chords to evaluate')
            continue
        matched=sum(1 for predicted,truth in pairs if predicted[0]==truth[0])
        print(all_score[test[test_i]],matched/len(pairs))
        HMM_score+=matched/len(pairs)
        HMM_count+=1

    # Increase fold number
    fold_no = fold_no + 1


------------------------------------------------------------------------
Training for fold 1 ...


In [None]:
#acc: key+chord prediction (mean of the per-piece chord accuracies over all folds)
#nan instead of a ZeroDivisionError when no piece was evaluated
HMM_score/HMM_count if HMM_count else float('nan')

In [None]:
# == key prediction ACC==
# Per-fold results followed by mean (and standard deviation) over all folds.
separator='------------------------------------------------------------------------'
print(separator)
print('Score per fold')
fold_rows=zip(loss_per_fold,acc_per_fold,acc2_per_fold,acc3_per_fold)
for fold_number,(fold_loss,fold_acc,fold_key_acc,fold_maj_acc) in enumerate(fold_rows,start=1):
  print(separator)
  print(f'> Fold {fold_number} - Loss: {fold_loss} - Key&Maj Hierarchical Accuracy: {fold_acc}% - Key Accuracy: {fold_key_acc}% - Maj Accuracy: {fold_maj_acc}%')
print(separator)
print('Average scores for all folds:')
mean_acc,std_acc=np.mean(acc_per_fold),np.std(acc_per_fold)
mean_key_acc,std_key_acc=np.mean(acc2_per_fold),np.std(acc2_per_fold)
mean_maj_acc,std_maj_acc=np.mean(acc3_per_fold),np.std(acc3_per_fold)
print(f'> Total Hierarchical Accuracy: {mean_acc} (+- {std_acc})')
print(f'> KeyAccuracy: {mean_key_acc} (+- {std_key_acc})')
print(f'> MajAccuracy: {mean_maj_acc} (+- {std_maj_acc})')
print(f'> Total Loss: {np.mean(loss_per_fold)}')
print(separator)