# skyline (helper functions)

In [1]:
def mergeIntervals(arr):
    """Merge overlapping or touching ``[start, end]`` intervals.

    Two intervals are merged when the start of the later one is less than
    or equal to the end of the running interval (so ``[1, 4]`` and
    ``[4, 5]`` become ``[1, 5]``).

    Args:
        arr: List of ``[start, end]`` pairs. It is sorted in place by start
            value, exactly as before, so callers that relied on that side
            effect keep working.

    Returns:
        A new list of ``[start, end]`` lists ordered by start. An empty
        input yields an empty list.
    """
    # Sort by increasing start so a single left-to-right sweep is enough.
    arr.sort(key=lambda x: x[0])

    merged = []
    for interval in arr:
        start, end = interval[0], interval[1]
        # No magic sentinel values: the running interval is simply the last
        # entry of ``merged``, so arbitrarily small coordinates are handled.
        if merged and start <= merged[-1][1]:
            # Overlaps (or touches) the running interval: extend it if needed.
            if end >= merged[-1][1]:
                merged[-1][1] = end
        else:
            # Strictly after the running interval: start a new one.
            merged.append([start, end])
    return merged

def gettop(note, intervals):
    """Return the fraction of a note's duration that ``intervals`` cover.

    Args:
        note: Sequence whose items 4 and 5 are the note's onset and offset.
        intervals: Iterable of ``[start, end]`` pairs. The overlaps are
            summed independently, so the pairs are expected not to overlap
            one another.

    Returns:
        ``covered / duration``. A zero-length note returns ``1`` so that
        callers treat it as fully covered and drop it.
    """
    onset, offset = note[4], note[5]
    duration = offset - onset
    if duration == 0:
        return 1  # (we do not need this note)

    # Clip every interval to the note's own span ...
    clipped = (
        (max(onset, span[0]), min(offset, span[1])) for span in intervals
    )
    # ... and add up only the pieces that are non-empty.
    covered = sum(hi - lo for lo, hi in clipped if lo < hi)
    return covered / duration

def skyline(notes):  # revised skyline algorithm by Chai, 2000
    """Pick the top-line notes of a single channel.

    Notes are visited from highest to lowest pitch (item 2). A note is kept
    when at most half of its duration is already covered by the kept notes'
    ``[onset, offset]`` spans (items 4 and 5).

    Returns:
        The kept notes, ordered by onset (item 4) and then by item 0.
    """
    kept = []
    covered = []  # merged [onset, offset] spans of the notes kept so far
    by_pitch_desc = sorted(notes, key=lambda n: n[2], reverse=True)
    for candidate in by_pitch_desc:
        if gettop(candidate, covered) <= 0.5:
            kept.append(candidate)
            # Add this note's span and re-merge to keep spans disjoint.
            covered = mergeIntervals(covered + [[candidate[4], candidate[5]]])
    kept.sort(key=lambda n: (n[4], n[0]))  # sort by onset & bar(new)
    return kept
    
def skyline_reverse(notes):  # revised skyline algorithm by Chai, 2000
    """Pick the bottom-line notes of a single channel.

    Mirror image of the skyline pass: notes are visited from lowest to
    highest pitch (item 2). A note is kept when at most 80% of its duration
    is already covered by the kept notes' ``[onset, offset]`` spans
    (items 4 and 5).

    Returns:
        The kept notes, ordered by onset (item 4) and then by item 0.
    """
    kept = []
    covered = []  # merged [onset, offset] spans of the notes kept so far
    by_pitch_asc = sorted(notes, key=lambda n: n[2])
    for candidate in by_pitch_asc:
        if gettop(candidate, covered) <= 0.8:
            kept.append(candidate)
            # Add this note's span and re-merge to keep spans disjoint.
            covered = mergeIntervals(covered + [[candidate[4], candidate[5]]])
    kept.sort(key=lambda n: (n[4], n[0]))  # sort by onset & bar(new)
    return kept

def align_token(notes, length, tpb=480):
    """Group notes into bars and return the 4-field tokens of each bar.

    Each note is placed in the bar containing its onset (item 4); a bar
    spans ``4 * tpb`` ticks. Only the first four items of a note are kept.
    The first token of every bar gets item 0 forced to 0 (the new-bar
    marker). A bar without any note is represented by a single copy of the
    module-level ``ABS`` token.

    Args:
        notes: Notes ordered by onset; item 0 is the new-bar flag and
            item 4 the onset in ticks.
        length: Expected number of bars, counted up to and including the
            bar of the last note.
        tpb: Ticks per beat (defaults to the previously hard-coded 480).

    Returns:
        A list with one entry per bar, each a list of 4-item tokens.

    Raises:
        ValueError: If a note's onset lies before the bar currently being
            filled (unsorted input or a negative onset). Previously this
            case appended empty bars forever.
        AssertionError: If a bar holds a second token flagged as new-bar,
            or if the number of bars produced differs from ``length``.
    """
    ticks_per_bar = 4 * tpb
    out = []
    bar = []
    bar_count = 0

    for note_idx, note in enumerate(notes):
        onset = note[4]
        if onset < bar_count * ticks_per_bar:
            raise ValueError(
                "note %d has onset %s before the start of bar %d; "
                "notes must be sorted by onset and non-negative"
                % (note_idx, onset, bar_count)
            )

        # Close bars until we reach the one that contains this onset.
        while onset >= (bar_count + 1) * ticks_per_bar:
            out.append(bar if bar else [list(ABS)])
            bar = []
            bar_count += 1

        token = note[:4]
        if bar:
            # no two 0(newbar) within the same bar
            if note[0] == 0:
                raise AssertionError(
                    "second new-bar token in bar %d: note %d = %r"
                    % (bar_count, note_idx, note)
                )
        else:
            # The first token of a bar always carries the new-bar marker.
            token[0] = 0
        bar.append(token)

    # Flush the final bar (or a placeholder when there were no notes).
    out.append(bar if bar else [list(ABS)])
    bar_count += 1

    if bar_count != length:
        raise AssertionError(
            "expected %s bars but produced %d" % (length, bar_count)
        )
    return out

# tokenize data
## tokenize all .mid files inside a folder
### source data: PianoMidi_nicely_formatted
<code> cd ~/prepare_data/CP</code>

<code> python main.py --task skyline --input_dir ../../skyline_data --output_dir ../../skyline_data --name skylineNPY --dict ../../dict/CP_skyline.pkl</code>

In [2]:
import numpy as np

In [3]:
# Load the tokenized dataset (presumably the file written by the main.py
# command above via --name skylineNPY; confirm the output directory).
tokens=np.load('./skyline_data/skylineNPY.npy')

In [4]:
# Show the dimensions of the loaded token array.
tokens.shape

(1683, 512, 4)

In [5]:
# Special 4-field tokens; see ./dict/CP_skyline.pkl for the dictionary these ids refer to.
PAD=np.array([2,16,86,64]) # padding token used to fill a segment up to its fixed length
EOS=np.array([4,18,88,66]) # end of an input segment
ABS=np.array([5,19,89,67]) # bar left empty by the skyline algorithm (e.g. skyline picked a long note from the previous bar)

In [6]:
# Split the page array into songs: a page whose final token is PAD or EOS
# closes the current song, and the next page opens a new one.
tokens_by_song = []
last_idx = 0
for idx, page in enumerate(tokens):
    final_token = page[-1]
    closes_song = np.array_equal(final_token, PAD) or np.array_equal(final_token, EOS)
    if closes_song:
        song_end = idx + 1
        tokens_by_song.append(tokens[last_idx:song_end])
        last_idx = song_end

In [7]:
# Build paired (skyline, full) training segments for every song.
# Each segment packs whole bars, is terminated with EOS and padded with PAD
# to a fixed number of rows.
skyline_max_len=100  # rows per skyline segment after padding
full_max_len=600  # rows per full segment after padding
temp_skyline=[]
allsong_skyline_tokens=[]
allsong_full_tokens=[]
max_token_len=0  # longest full segment seen (EOS included, before padding)
for song in tokens_by_song:
    current_bar=-1
    tpb=480
    token_with_on_off_set=[]
    skyline_tokens=[]
    full_tokens=[]
    # Flatten the song's pages, dropping PAD/EOS, and attach onset/offset.
    for page in song:
        for token in page:
            if not((token==PAD).all() or (token==EOS).all()):
                # A token whose first field is 0 opens a new bar.
                # NOTE(review): if a song's first real token does not have
                # token[0]==0, current_bar stays -1 and the onsets below
                # become negative -- confirm this cannot happen in the data.
                if token[0]==0:
                    current_bar+=1
                temp=list(token)
                # A bar is 4*tpb long; token[1] is scaled by tpb/4 and
                # token[3]+1 by tpb/8 (presumably position and duration
                # indices -- confirm against the dictionary).
                temp.append(int(current_bar*4*tpb+token[1]*tpb/4))  #onset
                temp.append(int(current_bar*4*tpb+token[1]*tpb/4+(token[3]+1)*tpb/8))  #offset
                token_with_on_off_set.append(temp)
    
    #skyline
    total_bar=current_bar+1
    # org: every token of the song grouped by bar.
    org=align_token(token_with_on_off_set,total_bar)
    # sl: union of the top-line and bottom-line selections.
    sl=skyline(token_with_on_off_set)+skyline_reverse(token_with_on_off_set)
    # Remove duplicates (notes picked by both passes); dict.fromkeys keeps
    # the first occurrence and preserves order.
    sl = [tuple(x) for x in sl]
    sl = list(dict.fromkeys(sl))
    sl = [list(x) for x in sl]
    sl=sorted(sl,key=lambda x: (x[4],x[0])) #sort by onset & bar(new)
    sl=align_token(sl,total_bar)
    
    #output
    current_bar=0
    temp_skyline=[]
    temp_full=[]
    while current_bar<total_bar:
        # Greedily add whole bars while the skyline segment stays below
        # skyline_max_len; the same bars go into the full segment.
        while current_bar<total_bar and len(temp_skyline)+len(sl[current_bar])<skyline_max_len:
            temp_skyline+=sl[current_bar]
            temp_full+=org[current_bar]
            current_bar+=1
        # Every bar contributes at least one token (ABS for an empty bar), so
        # both segments must be non-empty. This also stops the outer loop
        # from spinning when a single bar cannot fit into a segment.
        assert(0<len(temp_skyline)<skyline_max_len and 0<len(temp_full)<full_max_len ) # at least it should have the ABS token
        #add EOS
        temp_skyline.append(EOS)
        temp_full.append(EOS)
        temp_skyline=np.array(temp_skyline).reshape(-1,4)
        temp_full=np.array(temp_full).reshape(-1,4)
        # Pad both segments with PAD rows up to their fixed lengths.
        while len(temp_skyline)<skyline_max_len:            
            temp_skyline=np.vstack((temp_skyline,PAD))
        if len(temp_full)>max_token_len:
            max_token_len=len(temp_full)
        while len(temp_full)<full_max_len:
            temp_full=np.vstack((temp_full,PAD))
        skyline_tokens.append(temp_skyline)
        full_tokens.append(temp_full)
        temp_skyline=[]
        temp_full=[]
        
    # Both outputs must stay aligned segment-for-segment.
    assert(len(allsong_skyline_tokens)==len(allsong_full_tokens))
    for batch in skyline_tokens:
        allsong_skyline_tokens.append(batch)
    for batch in full_tokens:
        allsong_full_tokens.append(batch)
# Stack the per-segment arrays into two 3-D arrays.
allsong_skyline_tokens=np.array(allsong_skyline_tokens)
allsong_full_tokens=np.array(allsong_full_tokens)        
assert(allsong_skyline_tokens.shape[0]==allsong_full_tokens.shape[0])

In [11]:
# Longest full segment (EOS included, before padding); must not exceed full_max_len.
max_token_len

504

In [12]:
# Show the shapes of the two output arrays; their first dimensions must match.
allsong_skyline_tokens.shape,allsong_full_tokens.shape

((5793, 100, 4), (5793, 600, 4))

In [14]:
# Write the skyline segments and the matching full segments to disk.
for out_path, out_array in (
    ('skyline_data/skyline_tokens.npy', allsong_skyline_tokens),
    ('skyline_data/full_tokens.npy', allsong_full_tokens),
):
    with open(out_path, 'wb') as out_file:
        np.save(out_file, out_array)