In [27]:
import music21
import pandas as pd
import os
import numpy as np

# Raise pandas' display limits so that frames of up to 500 rows and
# 100 columns are rendered in full instead of being truncated.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 100

In [28]:
# Load the note table. Later cells read its `file`, `soprano`, `alto`,
# `tenor` and `bass` columns; the path is relative to the working directory.
df = pd.read_csv('dataset.csv')

In [29]:
# Distinct values of the `file` column, one per chunk, in plain string order
# (so e.g. "..._chunk_10" sorts before "..._chunk_2").
chunks = sorted(set(df['file'].tolist()))

In [30]:
# Inclusive MIDI-number limits of each voice.
SOPRANO_MIN, SOPRANO_MAX = 57, 81
ALTO_MIN, ALTO_MAX = 52, 74
TENOR_MIN, TENOR_MAX = 48, 69
BASS_MIN, BASS_MAX = 36, 64

# For every voice, map each MIDI number inside its limits to a 1-based class
# index (lowest note -> 1). Index 0 is not used here; encode_note returns it
# for the '--' and 'Rest' placeholders.
ranges = {
    voice: dict(zip(range(low, high + 1), range(1, high - low + 2)))
    for voice, low, high in (
        ('soprano', SOPRANO_MIN, SOPRANO_MAX),
        ('alto', ALTO_MIN, ALTO_MAX),
        ('tenor', TENOR_MIN, TENOR_MAX),
        ('bass', BASS_MIN, BASS_MAX),
    )
}

# Report how many distinct pitches each voice can take.
for part, notes in ranges.items():
    print(f'the {part} has a range of {len(notes)} notes (plus the "hold" symbol)')

the soprano has a range of 25 notes (plus the "hold" symbol)
the alto has a range of 23 notes (plus the "hold" symbol)
the tenor has a range of 22 notes (plus the "hold" symbol)
the bass has a range of 29 notes (plus the "hold" symbol)


In [31]:
def encode_note(n, rang):
    """Turn one note cell into the integer class index of voice ``rang``.

    The placeholders '--' and 'Rest' both encode to 0. Any other value is
    parsed with ``music21.note.Note`` and its MIDI number is looked up in
    ``ranges[rang]``; a pitch missing from that table raises ``KeyError``.
    """
    if n in ('--', 'Rest'):
        return 0
    midi_number = music21.note.Note(n).pitch.midi
    return ranges[rang][midi_number]

def one_hot_encode(idx, rang):
    """Return a one-hot list for class ``idx`` of voice ``rang``.

    The list has one slot per entry of ``ranges[rang]`` plus one extra slot,
    so index 0 is available in addition to the 1-based note indices. Every
    slot is 0 except position ``idx``, which is set to 1.
    """
    width = len(ranges[rang]) + 1
    vector = [0] * width
    vector[idx] = 1
    return vector

In [32]:
def _one_hot_voice(frame, voice):
    """Encode the ``voice`` column of ``frame`` as one flat one-hot vector."""
    class_ids = frame[voice].apply(encode_note, args=(voice,))
    return np.array([one_hot_encode(i, voice) for i in class_ids]).reshape(-1)


# x[i]: flattened soprano one-hot sequence of chunk i.
# y[i]: flattened alto, tenor and bass one-hot sequences of chunk i,
#       concatenated in that order.
x, y = [], []
for chunk in chunks:
    print(f'Procesing {chunk}...')
    chunk_rows = df.loc[df['file'] == chunk]
    x.append(_one_hot_voice(chunk_rows, 'soprano'))
    y.append(
        np.concatenate(
            [_one_hot_voice(chunk_rows, voice) for voice in ('alto', 'tenor', 'bass')],
            axis=None,
        )
    )

Procesing 113. Ihr Gestirn', ihr hohen L\u3fte_chunk_0...
Procesing 113. Ihr Gestirn', ihr hohen L\u3fte_chunk_1...
Procesing 113. Ihr Gestirn', ihr hohen L\u3fte_chunk_10...
Procesing 113. Ihr Gestirn', ihr hohen L\u3fte_chunk_11...
Procesing 113. Ihr Gestirn', ihr hohen L\u3fte_chunk_12...
Procesing 113. Ihr Gestirn', ihr hohen L\u3fte_chunk_2...
Procesing 113. Ihr Gestirn', ihr hohen L\u3fte_chunk_3...
Procesing 113. Ihr Gestirn', ihr hohen L\u3fte_chunk_4...
Procesing 113. Ihr Gestirn', ihr hohen L\u3fte_chunk_5...
Procesing 113. Ihr Gestirn', ihr hohen L\u3fte_chunk_6...
Procesing 113. Ihr Gestirn', ihr hohen L\u3fte_chunk_7...
Procesing 113. Ihr Gestirn', ihr hohen L\u3fte_chunk_8...
Procesing 113. Ihr Gestirn', ihr hohen L\u3fte_chunk_9...
Procesing 25. Christ lag in Todesbanden_chunk_0...
Procesing 25. Christ lag in Todesbanden_chunk_1...
Procesing 25. Christ lag in Todesbanden_chunk_2...
Procesing 25. Christ lag in Todesbanden_chunk_3...
Procesing 25. Christ lag in Todesbanden

In [33]:
# Convert the per-chunk lists into numpy arrays (one entry per chunk).
# NOTE(review): this yields a regular 2-D array only if every chunk produced
# vectors of the same length - confirm against the dataset.
x, y = np.array(x), np.array(y)

In [34]:
# Persist the encoded soprano inputs and alto/tenor/bass targets as .npy
# files; both paths are relative to the working directory.
np.save(file='input.npy', arr=x)
np.save(file='output.npy', arr=y)