In [1]:
import numpy as np
from tqdm import tqdm

## Process input

In [2]:
def melody_txt2arr(txt):
    '''
    transofrm melody data input a numpy array,
    where notes/rests are represeted as non-negative integers
    and each element in the numpy array lasted a 16th note, or a quarter of a beat
    '''
    compacted_array = txt.split("\n")[:-1]
    compacted_array = list(map(lambda x: list(map(int, x.split())), compacted_array))
    melody_array = list(map(lambda x: [x[0]]*x[1], compacted_array))
    melody_array = [note for sublist in melody_array for note in sublist]
    return np.array(melody_array)


with open("POP909-Dai/001/melody.txt") as f:
    input = f.read()
    print(f"\tInput:\n", input[79:96], sep="")
    melody = melody_txt2arr(input)
    print(f"\n\tProceesed input:\n", melody[80:100], sep="")
    print(f"\n\tLength of input:\n", len(melody), sep="")

	Input:
61 1
0 1
66 5
0 1

	Proceesed input:
[68  0 65  0 61  0 66 66 66 66 66  0 61 63 66 68 70  0 66  0]

	Length of input:
1077


In [3]:
def chord_txt2arr(txt):
    '''
    transform chord data into a numpy array,
    where chords are represented as 5 numbers: 1 root note and 4 chord tones (one may be 0/empty)
    and each element in the numpy array lasted a beat
    '''
    compacted_array = txt.split("\n")[:-1]
    compacted_array = list(map(lambda x: x.replace(",","").replace("[","").replace("]",""), compacted_array))
    compacted_array = list(map(lambda x: list(map(int, x.split()[1:])), compacted_array))
    chord_array = []
    for line in compacted_array:
        for _ in range(line[-1]):
            if len(line) == 1:
                chord_array.append([0]*5)
            else:
                chord_array.append([line[-2]] + (line[:-2]+[0]*4)[:4])
    return np.array(chord_array)


with open("POP909-Dai/001/finalized_chord.txt") as f:
    input = f.read()
    print(f"\tInput:\n", input[45:114], sep="")
    chord = chord_txt2arr(input)
    print(f"\n\tProceesed input:\n", chord[80:90], sep="")
    print(f"\n\tLength of input:\n", len(chord), sep="")

	Input:
Bb:min [1, 5, 10] 10 2 
Eb:min [3, 6, 10] 3 2 
B:maj [3, 6, 11] 11 2 

	Proceesed input:
[[11  3  6 11  0]
 [11  3  6  9 11]
 [ 1  1  5  8  0]
 [ 1  1  5  8  0]
 [10  1  5 10  0]
 [10  1  5 10  0]
 [ 3  3  6 10  0]
 [ 3  3  6 10  0]
 [11  3  6 11  0]
 [11  3  6  9 11]]

	Length of input:
284


In [4]:
NUM_DATA = 909
MAX_INPUT_LENGTH = 800

def process_input(index):
    '''
    given the index of the data (between 1 and 909),
    return the combine melody and chord data,
    where each row represents a beat,
    the first 5 values of a row represents the chord
    and the last 4 values of a row represent the melody
    '''
    with open(f"POP909-Dai/{index:03}/melody.txt") as f:
        melody = melody_txt2arr(f.read())
    with open(f"POP909-Dai/{index:03}/finalized_chord.txt") as f:
        chord = chord_txt2arr(f.read())
    melody = np.pad(melody, (0, MAX_INPUT_LENGTH*4-melody.shape[0]), "constant", constant_values=0)
    melody = np.reshape(melody, (MAX_INPUT_LENGTH,4))
    chord = np.pad(chord, ((0,MAX_INPUT_LENGTH-chord.shape[0]),(0,0)), "constant", constant_values=0)
    return np.hstack((chord, melody))

In [5]:
# Process input
data = np.zeros((NUM_DATA, MAX_INPUT_LENGTH, 9))

for i in tqdm(range(NUM_DATA)):
    index = i + 1
    data[i,:,:] = process_input(index)

with open('./data/processed_input_pad.npy', 'wb') as f:
    np.save(f, data)

100%|██████████| 909/909 [00:04<00:00, 188.93it/s]


## Process labels

In [6]:
def process_label(index, human):
    with open(f"POP909-Dai/{index:03}/human_label{human}.txt") as f:
        label = label_txt2arr(f.read())
    label = np.pad(label, (0, MAX_OUTPUT_LENGTH-label.shape[0]), "constant", constant_values='')
    phrase_vector = np.logical_and(label[0:-1] == label[1:], label[0:-1] != '').astype(int)
    phrase_vector = np.pad(phrase_vector, (0,1), "constant", constant_values=0)
    melody_vector = np.array(list(map(lambda x: x.isupper(), label))).astype(int)
    pattern_matrix = np.zeros((MAX_OUTPUT_LENGTH,MAX_OUTPUT_LENGTH))
    for i in range(MAX_OUTPUT_LENGTH):
        for j in range(i, MAX_OUTPUT_LENGTH):
            if label[i] != '' and label[i].lower() == label[j].lower():
                pattern_matrix[i,j] = 1
                pattern_matrix[j,i] = 1
    return phrase_vector, melody_vector, pattern_matrix

In [7]:
# Process human labels
for i in [1, 2]:
    phrase_data = np.zeros((NUM_DATA, MAX_OUTPUT_LENGTH))
    melody_data = np.zeros((NUM_DATA, MAX_OUTPUT_LENGTH))
    pattern_data = np.zeros((NUM_DATA, MAX_OUTPUT_LENGTH, MAX_OUTPUT_LENGTH))

    for i in tqdm(range(NUM_DATA)):
        index = i + 1
        phrase_vector, melody_vector, pattern_matrix = process_label(index, i)
        phrase_data[i,:] = phrase_vector
        melody_data[i,:] = melody_vector
        pattern_data[i,:,:] = pattern_matrix

    with open(f'./data/human{i}_pharse_vector.npy', 'wb') as f:
        np.save(f, phrase_data)
    with open(f'./data/human{i}_melody_vector.npy', 'wb') as f:
        np.save(f, melody_data)
    with open(f'./data/human{i}_pattern_matrix.npy', 'wb') as f:
        np.save(f, pattern_data)

    pattern_data = pattern_data.reshape(NUM_DATA, MAX_OUTPUT_LENGTH*MAX_OUTPUT_LENGTH)
    ground_data = np.hstack((phrase_data, melody_data, pattern_data))
    with open(f'./data/processed_output_pad_human{i}.npy', 'wb') as f:
        np.save(f, ground_data)

100%|██████████| 909/909 [03:24<00:00,  4.44it/s]
100%|██████████| 909/909 [03:08<00:00,  4.82it/s]
