In [6]:
import os
import pickle
import sys
from typing import Iterator, Generator

import music21
import numpy as np
import matplotlib.pyplot as plt
from joblib import Parallel, delayed
from tqdm import tqdm
%matplotlib inline

In [7]:
# These are by far the most prevalent keys in the corpus, so we're going to restrict ourselves to
# pieces in these keys
KEYS = ['A', 'C', 'D', 'F', 'G']

In [8]:
# Likewise, most pieces have 4, 5, or 6 voices
VOICES = [4, 5, 6]

In [9]:
PIECES = music21.corpus.getComposer('palestrina')

In [10]:
def load_pieces_generator(pieces: [str]=PIECES) -> Iterator[music21.stream.Score]:
    return (music21.corpus.parse(piece) for piece in pieces)

In [11]:
def n_voices(score: music21.stream.Score) -> int:
    return len(score.getElementsByClass(music21.stream.Part))

In [12]:
def get_key(score: music21.stream.Score) -> str:
    return score.analyze('key').tonic.fullName

In [13]:
def should_include(score: music21.stream.Score, keys: [str]=KEYS, voices: [int]=VOICES) -> bool:
    return get_key(score) in keys and n_voices(score) in voices

In [14]:
def transpose_to_all_keys_gen(score: music21.stream.Score, keys: [str]=KEYS) -> Generator[music21.stream.Score, None, None]:
    for key in keys:
        score_key = score.analyze('key')
        if score_key.tonic.fullName == key:
            yield score
        else:
            interval = music21.interval.Interval(score_key.tonic, music21.pitch.Pitch(key))
            yield score.transpose(interval)

In [15]:
def get_score_shape(score: music21.stream.Score) -> (int, int):
    n_voices: int = len(score.getElementsByClass(music21.stream.Part))
    n_eighth_notes: int = int(score.duration.quarterLength * 2)
    return n_voices, n_eighth_notes

In [16]:
idx_slur = 0
idx_rest = 1
idx_beat = 2
max_beats_per_measure = 16
n_meta_features = 18

def score_to_tensor(score: music21.stream.Score) -> (np.ndarray, np.ndarray):
    n_voices, n_eighths = get_score_shape(score)
    score_tensor = np.zeros((n_voices, n_eighths))
    meta_tensor = np.zeros((n_voices, n_eighths, n_meta_features))
    max_beats_per_measure
    try:
        for i, part in enumerate(score.getElementsByClass(music21.stream.Part)):
            for measure in part.getElementsByClass(music21.stream.Measure):
                # we're going to multiply all durations by two,
                # because eighth note is the shortest in the corpus.
                beats_in_measure = measure.duration.quarterLength * 2
                # Get the offset of the beginning of the measure (from the beginning of the piece)
                measure_offset = int(measure.offset)
                for b in range(int(beats_in_measure)):
                    # Annotate each eighth-note pulse in the metadata track
                    meta_tensor[i][measure_offset * 2 + b][idx_beat + b] = 1
                for note in measure.getElementsByClass(music21.note.Note):
                    offset = int(note.offset + measure_offset) * 2
                    for j in range(int(offset), int(offset + note.duration.quarterLength * 2)):
                        # mark the note with its midi pitch throughout its duration
                        score_tensor[i, j] = float(note.midi)
                        if j > offset:
                            # Add a 'slur' annotation for any held note
                            meta_tensor[i, j, idx_slur] = 1
                for rest in measure.getElementsByClass(music21.note.Rest):
                    # Mark all rests in the metadata track
                    offset = int(rest.offset + measure_offset) * 2
                    for j in range(int(offset), int(offset + rest.duration.quarterLength * 2)):
                        meta_tensor[i, j, idx_rest] = 1
        return score_tensor, meta_tensor  
    except:
        return None
    

In [17]:
scores = Parallel(n_jobs=-1, verbose=5)(delayed(music21.corpus.parse)(piece)
    for piece in PIECES)

[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    3.9s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:   21.5s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:   52.1s
[Parallel(n_jobs=-1)]: Done 280 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  3.4min
[Parallel(n_jobs=-1)]: Done 874 tasks      | elapsed:  5.2min
[Parallel(n_jobs=-1)]: Done 1144 tasks      | elapsed:  6.9min
[Parallel(n_jobs=-1)]: Done 1318 out of 1318 | elapsed:  7.8min finished


In [18]:
included_scores = [score for score in tqdm(scores) if should_include(score)]

100%|██████████| 1318/1318 [02:00<00:00, 10.90it/s]


In [19]:
len(included_scores)

1186

In [20]:
transposed_scores = [
    transposed
    for score in tqdm(included_scores)
    for transposed in transpose_to_all_keys_gen(score)
]

100%|██████████| 1186/1186 [57:50<00:00,  2.93s/it]


In [21]:
# with open('score_tensors.pkl', 'w+b') as scores_file:
# #     pickle.dump(score_tensors, scores_file)
#     scores_file.write(b'foo')

In [22]:
score_tensors = [
    score_to_tensor(score)
    for score in tqdm(transposed_scores)
]

100%|██████████| 5930/5930 [04:05<00:00, 24.19it/s]


In [23]:
for i, score in tqdm(enumerate(score_tensors)):
    tensor, meta = score
    np.save('./data/{}.npy'.format(i), tensor)
    np.save('./data/{}_meta.npy'.format(i), meta)

5930it [00:32, 179.89it/s]


In [24]:
# save a list of the names of the scores
score_names = [list(score)[0].title + " - " + list(score)[0].parentTitle for score in tqdm(transposed_scores)]


100%|██████████| 5930/5930 [00:01<00:00, 5108.16it/s]


In [25]:
np.save('./data/score_names.npy', score_names)

In [26]:
score, meta = score_tensors[0]

In [27]:
max_pitch = np.max([np.max(t) for t, _ in score_tensors])

In [28]:
max_pitch

88.0

In [29]:
min_pitch = np.min([np.min(t[t > 0]) for t, _ in score_tensors])

In [30]:
min_pitch

31.0

In [31]:
n_notes = int(max_pitch - min_pitch)

In [32]:
score_sequence, meta_sequence = make_input_sequence_for_voice(padded_score, padded_meta, 0)

NameError: name 'make_input_sequence_for_voice' is not defined

In [None]:
np.array([score_sequence]).shape

In [None]:
score_target, meta_target = make_targets_for_voice(score, meta, 0)

In [None]:
np.array([make_input_sequence_for_voice()]).shape

Model attempt # 1: do not incorporate metadata

In [None]:
y = np.array([
    make_targets_for_voice(score, meta, voice)[0]
    for score, meta in tqdm(score_tensors)
    for voice in range(score.shape[0])
])

In [2]:
import keras


ModuleNotFoundError: No module named 'keras'