In [15]:
pip install music21 --user

Note: you may need to restart the kernel to use updated packages.


In [82]:
import os
import music21 as m21
import json
import numpy as np
import setuptools.dist
import tensorflow.keras as keras

In [62]:
DATASET_PATH = ".\\MusicXml"
TRANSPOSE_PATH = ".\\Transpose"
PREPROCESS_PATH = ".\\Preprocess"
MINIMUM_DURATION = 1/6
SEQUENCE_LENGTH = 48

In [18]:
def load_songs(dataset_path):
    songs = []
    for filename in os.listdir(dataset_path):
        filepath = os.path.join(dataset_path, filename)
        if os.path.isfile(filepath) and filename.endswith(".xml"):
            song = m21.converter.parse(filepath)
            songs.append(song)

    return songs

In [33]:
def filter_songs_by_duration(song, minimum_duration):
    for note in song.flatten().notesAndRests:
        if note.duration.quarterLength < minimum_duration and note.duration.quarterLength != 0:
            print(note.duration.quarterLength)
            return False
    return True

In [6]:
def generate_all_key_transpositions(song):
    transposed_songs = []
    for i in range(0, 12):
        transposed_song = song.transpose(i)
        transposed_songs.append(transposed_song)
    return transposed_songs

In [5]:
def transpose_to_C_or_A(song):
    key = song.analyze("key")
    if key.mode == "major":
        interval = m21.interval.Interval(key.tonic, m21.pitch.Pitch("C"))
    elif key.mode == "minor":       
        interval = m21.interval.Interval(key.tonic, m21.pitch.Pitch("A"))
    # print(interval)
    transposed_song = song.transpose(interval)
    return transposed_song

In [27]:
def get_minimum_duration(songs):
    minimum_duration = 1
    for song in songs:
        for note in song.flat.notesAndRests and note.duration.quarterLength != 0:
            if note.duration.quarterLength != 0 and note.duration.quarterLength < minimum_duration:
                minimum_duration = note.duration.quarterLength
    return minimum_duration

In [46]:
def encode_solo(song, minimum_duration):
    encoded_solo = []
    
    for element in song.flatten().notesAndRests:
        if isinstance(element, m21.note.Note): symbol = element.pitch.midi
        elif isinstance(element, m21.note.Rest): symbol = "r"
        duration = int(element.duration.quarterLength / minimum_duration)
        for i in range(duration):
            if i == 0: encoded_solo.append(symbol)
            else: encoded_solo.append("_")

    encoded_solo = " ".join(map(str, encoded_solo))
    return encoded_solo

In [47]:
def encode_chords(song, minimum_duration):
    encoded_chords = []
    current_chord = None
    current_duration = 0

    for element in song.flatten():
        if isinstance(element, m21.harmony.ChordSymbol):
            if current_chord: 
                for i in range(current_duration):
                    if i == 0: encoded_chords.append(current_chord.figure)
                    else: encoded_chords.append("_")
            current_chord = element
            current_duration = 0
        elif isinstance(element, m21.note.Note) or isinstance(element, m21.note.Rest):
            current_duration += int(element.duration.quarterLength / minimum_duration)


    encoded_chords = " ".join(map(str, encoded_chords))
    return encoded_chords

In [31]:
songs = load_songs(DATASET_PATH)
print(songs[0].metadata.title)
# minimum_duration = get_minimum_duration(songs)

Another Hairdo


In [43]:
filtered_songs = []

for song in songs:
    if filter_songs_by_duration(song, MINIMUM_DURATION):
        filtered_songs.append(song) 

print(len(filtered_songs))

0.125
0.125
0.125
0.125
1/12
0.125
0.125
0.125
1/12
0.125
0.125
0.125
0.125
0.125
0.125
0.125
0.125
0.125
32


In [11]:
all_key_songs = []
for song in songs:
    all_key_songs.extend(generate_all_key_transpositions(song))

In [18]:
def save_transcribed_songs(song, output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for idx, song in enumerate(songs):
        for i in range(0, 12):
            transposed_song = song.transpose(i)
            filename = os.path.join(output_dir, f"score_{idx}_transposed_{i}.xml")
            transposed_song.write("musicxml", fp = filename)

In [19]:
save_transcribed_songs(songs, TRANSPOSE_PATH)

In [12]:
print(all_key_songs[11].metadata.title)
print(encode_solo(all_key_songs[11], minimum_duration))
print(encode_chords(all_key_songs[11], minimum_duration))

Another Hairdo
r _ _ _ _ _ 69 _ _ _ _ _ 74 _ 76 _ 74 _ 69 _ _ _ _ _ 72 _ _ _ _ _ 73 _ _ _ _ _ r _ _ _ _ _ 69 _ _ _ _ _ 74 _ 76 _ 74 _ 69 _ _ _ _ _ 72 _ _ _ _ _ 73 _ _ _ _ _ r _ _ _ _ _ 69 _ _ _ _ _ 74 _ 76 _ 74 _ 69 _ _ _ _ _ 72 _ _ _ _ _ 73 _ _ _ _ _ r _ _ _ _ _ 69 _ _ _ _ _ 71 _ _ _ _ _ 67 _ _ _ _ _ 67 _ _ _ _ _ _ _ _ _ _ _ r _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 84 _ _ 83 _ _ 81 _ _ 79 _ _ 78 _ _ 76 _ _ 74 _ _ 68 _ _ 69 _ _ 71 _ _ 72 _ _ 69 _ _ 71 _ _ _ _ _ _ _ _ _ _ _ 71 _ _ _ _ _ _ _ _ _ _ _ r _ _ _ _ _ _ _ _ _ _ _ r _ _ _ _ _ 69 _ _ _ _ _ 73 _ _ _ _ _ 76 _ _ _ _ _ 78 _ _ _ _ _ 81 _ _ _ _ _ 83 _ _ _ _ _ 80 _ _ _ _ _ 81 _ _ 83 _ _ 81 _ _ 80 _ _ 78 _ _ _ _ _ 77 _ _ _ _ _ 76 _ _ _ _ _ 74 _ _ _ _ _ 73 _ _ _ _ _ 71 _ _ _ _ _ 70 _ _ _ _ _ 79 _ _ _ _ _ 78 _ 79 _ 78 _ 76 _ _ _ _ _ 74 _ _ _ _ _ 66 _ _ _ _ _ 69 _ _ _ _ _ 73 _ _ _ _ _ 71 _ _ _ _ _ _ _ _ _ _ _ r _ _ _ _ _ 84 _ _ _ _ _ 84 _ _ _ _ _ 81 _ _ _ _ _ _ _ _ _ _ _ 79 _ _ _ _ _ 76

In [11]:
all_songs_in_C = []
for song in songs:
    all_songs_in_C.append(transpose_to_C_or_A(song))

In [12]:
print(all_songs_in_C[11].metadata.title)
print(encode_solo(all_songs_in_C[11], minimum_duration))
print(encode_chords(all_songs_in_C[11], minimum_duration))

Blue Bird
64 _ _ _ _ _ 55 _ _ _ _ _ 57 _ _ _ 55 _ _ _ 57 _ _ _ 57 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 55 _ _ _ _ _ 58 _ _ _ _ _ 57 _ _ _ _ _ 58 _ _ _ _ _ 57 _ _ _ _ _ 57 _ _ _ _ _ 56 _ _ _ _ _ _ _ _ _ _ _ 64 _ _ _ _ _ 67 _ _ _ _ _ _ _ _ _ _ _ 64 _ _ 62 _ _ 59 _ _ 55 _ _ 58 _ _ _ _ _ 66 _ _ _ _ _ 66 _ _ _ _ _ _ _ _ _ _ _ r _ _ _ _ _ 68 _ _ 69 _ _ 72 _ _ 69 _ _ 67 _ _ 65 _ _ 69 _ _ 65 _ _ 64 _ _ 62 _ _ 68 _ _ 67 _ _ r _ _ _ _ _ 64 _ _ _ _ _ 64 _ _ _ _ _ 60 _ _ _ 55 _ _ _ 60 _ _ _ 64 _ _ _ _ _ 64 _ _ _ _ _ 60 _ _ _ 55 _ _ _ 60 _ _ _ 64 _ _ _ _ _ 64 _ _ _ _ _ r _ _ _ _ _ _ _ _ _ _ _ r _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 64 _ _ _ _ _ 64 _ _ _ _ _ 60 _ _ _ 55 _ _ _ 60 _ _ _ 64 _ _ _ _ _ 67 _ _ _ _ _ 69 _ _ _ _ _ 70 _ _ _ _ _ 70 _ _ _ _ _ _ _ _ _ _ _ r _ _ _ _ _ _ _ _ _ _ _ r _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 63 _ _ _ _ _ 63 _ _ _ _ _ 60 _ _ _ 55 _ _ _ 60 _ _ _ 63 _ _ _ _ _ 63 _ _ _ _ _ 60 _ _ _ 55 _ _ _ 60 _ _ _ 63 _ _ _ _ _ 63 _ _ _ _ _ r _ _ _ _ _ _ _ _ _ _ _ r _ _ _ _ 

In [40]:
def get_chord_set(songs):
    list = []
    for song in songs:
        for element in song.flatten():
            if isinstance(element, m21.harmony.ChordSymbol):
                list.append(element.figure)
    return sorted(set(list))

In [61]:
print(get_chord_set(songs))
# print(len(get_chord_set(all_songs_in_C)))
print(len(get_chord_set(songs)))

['A', 'A-', 'A-7', 'A-dim', 'A-m', 'A7', 'Am', 'Aø7', 'B', 'B-', 'B-7', 'B-m', 'B7', 'Bdim', 'Bm', 'C', 'C#m', 'C-', 'C-7', 'C-m', 'C/G', 'C6', 'C7', 'Cm', 'Cø7', 'D-', 'D-7', 'D-m', 'D7', 'Dm', 'Dø7', 'E-', 'E-7', 'E-dim', 'E-m', 'E7', 'Edim', 'Em', 'Eø7', 'F', 'F#7', 'F#ø7', 'F7', 'Fdim', 'Fm', 'G', 'G-7', 'G-dim', 'G7', 'Gm', 'Gø7']
51


In [54]:
def create_single_string(songs, sequence_length):
    new_song_delimiter = "/ " * sequence_length
    combined_solos = ""
    combined_chords = ""

    for song in songs:
        solo = encode_solo(song, MINIMUM_DURATION)
        chords = encode_chords(song, MINIMUM_DURATION)
        combined_solos += solo + " " + new_song_delimiter
        combined_chords += chords + " " + new_song_delimiter

    return combined_solos[:-1], combined_chords[:-1]

In [63]:
solos, chords = create_single_string(filtered_songs, SEQUENCE_LENGTH)
with open(".\\solos.txt", "w") as fp:
    fp.write(solos)

with open(".\\chords.txt", "w") as fp:
    fp.write(chords)


In [64]:
print(len(solos))
print(len(chords))

128962
116178


In [90]:
def create_mapping(song_string):
    mappings = {}

    chars = song_string.split()
    vocabulary = list(set(chars))

    for idx, symbol in enumerate(vocabulary):
        mappings[symbol] = idx

    return mappings, len(vocabulary)

In [91]:
lookup_table, VOCABULARY_SIZE = create_mapping(solos + chords)
with open(".\\lookup_table.json", "w") as fp:
    json.dump(lookup_table, fp, indent=4)

In [67]:
def convert_to_int(solos, chords, lookup_table):
    int_solos = []
    int_chords = []

    solo_chars = solos.split()
    for symbol in solo_chars:
        int_solos.append(lookup_table[symbol])

    chords_chars = chords.split()
    for symbol in chords_chars:
        int_chords.append(lookup_table[symbol])

    return int_solos, int_chords
    

In [92]:
int_solos, int_chords = convert_to_int(solos, chords, lookup_table)

print(int_solos[:100])
print(int_chords[:100])

[3, 15, 15, 52, 15, 15, 63, 48, 63, 52, 15, 15, 76, 15, 15, 47, 15, 15, 3, 15, 15, 52, 15, 15, 63, 48, 63, 52, 15, 15, 76, 15, 15, 47, 15, 15, 3, 15, 15, 52, 15, 15, 63, 48, 63, 52, 15, 15, 76, 15, 15, 47, 15, 15, 3, 15, 15, 52, 15, 15, 60, 15, 15, 27, 15, 15, 27, 15, 15, 15, 15, 15, 3, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 45, 58, 40, 19]
[2, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 2, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 2, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 2, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 81, 15, 15, 15]


In [95]:
def generate_training_sequences(int_solos, int_chords, sequence_length):
    inputs =  []
    targets = [] 

    num_sequences = len(int_solos) - sequence_length
    for i in range(num_sequences):
        input_sequence = int_chords[i : i + sequence_length + 1] + int_solos[i : i + sequence_length]
        target = int_solos[i + sequence_length]
        inputs.append(input_sequence)
        targets.append(target)

    # vocabulary_size = len(set(int_solos))
    inputs = keras.preprocessing.sequence.pad_sequences(inputs, maxlen = sequence_length + 1, padding = "pre")
    inputs = keras.utils.to_categorical(inputs, num_classes = VOCABULARY_SIZE)
    targets = np.array(targets)

    return inputs, targets

In [96]:
inputs, targets = generate_training_sequences(int_solos, int_chords, SEQUENCE_LENGTH)

In [14]:
def chord_frequency(songs):
    freq_dict  = {}
    for song in songs:
        for element in song.flatten():
            if isinstance(element, m21.harmony.ChordSymbol):
                chord = element.figure
                if chord in freq_dict.keys(): freq_dict[chord] += 1
                else: freq_dict[chord] = 1
    return freq_dict


In [20]:
def key_from_chords(song):
    chord_symbols_stream = m21.stream.Stream()
    for element in song.flatten():
        if isinstance(element, m21.harmony.ChordSymbol):
            chord_symbols_stream.append(element)
    return chord_symbols_stream.analyze("key")


In [22]:
print(key_from_chords(songs[0]))
print(songs[0].analyze("key"))

b minor
B- major


In [None]:
def 