In [1]:
pip install music21 --user

Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
import music21 as m21
import json
import numpy as np
import setuptools.dist
import tensorflow.keras as keras
from fractions import Fraction

In [2]:
DATASET_PATH = ".\\MusicXml"
TRANSPOSE_PATH = ".\\Transpose"
PREPROCESS_PATH = ".\\Preprocess"
MINIMUM_DURATION = 1/12
SEQUENCE_LENGTH = 48

In [3]:
def load_songs(dataset_path):
    songs = []
    for filename in os.listdir(dataset_path):
        filepath = os.path.join(dataset_path, filename)
        if os.path.isfile(filepath) and filename.endswith(".xml"):
            song = m21.converter.parse(filepath)
            songs.append(song)

    return songs

In [4]:
songs = load_songs(DATASET_PATH)
print(songs[0].metadata.title)
# minimum_duration = get_minimum_duration(songs)

Another Hairdo


In [5]:
ACCEPTABLE_DURATIONS = [
    Fraction(4),
    Fraction(3),
    Fraction(2),
    Fraction(3, 2),
    Fraction(1),
    Fraction(3, 4),
    Fraction(2, 3),
    Fraction(1, 2),
    Fraction(1, 3),
    Fraction(1, 4),
    Fraction(1, 6)
]

In [6]:
def filter_songs_by_duration(song):
    for note in song.flatten().notesAndRests:
        duration = note.duration.quarterLength
        if Fraction(duration).limit_denominator() not in ACCEPTABLE_DURATIONS and note.duration.quarterLength != 0:
            print(note.duration.quarterLength)
            return False
    return True

In [7]:
filtered_songs = []

for song in songs:
    if filter_songs_by_duration(song):
        filtered_songs.append(song) 

print(len(filtered_songs))

4/5
0.125
0.125
0.125
1/12
0.125
0.125
1/5
1/5
0.125
1/12
0.125
1/5
1/5
0.125
0.125
0.125
0.125
1/5
0.125
0.125
0.125
28


In [8]:
def encode_solo(song, minimum_duration):
    encoded_solo = []
    # symbol = None
    # total_duration = 0
    
    for element in song.flatten().notesAndRests:
        if isinstance(element, m21.note.Note): symbol = element.pitch.midi
        elif isinstance(element, m21.note.Rest): symbol = "r"
        # print(symbol, end = " ")
        # print(element.duration.quarterLength)
        duration = element.duration.quarterLength / minimum_duration
        # total_duration += duration
        # print(total_duration, end = " ")
        for i in range(int(duration)):
            if i == 0: encoded_solo.append(symbol)
            else: encoded_solo.append("_")
        # print(len(encoded_solo))

    encoded_solo = " ".join(map(str, encoded_solo))
    return encoded_solo

In [9]:
def encode_chords(song, minimum_duration):
    encoded_chords = []
    current_chord = None
    current_duration = 0

    for element in song.flatten():
        if isinstance(element, m21.harmony.ChordSymbol):
            if current_chord: 
                for i in range(int(current_duration)):
                    if i == 0: encoded_chords.append(current_chord.figure)
                    else: encoded_chords.append("_")
            current_chord = element
            current_duration = 0
        elif isinstance(element, m21.note.Note) or isinstance(element, m21.note.Rest):
            current_duration += element.duration.quarterLength / minimum_duration

    if current_chord:
        for i in range(int(current_duration)):
            if i == 0: encoded_chords.append(current_chord.figure)
            else: encoded_chords.append("_")
    encoded_chords = " ".join(map(str, encoded_chords))
    return encoded_chords

In [10]:
encoded = encode_solo(filtered_songs[0], MINIMUM_DURATION)
print(len(encoded))

6258


In [122]:
def get_chord_set(songs):
    list = []
    for song in songs:
        for element in song.flatten():
            if isinstance(element, m21.harmony.ChordSymbol):
                list.append(element.figure)
    return sorted(set(list))

In [123]:
print(get_chord_set(filtered_songs))
# print(len(get_chord_set(all_songs_in_C)))
print(len(get_chord_set(filtered_songs)))

['A', 'A-', 'A-7', 'A-dim', 'A7', 'Am', 'Aø7', 'B', 'B-', 'B-7', 'B-m', 'B7', 'Bdim', 'Bm', 'C', 'C#m', 'C-m', 'C6', 'C7', 'Cm', 'Cø7', 'D-', 'D-7', 'D-m', 'D7', 'Dm', 'Dø7', 'E-', 'E-7', 'E-dim', 'E-m', 'E7', 'Edim', 'Em', 'Eø7', 'F', 'F#7', 'F#ø7', 'F7', 'Fdim', 'Fm', 'G', 'G-7', 'G-dim', 'G7', 'Gm', 'Gø7']
47


In [11]:
def create_single_string(songs, sequence_length):
    new_song_delimiter = "/ " * sequence_length
    combined_solos = ""
    combined_chords = ""

    for song in songs:
        solo = encode_solo(song, MINIMUM_DURATION)
        chords = encode_chords(song, MINIMUM_DURATION)
        combined_solos += solo + " " + new_song_delimiter
        combined_chords += chords + " " + new_song_delimiter

    return combined_solos[:-1], combined_chords[:-1]

In [129]:
def create_single_list(songs, sequence_length):
    new_song_delimiter = ["/" for _ in range(sequence_length)]
    combined_solos = []
    combined_chords = []

    for song in songs:
        solo = encode_solo(song, MINIMUM_DURATION)
        chords = encode_chords(song, MINIMUM_DURATION)
        combined_solos.extend(solo)
        combined_solos.extend(new_song_delimiter)
        combined_chords.extend(chords)
        combined_chords.extend(new_song_delimiter)
    
    return combined_solos, combined_chords

In [12]:
solos, chords = create_single_string(filtered_songs, SEQUENCE_LENGTH)
with open(".\\solos.txt", "w") as fp:
    fp.write(solos)

with open(".\\chords.txt", "w") as fp:
    fp.write(chords)

In [103]:
solos_from_list, chords_from_list = create_single_list(filtered_songs, SEQUENCE_LENGTH)
with open(".\\solos.txt", "w") as fp:
    fp.write(" ".join(map(str, solos_from_list)))

with open(".\\chords.txt", "w") as fp:
    fp.write(" ".join(map(str, chords_from_list)))

In [13]:
print(len(solos))
print(len(chords))

217043
207172


In [17]:
def create_mapping(song_string):
    mappings = {}

    chars = song_string.split(" ")
    vocabulary = list(set(chars))

    vocabulary = list(set(song_string))

    for idx, symbol in enumerate(vocabulary):
        mappings[symbol] = idx

    return mappings, len(vocabulary)

In [18]:
lookup_table, VOCABULARY_SIZE = create_mapping(solos + chords)
with open(".\\lookup_table.json", "w") as fp:
    json.dump(lookup_table, fp, indent=4)

In [16]:
def convert_to_int(solos, chords, lookup_table):
    int_solos = []
    int_chords = []

    solo_chars = solos.split(" ")
    for symbol in solo_chars:
        int_solos.append(lookup_table[symbol])

    chords_chars = chords.split(" ")
    for symbol in chords_chars:
        int_chords.append(lookup_table[symbol])

    return int_solos, int_chords

In [140]:
int_solos, int_chords = convert_to_int(solos, chords, lookup_table)

KeyError: '58'

In [111]:
def generate_training_sequences(int_solos, int_chords, sequence_length):
    inputs =  []
    targets = [] 

    num_sequences = len(int_solos) - sequence_length
    for i in range(num_sequences):
        input_sequence = int_chords[i : i + sequence_length + 1] + int_solos[i : i + sequence_length]
        target = int_solos[i + sequence_length]
        inputs.append(input_sequence)
        targets.append(target)

    # vocabulary_size = len(set(int_solos))
    # inputs = keras.preprocessing.sequence.pad_sequences(inputs, maxlen = sequence_length + 1, padding = "pre")
    inputs = keras.utils.to_categorical(inputs, num_classes = VOCABULARY_SIZE)
    targets = np.array(targets)

    return inputs, targets

In [112]:
inputs, targets = generate_training_sequences(int_solos_from_list, int_chords_from_list, SEQUENCE_LENGTH)

MemoryError: Unable to allocate 6.04 GiB for an array with shape (9889344, 82) and data type float64

In [14]:
def chord_frequency(songs):
    freq_dict  = {}
    for song in songs:
        for element in song.flatten():
            if isinstance(element, m21.harmony.ChordSymbol):
                chord = element.figure
                if chord in freq_dict.keys(): freq_dict[chord] += 1
                else: freq_dict[chord] = 1
    return freq_dict


In [20]:
def key_from_chords(song):
    chord_symbols_stream = m21.stream.Stream()
    for element in song.flatten():
        if isinstance(element, m21.harmony.ChordSymbol):
            chord_symbols_stream.append(element)
    return chord_symbols_stream.analyze("key")


In [22]:
print(key_from_chords(songs[0]))
print(songs[0].analyze("key"))

b minor
B- major


In [None]:
def 