In [17]:
import os
import pretty_midi
import numpy as np
import torch

def extract_melody_and_chords(pm):
    melody = []
    chords = []
    for instrument in pm.instruments:
        # Sort the notes by their start time
        instrument.notes.sort(key=lambda note: note.start)
        for i in range(len(instrument.notes)):
            if i == 0 or instrument.notes[i].start > instrument.notes[i - 1].end:
                # This note starts a new chord
                melody.append(instrument.notes[i].pitch)
                if i > 0:
                    # Append the previous chord to the chords list
                    chords.append([note.pitch for note in instrument.notes[i - 1::-1] if note.end > instrument.notes[i - 1].start])

                else:
                    # This note is part of the current chord
                    instrument.notes[i - 1].end = max(instrument.notes[i - 1].end, instrument.notes[i].end)
            # Append the last chord to the chords list
            chords.append([note.pitch for note in instrument.notes[::-1] if note.end > instrument.notes[-1].start])
        return melody, chords


def transpose_to_c_major(pm):
    key = pm.key_signature_changes
    if len(key) > 0:
        key = key[0].key_number
    else:
        key = 0
    semitones = -key
    for instrument in pm.instruments:
        for note in instrument.notes:
            note.pitch += semitones
    return pm

def encode_melody_and_chords(melody, chords):
    # Flatten the chords list and get unique notes
    unique_notes = list(set(melody + [note for chord in chords for note in chord]))
    note_to_int = {note: i for i, note in enumerate(unique_notes)}
    
    # Encode the melody
    melody_encoded = [note_to_int[note] for note in melody]
    
    # Encode the chords
    chords_encoded = [[note_to_int[note] for note in chord] for chord in chords]
    
    return melody_encoded, chords_encoded

In [5]:
# Directory where the Lakh MIDI dataset is stored
midi_dir = 'C:\\Users\\samdd\\Desktop\\ChordGen\\ChordGen\\lakhData\\archive'

badcnt = 0
goodcnt = 0
goodfiles = []

#Good / Bad MIDI file filtering
# Iterate over all MIDI files in the directory
for foldername in os.listdir(midi_dir):
    # print(midi_dir + '\\' + foldername, "1")
    for filename in os.listdir(midi_dir + '\\' + foldername):
        # print(filename, "2")
        if filename.endswith('.mid'):
            # Load the MIDI file
            try:
                pm = pretty_midi.PrettyMIDI(os.path.join(midi_dir + '\\' + foldername, filename))
                goodcnt += 1
                # print("good", goodcnt)
                goodfiles.append(pm)
            except:
                # print("bad", badcnt)
                badcnt += 1

print(goodfiles)
#Save the list so script doesnt have to be rerun with Pickle
import pickle

with open('goodfiles.pkl','wb') as f:
    pickle.dump(goodfiles, f)



bad 0
bad 1
bad 2
bad 3
bad 4
bad 5
bad 6
bad 7
bad 8
bad 9
bad 10
bad 11
bad 12
bad 13
bad 14
bad 15
bad 16
bad 17
bad 18
bad 19
bad 20
bad 21
bad 22
bad 23
bad 24
bad 25
bad 26
bad 27
bad 28
bad 29
bad 30
bad 31
bad 32
bad 33
bad 34
bad 35
bad 36
bad 37
bad 38
bad 39
bad 40
bad 41
bad 42


In [1]:
import pickle

with open('goodfiles.pkl', 'rb') as f:
    goodfiles = pickle.load(f)


print(goodfiles)

FileNotFoundError: [Errno 2] No such file or directory: 'goodfiles.pkl'

In [None]:
for song in goodfiles:
        melody, chords = extract_melody_and_chords(pm)

        # Transpose to C major
        pm = transpose_to_c_major(pm)

        # Encode melody and chords
        melody_encoded, chords_encoded = encode_melody_and_chords(melody, chords)
        print(melody, " _ ",chords)

In [3]:
#Test block for the case of one file
#Example file Am_I_Blue_AB.mid

test = pretty_midi.PrettyMIDI(os.path.join('C:\\Users\\samdd\\Desktop\\ChordGen\\ChordGen\\SingleFileTest', 'Am_I_Blue_AB.mid'))

In [16]:
melody, chords = extract_melody_and_chords(test)

# Transpose to C major
test = transpose_to_c_major(test)

# Encode melody and chords
melody_encoded, chords_encoded = encode_melody_and_chords(melody, chords)
print(len(melody), melody)
print(len(chords),chords)

print("--")

print(len(melody_encoded), melody_encoded)
print(len(chords_encoded), chords_encoded)

573 [81, 79, 72, 63, 58, 72, 69, 60, 50, 53, 57, 60, 36, 70, 52, 50, 52, 58, 67, 36, 72, 65, 55, 72, 77, 62, 53, 67, 65, 41, 50, 57, 72, 74, 72, 60, 50, 53, 57, 65, 36, 74, 74, 52, 50, 62, 58, 60, 36, 72, 60, 55, 54, 55, 53, 52, 65, 58, 41, 41, 29, 51, 64, 75, 76, 81, 88, 62, 74, 80, 64, 86, 92, 52, 62, 40, 74, 40, 60, 74, 81, 42, 70, 79, 72, 74, 72, 60, 50, 53, 57, 60, 36, 74, 74, 58, 50, 52, 58, 60, 36, 72, 60, 74, 65, 59, 42, 62, 66, 60, 60, 60, 62, 57, 72, 84, 89, 98, 84, 89, 62, 88, 50, 81, 81, 61, 62, 53, 74, 65, 55, 68, 58, 57, 36, 60, 50, 72, 84, 89, 98, 60, 93, 74, 72, 84, 88, 98, 88, 50, 81, 81, 59, 74, 59, 67, 36, 68, 51, 52, 64, 54, 60, 56, 57, 86, 60, 62, 86, 74, 64, 59, 62, 59, 64, 71, 62, 54, 56, 56, 80, 40, 64, 72, 76, 42, 70, 48, 72, 36, 89, 98, 89, 72, 86, 88, 98, 88, 74, 60, 81, 74, 71, 74, 56, 58, 60, 60, 41, 60, 60, 57, 60, 60, 64, 60, 60, 83, 84, 84, 84, 81, 64, 41, 80, 81, 81, 81, 77, 81, 79, 77, 74, 53, 79, 77, 53, 77, 83, 84, 81, 65, 57, 80, 81, 81, 81, 77, 41,