In [1]:
import music21 as m21
import ast
import os

In [None]:
!pip install music21

# Future Scripts and Functions

In [23]:
# midi.py

# helper functions
def gen_md_from_path(mdir, **kwargs):
     
    mdict = {}
    if mdir is None:
        print("No Directory Specified.")
    for filename in os.listdir(mdir):
        if filename.endswith(".mid"):
            mdict[filename[:-4]] = {}
            mdict[filename[:-4]] = process_midi_file(mdir+'/'+filename, **kwargs)
        else:
            continue 
        
    return mdict
            
def null_func(*k):
    return None

def process_midi_file(path, transpose=True, by_measure=True, verbose=True):
    
    vprint = print if verbose else null_func
    # read-in
    mid = m21.converter.parse(path)
    vprint(f"File Read in: {path[7:]}")
    
    # Make measures
    if by_measure:
        mid = m21.stream.makeNotation.makeMeasures(mid)
        vprint(f"Number of Measures: {len(mid)}")
    
    # Transpose
    if transpose:
        key = mid.analyze('key').tonic
        vprint(f"Detected Key: {key}")
        mid = mid.transpose(m21.interval.Interval(key, m21.pitch.Pitch('C')))
    
    return mid

# encoding
class MidiEncoder:
    
    def __init__(self):
 
        # function map
        self.method_map = {
            'pitch_position_strings':self.pitch_position_strings,
            'pitch_position_duration_strings':self.pitch_position_duration_strings
        }
    
    # Core functions, encoding and decoding
    def Encode(self, midi, enc_type='pitch_position_strings'):
        
        if enc_type in self.method_map.keys():
            return self.method_map[enc_type](midi, enc=True)
        else:
            print("Encoding not supported, check spelling or try a different method.")
    
    def pitch_position_strings(self, midi, enc):
        
        encoded_string = []
        for n in midi.notes:
            if type(n) == m21.note.Note:
                encoded_string.append(str(n.pitch) + ':'+ str(n.offset))
            elif type(n) == m21.chord.Chord:
                for i in range(0, len(n)):
                    encoded_string.append(str(n[i].pitch) + ':'+ str(n[i].offset)) 
            else:
                print("No Notes Detected.")
        return encoded_string

        
    def pitch_position_duration_strings(self, midi, enc):
    
        encoded_string = []
        for n in midi.notes:
            if type(n) == m21.note.Note:
                encoded_string.append(str(n.pitch) + ':'+ str(n.offset)+':'+str(n.duration.quarterLength))
            elif type(n) == m21.chord.Chord:
                for i in range(0, len(n)):
                    encoded_string.append(str(n[i].pitch) + ':'+ str(n[i].offset)+':'+str(n[i].duration.quarterLength)) 
            else:
                print("No Notes Detected.")
        return encoded_string

# writing
class MidiWriter:
    
    def __init__(self, enc_mid, enc_type):
        
        # function map
        self.method_map = {
            'pitch_position_strings':self.pitch_position_strings,
            'pitch_position_duration_strings':self.pitch_position_duration_strings
        }

# Encoding and Read-in

In [64]:
# Read in all of the midi files
path = './data'
mdl = gen_md_from_path(path, by_measure=False, verbose=False)
mdm = gen_md_from_path(path, by_measure=True, verbose=False)

In [65]:
# Encode them (as entire piece)
me = MidiEncoder()
mdl_enc = {}
for piece in mdl:
    mdl_enc[piece] = me.Encode(mdl[piece].flat, 'pitch_position_duration_strings')

In [66]:
# Encode them (by measure)
me = MidiEncoder()
mdm_enc = {}
for piece in mdm:
    mdm_enc[piece] = {}
    for i, m in enumerate(mdm[piece]):
        mdm_enc[piece][i] = me.Encode(m, 'pitch_position_duration_strings')

In [67]:
# Vocab Sizes
un = []; count = 0;
for piece in mdl_enc:
    for token in mdl_enc[piece]:
        count += 1
        if token not in un:
            un.append(token)
print(f"{len(un)} / {count}")

12550 / 14921


In [68]:
# Vocab Sizes
un = []; count = 0;
for piece in mdm_enc:
    for meas in mdm_enc[piece]:
        for token in mdm_enc[piece][meas]:
            count += 1
            if token not in un:
                un.append(token)
print(f"{len(un)} / {count}")

3307 / 14921


# Modeling