In [7]:
from music21 import converter, instrument, note, chord
import numpy as np
import os
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import pickle
from collections import defaultdict

In [8]:
#WITHOUT INSTRUMENT
def read_midi(file_path): 
    notes = [] 
    notes_to_parse = None 

    try:
        midi= converter.parse(file_path)
        s2 = instrument.partitionByInstrument(midi)
        for part in s2.parts:

            #select elements of only piano
            if 'Piano' in str(part): 
                notes_to_parse = part.recurse() 
                #finding whether a particular element is note or a chord
                for element in notes_to_parse:
                    #note
                    if isinstance(element, note.Note):
                        notes.append(str(element.pitch))
                    #chord
                    elif isinstance(element, chord.Chord):
                        notes.append('.'.join(str(n) for n in element.normalOrder))

        return np.array(notes)
    except: 
        return np.array([])


def unique(np_array, array_type, unique_set): 
    unique_set_reverse = {n:idx for (idx, n) in unique_set.items()}
    new_array =[] 
    if array_type == 'x':
        for group in np_array: 
            temp = [] 
            for n in group: 
                temp.append(unique_set_reverse[n])
            new_array.append(temp)
    else: 
        for n in np_array: 
            new_array.append(unique_set_reverse[n])
    new_array = np.array(new_array)
    return new_array

def get_tts(base_dir, pick_name = None, sample_num= 1000, timestep = 10): 
    song_files = [f'{base_dir}/{i}' for i in os.listdir(base_dir)][:sample_num]
    songs_notes = [read_midi(i) for i in tqdm(song_files, desc = 'Reading Midi Files')]
    songs_notes = [i for i in songs_notes if len(i) != 0]
    all_notes = [i for note_ in songs_notes for i in note_]
    note_counter = defaultdict(int)
    for i in all_notes: 
        note_counter[i]+= 1
    
    int_to_note = dict((idx, n) for idx, n in enumerate(all_notes))
    all_notes_dict = {idx: n for idx, n in enumerate(set(all_notes))}
    x = [] 
    y = [] 

    pbar = tqdm(songs_notes, desc = 'Creating Timeseries')
    for idx_song, song in enumerate(pbar):
        for idx in range(timestep,len(song)-(timestep), 1): 
            x.append(song[idx:idx+timestep])
            y.append(song[idx + (timestep)])

    x = np.array(x)
    y = np.array(y)  
    
    new_x = unique(x, unique_set = all_notes_dict, array_type = 'x')
    new_y = unique(y, unique_set = all_notes_dict, array_type ='y')
    
    
    x_train, x_test, y_train, y_test = train_test_split(new_x, new_y, train_size = .90, random_state = 10)
    x_train = x_train.reshape(x_train.shape[0], x_train.shape[1],1)
    x_test = x_test.reshape(x_test.shape[0], x_test.shape[1],1)
    
    if pick_name:
        pick_tup= (x_train, x_test, y_train, y_test, all_notes_dict)
        pickle.dump(pick_tup, open(f'../Pickles/{pick_name}', 'wb'), protocol = 4)
                    
    return x_train, x_test, y_train, y_test, all_notes_dict

x_train, x_test, y_train, y_test, all_notes_dict = get_tts('../Classical', pick_name= 'Classical2.p')

Reading Midi Files: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1001/1001 [06:30<00:00,  2.57it/s]
Creating Timeseries: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 109/109 [00:00<00:00, 813.25it/s]


In [9]:
print(len(all_notes_dict))
all_notes_dict

513


{0: '9.2.3',
 1: 'B-5',
 2: '6.10.1',
 3: '10',
 4: '2.4.7.10',
 5: 'E-7',
 6: '6.9.0.1',
 7: 'B3',
 8: '11.3.5',
 9: '1.4.7.9.10',
 10: '6.9.0',
 11: '9.1',
 12: '10.0.4',
 13: '7.11',
 14: 'A6',
 15: '8.11.1',
 16: '8.10.0.3',
 17: 'B-1',
 18: 'F#6',
 19: '0.2',
 20: '4.8.9',
 21: '2.5.7.8',
 22: '6.10',
 23: '7.9.11.2.4',
 24: '5.7.10.0',
 25: '1.4.7.8',
 26: '2.4.9',
 27: '7.11.1',
 28: '0.1',
 29: '4.6.8.10.0',
 30: 'B-0',
 31: '1.5',
 32: '8.11.1.3',
 33: '5.9.0.1',
 34: '11.0',
 35: '11',
 36: '8.11.1.4',
 37: '8.0.2',
 38: 'G#1',
 39: '2.4.5.9',
 40: '7.8.0.3',
 41: '0.4.8',
 42: '0.5.6',
 43: 'D4',
 44: '8.9.10',
 45: '10.11.2',
 46: '2.4.7.9',
 47: '10.11.1.2',
 48: '6.7.8.9.10',
 49: '9.11.0',
 50: '5.8.10.1',
 51: '3.9',
 52: '7.9.0',
 53: '2.6.9',
 54: 'E-3',
 55: '6.8',
 56: 'E-2',
 57: '3.5.6',
 58: '10.11',
 59: '0.3.4.7',
 60: '2',
 61: '6.11',
 62: 'D6',
 63: '1',
 64: '2.3.5',
 65: '1.3.8',
 66: '10.0',
 67: '2.3.5.8.11',
 68: 'A3',
 69: '3.6.9',
 70: '4.6.7.10',
 71

In [4]:
assert False

AssertionError: 

In [None]:
#WITH INSTRUMNET
def read_midi(file_path): 
    print(file_path)
    notes = [] 
    notes_to_parse = None 
    instruments = []
    midi= converter.parse(file_path)

    s2 = instrument.partitionByInstrument(midi)
    for part in s2.parts:
        str_part = str(part)
        if str_part == '<music21.stream.Part>': 
            continue 
        else:

            cutoff_len = len('<music21.stream.Part ') 
            instru = str_part[cutoff_len:]
            instru = instru[:instru.find('>')]
            notes_to_parse = part.recurse() 
            for element in notes_to_parse: 
                if isinstance(element, note.Note): 
                    notes.append(str(element.pitch))
                    instruments.append(instru)
                elif isinstance(element, chord.Chord): 
                    notes.append('.'.join(str(n) for n in element.normalOrder))      
                    instruments.append(instru)
    return np.array(list(zip(instruments, notes)))

            
                    


def unique(np_array, array_type, unique_set): 
    unique_set_reverse = {n:idx for (idx, n) in unique_set.items()}
    new_array =[] 
    if array_type == 'x':
        for group in np_array: 
            temp = [] 
            for n in group: 
                n = ' '.join(n)
                temp.append(unique_set_reverse[n])
            new_array.append(temp)
    else: 
        for n in np_array: 
            n = ' '.join(n)
            new_array.append(unique_set_reverse[n])
    new_array = np.array(new_array)
    return new_array


def get_tts(base_dir, pick_name = None, timestep = 10): 
    song_files = [f'{base_dir}/{i}' for i in os.listdir(base_dir)]
    songs_notes = [read_midi(i) for i in tqdm(song_files, desc = 'Reading Midi Files')]
    all_notes = [' '.join(i) for note_ in songs_notes for i in note_]
    note_counter = defaultdict(int)
    for i in all_notes: 
        note_counter[i]+= 1
        
    all_notes_dict = {idx: n for idx, n in enumerate(set(all_notes))}
    
    x = [] 
    y = [] 

    pbar = tqdm(songs_notes, desc = 'Creating Timeseries')
    for idx_song, song in enumerate(pbar):
        for idx in range(timestep,len(song)-(timestep), 1): 
            x.append(song[idx:idx+timestep])
            y.append(song[idx + (timestep)])

    x = np.array(x)
    y = np.array(y)  
    
    new_x = unique(x, unique_set = all_notes_dict, array_type = 'x')
    new_y = unique(y, unique_set = all_notes_dict, array_type ='y')
    
    
    x_train, x_test, y_train, y_test = train_test_split(new_x, new_y, train_size = .90, random_state = 10)
    x_train = x_train.reshape(x_train.shape[0], x_train.shape[1],1)
    x_test = x_test.reshape(x_test.shape[0], x_test.shape[1],1)
    
    if pick_name:
        pick_tup= (x_train, x_test, y_train, y_test, all_notes_dict)
        pickle.dump(pick_tup, open(f'../Pickles/{pick_name}.p', 'wb'), protocol = 4)
                    
    return x_train, x_test, y_train, y_test, all_notes_dict

x_train, x_test, y_train, y_test, notes_dict = get_tts('../Techno', pick_name = 'Techno')

In [None]:
x_train