In [10]:
from music21 import converter, instrument, note, chord, stream, midi, instrument
from scipy import sparse
import time
import tqdm.auto
import glob
import numpy as np
import pandas as pd

In [11]:
path = ".\data\MIDI_Archive\Pirates of the Caribbean - He's a Pirate.mid"

In [12]:
mf = midi.MidiFile()
mf.open(path)
mf.read()
mf.close()

In [13]:
print(mf)

<music21.midi.MidiFile 4 tracks>


In [14]:
s = midi.translate.midiFileToStream(mf)
s.show('midi')

In [15]:
def streamToData(stream_set):
    stream = stream_set.parts[0]
    total_length = np.int(np.round(stream.flat.highestTime / 0.25)) # in semiquavers
    stream_list = []
    output = np.zeros((total_length,128))
    
    for stream in stream_set:
        for element in stream.flat:
            if isinstance(element, note.Note):
                off = np.int(np.round(element.offset / 0.25))
                length = np.int(np.round(element.quarterLength / 0.25))
                pitch = element.pitch.midi
                output[off:off+length,pitch] = 1
            elif isinstance(element, chord.Chord):
                off = np.int(np.round(element.offset / 0.25))
                length = np.int(np.round(element.quarterLength / 0.25))
                pitches = np.array([np.int(np.round(i.midi)) for i in element.pitches])
                times = np.arange(off,off+length)
                output[off:off+length,pitches] = 1
            
    return sparse.csr_matrix(output)


def DataToStream(data_mat, time_signature = 0.25):
    melody_stream = stream.Stream()
    melody_stream.append(instrument.Piano())
    t,k = data_mat.shape
    old_element = None
    counter = 1
    for i in range(t):
        arr = data_mat[i,:]
        r = np.int(np.sum(arr))
        
        if r == 1:
            #print(np.argmax(arr))
            new_element = note.Note(np.argmax(arr))
        elif r > 1:
            #print(arr)
            arr = arr.todense()
            #print(np.where(arr == 1)[1])
            pitches = np.where(arr == 1)[1]
            all_notes = np.array([note.Note(i) for i in pitches])
            new_element = chord.Chord(all_notes)
        else:
            #print(r)
            new_element = note.Rest()
        
        if new_element == old_element and counter < 6:
            counter += 1
        else:
            if old_element:
                old_element.quarterLength = time_signature*counter
                melody_stream.append(old_element)
            counter = 1
            
        old_element = new_element
            
    return melody_stream

In [16]:
s

<music21.stream.Score 0x2596a307b48>

In [17]:
transformed = streamToData(s)
transformed

<1035x128 sparse matrix of type '<class 'numpy.float64'>'
	with 3618 stored elements in Compressed Sparse Row format>

In [18]:
recreated_s = DataToStream(transformed)

In [19]:
s.show('midi')

In [20]:
recreated_s.show('midi')

In [21]:
s.show('text')

{0.0} <music21.stream.Part 0x2596a792648>
    {0.0} <music21.instrument.Instrument ''>
    {0.0} <music21.instrument.Piano 'Piano'>
    {0.0} <music21.tempo.MetronomeMark Quarter=200.0>
    {0.0} <music21.key.Key of F major>
    {0.0} <music21.meter.TimeSignature 6/8>
    {0.0} <music21.note.Note D>
    {1.0} <music21.note.Note D>
    {1.5} <music21.note.Note D>
    {2.5} <music21.note.Note D>
    {3.0} <music21.note.Note D>
    {4.0} <music21.note.Note D>
    {4.5} <music21.note.Note D>
    {5.0} <music21.note.Note D>
    {5.5} <music21.note.Note D>
    {6.0} <music21.note.Note D>
    {7.0} <music21.note.Note D>
    {7.5} <music21.note.Note D>
    {8.5} <music21.note.Note D>
    {9.0} <music21.note.Note D>
    {10.0} <music21.note.Note D>
    {10.5} <music21.note.Note D>
    {11.0} <music21.note.Note D>
    {11.5} <music21.note.Note D>
    {12.0} <music21.note.Note D>
    {13.0} <music21.note.Note D>
    {13.5} <music21.note.Note D>
    {14.5} <music21.note.Note D>
    {15.0} <music21

In [22]:
recreated_s.show('text')

{0.0} <music21.instrument.Piano 'Piano'>
{0.0} <music21.note.Note D>
{1.5} <music21.note.Note D>
{3.0} <music21.note.Note D>
{4.5} <music21.note.Note D>
{6.0} <music21.note.Note D>
{7.5} <music21.note.Note D>
{9.0} <music21.note.Note D>
{10.5} <music21.note.Note D>
{12.0} <music21.chord.Chord D1 D2 D4>
{13.5} <music21.chord.Chord D1 D2 D4>
{14.75} <music21.note.Note D>
{15.0} <music21.chord.Chord D1 D2 D4 B-4>
{16.25} <music21.note.Note D>
{16.5} <music21.chord.Chord D1 D2 D4 A4>
{17.0} <music21.chord.Chord D1 D2 A3 A4>
{17.5} <music21.chord.Chord D1 D2 C4 A4>
{17.75} <music21.chord.Chord C4 A4>
{18.0} <music21.chord.Chord D2 D3 F3 A3 D4>
{18.75} <music21.chord.Chord F3 A3 D4>
{19.0} <music21.chord.Chord D2 D3 F3 A3 D4>
{20.25} <music21.chord.Chord F3 A3 D4>
{20.5} <music21.chord.Chord C2 C3 A3 C4 E4>
{21.0} <music21.chord.Chord B-1 B-2 B-3 D4 F4>
{21.75} <music21.chord.Chord B-3 D4 F4>
{22.0} <music21.chord.Chord B-1 B-2 B-3 D4 F4>
{23.25} <music21.chord.Chord B-3 D4 F4>
{23.5} <music

In [23]:
total_time = 0
z = 0

folder = 'bach'
filename = folder + '_dataset.npz'
midi_files = glob.glob("./data/Classes/"+folder+"/*.mid") 

training_arrays = []
for f in tqdm.auto.tqdm(midi_files):
    try:
        start = time.perf_counter()
        s = converter.parse(f)
        #print(len(s.parts))
        total_time += time.perf_counter() - start
    except:
        continue
    start = time.perf_counter()
    arr = streamToData(s)
    training_arrays.append(arr)
    #print("Converted:", f, "it took", time.clock() - start)
    total_time += time.perf_counter() - start 
    z+=1
training_dataset = np.array(training_arrays, dtype=object)
print('Writing Melody Training Dataset to file...')
np.savez(filename, train=training_dataset)
print('Total number of converted files: ', z)
print('Total conversion time is:', total_time, 'seconds, which is', total_time/60, 'minutes' )

HBox(children=(FloatProgress(value=0.0, max=461.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  441
Total conversion time is: 1238.0152410000017 seconds, which is 20.633587350000028 minutes


In [25]:
def create_dataset(folder):
    total_time = 0
    z = 0
    filename = folder + '_dataset.npz'
    midi_files = glob.glob("./data/Classes/"+folder+"/*.mid") 

    training_arrays = []
    for f in tqdm.auto.tqdm(midi_files):
        try:
            start = time.perf_counter()
            s = converter.parse(f)
            #print(len(s.parts))
            total_time += time.perf_counter() - start
        except:
            continue
        start = time.perf_counter()
        arr = streamToData(s)
        training_arrays.append(arr)
        #print("Converted:", f, "it took", time.clock() - start)
        total_time += time.perf_counter() - start 
        z+=1
    training_dataset = np.array(training_arrays, dtype=object)
    print('Writing Melody Training Dataset to file...')
    np.savez(filename, train=training_dataset)
    print('Total number of converted files: ', z)
    print('Total conversion time is:', total_time, 'seconds, which is', total_time/60, 'minutes' )

In [24]:
#n_classes = ['bach','backstreetboys','beatles','beethoven','brahms','britneyspears',
#             'chopin','coldplay','debussy','haydn','liszt','mendelssohn',
#            'mozart','nirvana','paganini','queen','rachmaninow','schubert',
#            'schumann','tchaikovsky']

In [26]:
for folder in n_classes:
    create_dataset(folder)

HBox(children=(FloatProgress(value=0.0, max=135.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  126
Total conversion time is: 527.5804745000005 seconds, which is 8.793007908333342 minutes


HBox(children=(FloatProgress(value=0.0, max=966.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  957
Total conversion time is: 2291.0069929000006 seconds, which is 38.183449881666675 minutes


HBox(children=(FloatProgress(value=0.0, max=186.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  146
Total conversion time is: 920.3372887000023 seconds, which is 15.338954811666705 minutes


HBox(children=(FloatProgress(value=0.0, max=88.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  63
Total conversion time is: 358.9276908000011 seconds, which is 5.982128180000018 minutes


HBox(children=(FloatProgress(value=0.0, max=131.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  129
Total conversion time is: 438.3195746000001 seconds, which is 7.305326243333336 minutes


HBox(children=(FloatProgress(value=0.0, max=202.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  112
Total conversion time is: 491.195509899997 seconds, which is 8.186591831666616 minutes


HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  50
Total conversion time is: 152.16094509999857 seconds, which is 2.536015751666643 minutes


HBox(children=(FloatProgress(value=0.0, max=63.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  51
Total conversion time is: 125.66412819999368 seconds, which is 2.0944021366665613 minutes


HBox(children=(FloatProgress(value=0.0, max=132.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  113
Total conversion time is: 338.2087835999919 seconds, which is 5.636813059999865 minutes


HBox(children=(FloatProgress(value=0.0, max=49.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  31
Total conversion time is: 484.29771029999847 seconds, which is 8.071628504999975 minutes


HBox(children=(FloatProgress(value=0.0, max=98.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  41
Total conversion time is: 169.43090970000776 seconds, which is 2.8238484950001292 minutes


HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  224
Total conversion time is: 946.983350699993 seconds, which is 15.783055844999884 minutes


HBox(children=(FloatProgress(value=0.0, max=154.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  125
Total conversion time is: 260.5675103999947 seconds, which is 4.342791839999912 minutes


HBox(children=(FloatProgress(value=0.0, max=114.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  111
Total conversion time is: 117.95317599999726 seconds, which is 1.965886266666621 minutes


HBox(children=(FloatProgress(value=0.0, max=218.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  217
Total conversion time is: 815.2172914999828 seconds, which is 13.586954858333048 minutes


HBox(children=(FloatProgress(value=0.0, max=60.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  46
Total conversion time is: 696.8841422999903 seconds, which is 11.61473570499984 minutes


HBox(children=(FloatProgress(value=0.0, max=106.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  57
Total conversion time is: 539.7546411999956 seconds, which is 8.995910686666594 minutes


HBox(children=(FloatProgress(value=0.0, max=136.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  83
Total conversion time is: 489.98895290000473 seconds, which is 8.166482548333413 minutes


HBox(children=(FloatProgress(value=0.0, max=51.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  30
Total conversion time is: 212.36233190000166 seconds, which is 3.539372198333361 minutes


In [27]:
n_classes = ['liszt']

In [28]:
for folder in n_classes:
    create_dataset(folder)

HBox(children=(FloatProgress(value=0.0, max=58.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  33
Total conversion time is: 151.03786419999597 seconds, which is 2.5172977366665994 minutes


HBox(children=(FloatProgress(value=0.0, max=154.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  125
Total conversion time is: 48.196834699994724 seconds, which is 0.8032805783332454 minutes


HBox(children=(FloatProgress(value=0.0, max=114.0), HTML(value='')))


Writing Melody Training Dataset to file...
Total number of converted files:  111
Total conversion time is: 17.669796699979997 seconds, which is 0.2944966116663333 minutes


HBox(children=(FloatProgress(value=0.0, max=218.0), HTML(value='')))




KeyboardInterrupt: 