In [1]:
import joblib
import pretty_midi
import visual_midi
import pandas as pd
import numpy as np
from os import listdir
from os.path import getsize
from MIDIComposingAI.utils import piano_roll_to_pretty_midi
from scipy.sparse import csr_matrix

# Exploring MIDI file through pretty_midi library

## Visualize MIDI

In [2]:
# Let's take some examples
examples_files = []
path = '../raw_data/pretty_midi'
directory = listdir(path)

for file in directory:
    if getsize(f'{path}/{file}') < 300000:
        examples_files.append(joblib.load(f'{path}/{file}'))
    if len(examples_files) >= 10:
        break

In [3]:
examples_files

[<pretty_midi.pretty_midi.PrettyMIDI at 0x7f0d48129a00>,
 <pretty_midi.pretty_midi.PrettyMIDI at 0x7f0d0df43fd0>,
 <pretty_midi.pretty_midi.PrettyMIDI at 0x7f0d0de78f70>,
 <pretty_midi.pretty_midi.PrettyMIDI at 0x7f0d0deeb100>,
 <pretty_midi.pretty_midi.PrettyMIDI at 0x7f0d0de6cfd0>,
 <pretty_midi.pretty_midi.PrettyMIDI at 0x7f0d0dea1fa0>,
 <pretty_midi.pretty_midi.PrettyMIDI at 0x7f0d0de8dfd0>,
 <pretty_midi.pretty_midi.PrettyMIDI at 0x7f0d0de98fa0>,
 <pretty_midi.pretty_midi.PrettyMIDI at 0x7f0d0dea4fd0>,
 <pretty_midi.pretty_midi.PrettyMIDI at 0x7f0d0de3bfd0>]

In [4]:
pd.DataFrame(examples_files[0].get_piano_roll(fs=1_000)).T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,118,119,120,121,122,123,124,125,126,127
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
144712,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
144713,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
144714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
144715,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
pd.DataFrame(examples_files[0].get_chroma(fs=1_000)).T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
144712,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67.0,0.0,0.0,0.0,0.0
144713,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67.0,0.0,0.0,0.0,0.0
144714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67.0,0.0,0.0,0.0,0.0
144715,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67.0,0.0,0.0,0.0,0.0


In [6]:
%%capture --no-display

# Let's take a file example
test_file = examples_files[0]

# Plot a piano roll visulasiation of the file
plotter = visual_midi.Plotter()
plotter.show_notebook(test_file)

Exception: Unknown qpm in: (array([  0.        ,   5.373132  ,   6.310632  ,   7.206154  ,
         8.088506  ,   9.026006  ,   9.935096  ,  10.830618  ,
        11.753693  ,  12.623257  ,  14.414301  ,  15.296653  ,
        16.280259  ,  17.175781  ,  18.12816   ,  18.917633  ,
        19.840708  ,  20.778208  ,  21.687298  ,  22.639677  ,
        23.522029  ,  24.391593  ,  25.329093  ,  26.224615  ,
        27.081757  ,  28.019257  ,  28.942332  ,  29.775665  ,
        30.645229  ,  31.628835  ,  32.537925  ,  36.120013  ,
        36.989577  ,  37.885099  ,  38.822599  ,  39.718121  ,
        40.655621  ,  41.525185  ,  42.44826   ,  43.305402  ,
        44.305402  ,  45.972068  ,  46.924447  ,  47.806799  ,
        48.715889  ,  49.611411  ,  50.56379   ,  51.420932  ,
        52.373311  ,  53.282401  ,  54.164753  ,  55.102253  ,
        56.893297  ,  57.861038  ,  59.600166  ,  60.523241  ,
        61.506847  ,  62.459226  ,  63.396726  ,  64.279078  ,
        65.1746    ,  66.056952  ,  66.914094  ,  67.809616  ,
        68.793222  ,  69.662786  ,  70.545138  ,  71.468213  ,
        72.363735  ,  73.301235  ,  74.146305  ,  75.937349  ,
        77.755529  ,  78.651051  ,  79.560141  ,  80.429705  ,
        81.274775  ,  82.212275  ,  83.121365  ,  84.996365  ,
        85.878717  ,  86.735859  ,  88.526903  ,  89.464403  ,
        91.255447  ,  92.125011  ,  93.07739   ,  93.899307  ,
        94.851686  ,  95.774761  ,  97.565805  ,  98.422947  ,
       100.213991  , 101.16637   , 102.061892  , 102.984967  ,
       104.776011  , 105.633153  , 106.528675  , 107.466175  ,
       108.361697  , 109.299197  , 110.222272  , 112.013316  ,
       112.895668  , 113.765232  , 115.556276  , 117.431276  ,
       118.326798  , 119.196362  , 120.133862  , 121.029384  ,
       121.966884  , 124.231034  , 125.126556  , 126.89126   ,
       127.786782  , 128.770388  , 129.62753   , 132.314096  ,
       134.132276  , 135.014628  , 135.91015   , 136.779714  ,
       137.675236  , 138.627615  , 139.523137  , 140.432227  ,
       141.355302  , 141.63850512, 141.79723496, 141.96860576,
       142.06235576, 142.33777918, 142.51019284, 142.65273649,
       142.83130782, 143.01649299, 143.29951174, 143.43172326,
       143.52976243, 143.61904808, 143.72321475, 143.94927845,
       144.07156098, 144.19656095, 144.31283986, 144.59557787,
       144.74801683]), array([67.00002903, 64.        , 67.00002903, 68.00007253, 64.        ,
       66.000066  , 67.00002903, 65.00013542, 69.0000966 , 67.00002903,
       68.00007253, 61.00003457, 67.00002903, 63.00012915, 76.00006587,
       65.00013542, 64.        , 66.000066  , 63.00012915, 68.00007253,
       69.0000966 , 64.        , 67.00002903, 70.00007   , 64.        ,
       65.00013542, 72.0000288 , 69.0000966 , 61.00003457, 66.000066  ,
       67.00002903, 69.0000966 , 67.00002903, 64.        , 67.00002903,
       64.        , 69.0000966 , 65.00013542, 70.00007   , 60.        ,
       72.0000288 , 63.00012915, 68.00007253, 66.000066  , 67.00002903,
       63.00012915, 70.00007   , 63.00012915, 66.000066  , 68.00007253,
       64.        , 67.00002903, 62.00005993, 69.0000966 , 65.00013542,
       61.00003457, 63.00012915, 64.        , 68.00007253, 67.00002903,
       68.00007253, 70.00007   , 67.00002903, 61.00003457, 69.0000966 ,
       68.00007253, 65.00013542, 67.00002903, 64.        , 71.0000355 ,
       67.00002903, 66.000066  , 67.00002903, 66.000066  , 69.0000966 ,
       71.0000355 , 64.        , 66.000066  , 64.        , 68.00007253,
       70.00007   , 67.00002903, 64.        , 67.00002903, 69.0000966 ,
       63.00012915, 73.00007178, 63.00012915, 65.00013542, 67.00002903,
       70.00007   , 67.00002903, 63.00012915, 67.00002903, 65.00013542,
       67.00002903, 70.00007   , 67.00002903, 64.        , 67.00002903,
       64.        , 65.00013542, 67.00002903, 68.00007253, 69.0000966 ,
       67.00002903, 64.        , 67.00002903, 69.0000966 , 64.        ,
       67.00002903, 64.        , 53.00002208, 67.00002903, 68.00007253,
       67.00002903, 61.00003457, 70.00007   , 67.00002903, 66.000066  ,
       68.00007253, 67.00002903, 69.0000966 , 67.00002903, 63.00012915,
       67.00002903, 66.000066  , 65.00013542, 64.        , 63.00012915,
       62.00005993, 60.        , 59.00006687, 58.00004253, 57.0000855 ,
       56.00002987, 54.0000054 , 53.00002208, 52.00000693, 51.0000255 ,
       49.00003185, 48.        , 47.00002742, 46.00002913, 45.00001125,
       43.00005662, 42.0000126 , 41.00001777, 40.        ]))

In [7]:
# Now we want to create a accompaniment and a melody from this file

In [8]:
test_file.get_piano_roll()[:, :10_000].shape

(128, 10000)

In [9]:
def extract_melody(pretty_midi_file, fs=1_000, ratio=0.01, sample_size=10_000, sample_set=0):
    """
    Extract melody from a pretty_midi file.
    
    Args :
        pretty_midi_file : a pretty_midi.Pretty_midi() file
        fs : number of frame per second, use to create the piano roll from the pretty_midi file
        ratio : the ratio above wich we won't accept a note to be extract, it prevents from having jerky melodies
        sample_size : integer, the size of the sample we wan't to extract the melody from.
                      If you want all the piece -> sample_size=piano_roll.shape[1]
        sample_set : integer, it allows you to choose where in the piece you want to extract the melody
                      
    Return : a tuple of pretty_midi.piano_roll variables : (accompaniment, melody)
    """
    piano_roll = pretty_midi_file.get_piano_roll(fs=fs)[:, sample_size*sample_set:sample_size*(sample_set+1)]
    empty_piano_roll = np.zeros(piano_roll.shape)
    nb_instant = 0
    for i in range(sample_size):
        nb_instant += 1
        for j in range(127, 0, -1):
            try:
                if piano_roll[j][i] != 0. and abs(last_played_note - piano_roll[j][i])/nb_instant <= ratio:
                    last_played_note = piano_roll[j][i]
                    empty_piano_roll[j][i] = last_played_note
                    piano_roll[j][i] = 0.
                    nb_instant = 0
                    break
            except:
                if piano_roll[j][i] != 0.:
                    last_played_note = piano_roll[j][i]
                    empty_piano_roll[j][i] = last_played_note
                    piano_roll[j][i] = 0.
                    nb_instant = 0
                    break
    return (piano_roll, empty_piano_roll)

In [10]:
accompaniment, melody = extract_melody(test_file, ratio=0.01)

In [11]:
# We can make those piano_roll pretty_midi files again

melody = piano_roll_to_pretty_midi(melody, fs=1_000)
accompaniment = piano_roll_to_pretty_midi(accompaniment, fs=1_000)

# And store them

joblib.dump(melody, 'melody')
joblib.dump(accompaniment, 'accompaniment')

# And we can hear the result

melody.write('melody.mid')
accompaniment.write('accompaniment.mid')

In [12]:
%%capture --no-display

# Let's see what the melody look like
coloring = visual_midi.presets.Coloring.INSTRUMENT
plotter = visual_midi.Plotter(coloring=coloring)
plotter.show_notebook(melody)

In [13]:
%%capture --no-display

# And the accompaniment
coloring = visual_midi.presets.Coloring.INSTRUMENT
plotter = visual_midi.Plotter(coloring=coloring)
plotter.show_notebook(accompaniment)

## Explore pretty_midi files attributes
Let's compare initial pretty_midi file with the melody and accompaniment

In [14]:
test_file.get_tempo_changes(), melody.get_tempo_changes(), accompaniment.get_tempo_changes()

((array([  0.        ,   5.373132  ,   6.310632  ,   7.206154  ,
           8.088506  ,   9.026006  ,   9.935096  ,  10.830618  ,
          11.753693  ,  12.623257  ,  14.414301  ,  15.296653  ,
          16.280259  ,  17.175781  ,  18.12816   ,  18.917633  ,
          19.840708  ,  20.778208  ,  21.687298  ,  22.639677  ,
          23.522029  ,  24.391593  ,  25.329093  ,  26.224615  ,
          27.081757  ,  28.019257  ,  28.942332  ,  29.775665  ,
          30.645229  ,  31.628835  ,  32.537925  ,  36.120013  ,
          36.989577  ,  37.885099  ,  38.822599  ,  39.718121  ,
          40.655621  ,  41.525185  ,  42.44826   ,  43.305402  ,
          44.305402  ,  45.972068  ,  46.924447  ,  47.806799  ,
          48.715889  ,  49.611411  ,  50.56379   ,  51.420932  ,
          52.373311  ,  53.282401  ,  54.164753  ,  55.102253  ,
          56.893297  ,  57.861038  ,  59.600166  ,  60.523241  ,
          61.506847  ,  62.459226  ,  63.396726  ,  64.279078  ,
          65.1746    ,  6

In [15]:
test_file.key_signature_changes, melody.key_signature_changes, accompaniment.key_signature_changes

([KeySignature(key_number=0, time=0.0)], [], [])

In [16]:
# For the key signature, we have to investigate further through different files.
for i, file in enumerate(examples_files):
    accompaniment, melody = extract_melody(file)
    accompaniment = piano_roll_to_pretty_midi(accompaniment, fs=1_000)
    melody = piano_roll_to_pretty_midi(melody, fs=1_000)
    print(f'file number {i+1} :\n')
    print(f'Initial file : {file.key_signature_changes}')
    print(f'Melody : {melody.key_signature_changes}')
    print(f'Accompaniment : {accompaniment.key_signature_changes}')
    print('......................................................\n')

file number 1 :

Initial file : [KeySignature(key_number=0, time=0.0)]
Melody : []
Accompaniment : []
......................................................

file number 2 :

Initial file : []
Melody : []
Accompaniment : []
......................................................

file number 3 :

Initial file : [KeySignature(key_number=1, time=0.0)]
Melody : []
Accompaniment : []
......................................................

file number 4 :

Initial file : [KeySignature(key_number=1, time=0.0), KeySignature(key_number=3, time=44.571436), KeySignature(key_number=0, time=102.85716), KeySignature(key_number=3, time=147.428596)]
Melody : []
Accompaniment : []
......................................................

file number 5 :

Initial file : []
Melody : []
Accompaniment : []
......................................................

file number 6 :

Initial file : [KeySignature(key_number=0, time=0.0)]
Melody : []
Accompaniment : []
...............................................

### Further investigation on chroma vectors

In [17]:
chroma_vectors = test_file.get_chroma(fs=1_000)
chroma_vectors.shape

(12, 144717)

In [18]:
melody_chroma_vectors = melody.get_chroma(fs=1_000) # Here we simply take the melody from the last loop
pd.DataFrame(melody_chroma_vectors).T.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0


In [19]:
# Let's compare the chroma_vectors to the piano_roll visualisation

In [20]:
%%capture --no-display

coloring = visual_midi.presets.Coloring.INSTRUMENT
plotter = visual_midi.Plotter(coloring=coloring)
plotter.show_notebook(melody)

In [21]:
test_file.get_pitch_class_histogram()

array([0.11359026, 0.00811359, 0.19878296, 0.0040568 , 0.0872211 ,
       0.        , 0.0872211 , 0.23732252, 0.        , 0.11764706,
       0.        , 0.14604462])

# Create a simple dataset for DL

In [22]:
from MIDIComposingAI.create_csv_dataset import create_simple_dataset

In [24]:
def separate_pitch_velocity(target):
    """
    Separate pitch and velocity within the target
    """
    sample_velocities = []
    sample_pitches = []
    for sample in target:
        velocities = []
        pitches = []
        for frame in sample.T:
            frame = list(frame)
            velocity = np.sum(frame)
            velocities.append(velocity)
            pitches.append(frame.index(velocity))
        sample_velocities.append(velocities)
        sample_pitches.append(pitches)
    
    return (sample_pitches, sample_velocities)

def create_data_file(file, name, mode='separate'):
    X, y = create_simple_dataset(file)
    df = pd.DataFrame(columns=['accompaniment', 'melody_pitches', 'melody_velocities'])
    df['accompaniment'] = [csr_matrix(accompaniment) for accompaniment in X]
    pitches, velocities = separate_pitch_velocity(y)
    df['melody_pitches'] = [csr_matrix(pitch) for pitch in pitches]
    df['melody_velocities'] = [csr_matrix(velocity) for velocity in velocities]
    if mode == 'separate':
        joblib.dump(df, f'../raw_data/pandas_dataframes/simple_dataset/{name}')
    if mode == 'concatenate':
        pass
    del([X, y, pitches, velocities, df])

In [25]:
# X, y = create_simple_dataset(examples_files[0])

In [None]:
# /!\ KERNEL RAM ISSUE

# for i, file in enumerate(examples_files):
#     create_data_file(file, f'sample{i}')

In [None]:
# df = joblib.load('../raw_data/pandas_dataframes/simple_dataset/sample0')

In [None]:
# df