In [1]:
import joblib
import pretty_midi
import visual_midi
import pandas as pd
import numpy as np
from os import listdir
from os.path import getsize
from MIDIComposingAI.create_dataset import extract_accompaniment_melody
from MIDIComposingAI.utils import piano_roll_to_pretty_midi
from scipy.sparse import csr_matrix

# Exploring MIDI file through pretty_midi library

## Visualize MIDI

In [2]:
# Let's take some examples
examples_files = []
path = '../raw_data/pretty_midi'
directory = listdir(path)

for file in directory:
    if getsize(f'{path}/{file}') < 200000:
        examples_files.append(joblib.load(f'{path}/{file}'))
    if len(examples_files) >= 10:
        break

In [4]:
examples_files[0].get_piano_roll(fs=50).shape

(128, 7235)

In [3]:
examples_files

[<pretty_midi.pretty_midi.PrettyMIDI at 0x7ffa4ed1d580>,
 <pretty_midi.pretty_midi.PrettyMIDI at 0x7ffa4ea8efd0>,
 <pretty_midi.pretty_midi.PrettyMIDI at 0x7ffa4eabffa0>,
 <pretty_midi.pretty_midi.PrettyMIDI at 0x7ffa4e9eff70>,
 <pretty_midi.pretty_midi.PrettyMIDI at 0x7ffa4ea3efd0>,
 <pretty_midi.pretty_midi.PrettyMIDI at 0x7ffa4ea36f70>,
 <pretty_midi.pretty_midi.PrettyMIDI at 0x7ffa4ea01fd0>,
 <pretty_midi.pretty_midi.PrettyMIDI at 0x7ffa4ea05fd0>,
 <pretty_midi.pretty_midi.PrettyMIDI at 0x7ffa4e9b9f70>,
 <pretty_midi.pretty_midi.PrettyMIDI at 0x7ffa4e9bef70>]

In [4]:
pd.DataFrame(examples_files[0].get_piano_roll(fs=1_000)).T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,118,119,120,121,122,123,124,125,126,127
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
144712,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
144713,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
144714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
144715,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
pd.DataFrame(examples_files[0].get_chroma(fs=1_000)).T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
144712,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67.0,0.0,0.0,0.0,0.0
144713,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67.0,0.0,0.0,0.0,0.0
144714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67.0,0.0,0.0,0.0,0.0
144715,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67.0,0.0,0.0,0.0,0.0


In [6]:
%%capture --no-display

# Let's take a file example
test_file = examples_files[3]

# Plot a piano roll visulasiation of the file
plotter = visual_midi.Plotter()
plotter.show_notebook(test_file)

In [7]:
# Now we want to create a accompaniment and a melody from this file

In [8]:
test_file.get_piano_roll()[:, :10_000].shape

(128, 8200)

In [9]:
def extract_melody(pretty_midi_file, fs=1_00, ratio=0.01, sample_size=10_000, sample_set=0):
    """
    Extract melody from a pretty_midi file.
    
    Args :
        pretty_midi_file : a pretty_midi.Pretty_midi() file
        fs : number of frame per second, use to create the piano roll from the pretty_midi file
        ratio : the ratio above wich we won't accept a note to be extract, it prevents from having jerky melodies
        sample_size : integer, the size of the sample we wan't to extract the melody from.
                      If you want all the piece -> sample_size=piano_roll.shape[1]
        sample_set : integer, it allows you to choose where in the piece you want to extract the melody
                      
    Return : a tuple of pretty_midi.piano_roll variables : (accompaniment, melody)
    """
    piano_roll = pretty_midi_file.get_piano_roll(fs=fs)[:, sample_size*sample_set:sample_size*(sample_set+1)]
    empty_piano_roll = np.zeros(piano_roll.shape)
    nb_instant = 0
    for i in range(sample_size):
        nb_instant += 1
        for j in range(127, 0, -1):
            try:
                if piano_roll[j][i] != 0. and abs(last_played_note - piano_roll[j][i])/nb_instant <= ratio:
                    last_played_note = piano_roll[j][i]
                    empty_piano_roll[j][i] = last_played_note
                    piano_roll[j][i] = 0.
                    nb_instant = 0
                    break
            except:
                if piano_roll[j][i] != 0.:
                    last_played_note = piano_roll[j][i]
                    empty_piano_roll[j][i] = last_played_note
                    piano_roll[j][i] = 0.
                    nb_instant = 0
                    break
    return (piano_roll, empty_piano_roll)

In [10]:
accompaniment, melody = extract_accompaniment_melody(test_file, fs=50, sample_length=10, ratio=0.01)

In [11]:
# We can make those piano_roll pretty_midi files again

melody = piano_roll_to_pretty_midi(melody, fs=50)
accompaniment = piano_roll_to_pretty_midi(accompaniment, fs=50)

# And store them

joblib.dump(melody, 'melody')
joblib.dump(accompaniment, 'accompaniment')

# And we can hear the result

melody.write('melody.mid')
accompaniment.write('accompaniment.mid')

In [12]:
%%capture --no-display

# Let's see what the melody look like
coloring = visual_midi.presets.Coloring.INSTRUMENT
plotter = visual_midi.Plotter(coloring=coloring)
plotter.show_notebook(melody)

In [13]:
%%capture --no-display

# And the accompaniment
coloring = visual_midi.presets.Coloring.INSTRUMENT
plotter = visual_midi.Plotter(coloring=coloring)
plotter.show_notebook(accompaniment)

## Explore pretty_midi files attributes
Let's compare initial pretty_midi file with the melody and accompaniment

In [14]:
test_file.get_tempo_changes(), melody.get_tempo_changes(), accompaniment.get_tempo_changes()

((array([0.]), array([120.])),
 (array([0.]), array([120.])),
 (array([0.]), array([120.])))

In [15]:
test_file.key_signature_changes, melody.key_signature_changes, accompaniment.key_signature_changes

([KeySignature(key_number=0, time=0.0)], [], [])

In [16]:
# For the key signature, we have to investigate further through different files.
for i, file in enumerate(examples_files):
    accompaniment, melody = extract_melody(file)
    accompaniment = piano_roll_to_pretty_midi(accompaniment, fs=50)
    melody = piano_roll_to_pretty_midi(melody, fs=50)
    print(f'file number {i+1} :\n')
    print(f'Initial file : {file.key_signature_changes}')
    print(f'Melody : {melody.key_signature_changes}')
    print(f'Accompaniment : {accompaniment.key_signature_changes}')
    print('......................................................\n')

file number 1 :

Initial file : [KeySignature(key_number=0, time=0.0)]
Melody : []
Accompaniment : []
......................................................



IndexError: index 8400 is out of bounds for axis 0 with size 8400

### Further investigation on chroma vectors

In [17]:
chroma_vectors = test_file.get_chroma(fs=1_000)
chroma_vectors.shape

(12, 82000)

In [18]:
melody_chroma_vectors = melody.get_chroma(fs=1_000) # Here we simply take the melody from the last loop
pd.DataFrame(melody_chroma_vectors).T.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [19]:
# Let's compare the chroma_vectors to the piano_roll visualisation

In [20]:
%%capture --no-display

coloring = visual_midi.presets.Coloring.INSTRUMENT
plotter = visual_midi.Plotter(coloring=coloring)
plotter.show_notebook(melody)

In [21]:
test_file.get_pitch_class_histogram()

array([0.2244489 , 0.        , 0.1002004 , 0.00200401, 0.04809619,
       0.15430862, 0.04208417, 0.13627255, 0.        , 0.20641283,
       0.01002004, 0.0761523 ])

# Create a simple dataset for DL

In [22]:
from MIDIComposingAI.create_csv_dataset import create_nparray_dataset

In [23]:
def separate_pitch_velocity(target):
    """
    Separate pitch and velocity within the target
    """
    sample_velocities = []
    sample_pitches = []
    for sample in target:
        velocities = []
        pitches = []
        for frame in sample.T:
            frame = list(frame)
            velocity = np.sum(frame)
            velocities.append(velocity)
            pitches.append(frame.index(velocity))
        sample_velocities.append(velocities)
        sample_pitches.append(pitches)
    
    return (sample_pitches, sample_velocities)

def create_data_file(file, name, mode='separate'):
    X, y = create_simple_dataset(file)
    df = pd.DataFrame(columns=['accompaniment', 'melody_pitches', 'melody_velocities'])
    df['accompaniment'] = [csr_matrix(accompaniment) for accompaniment in X]
    pitches, velocities = separate_pitch_velocity(y)
    df['melody_pitches'] = [csr_matrix(pitch) for pitch in pitches]
    df['melody_velocities'] = [csr_matrix(velocity) for velocity in velocities]
    if mode == 'separate':
        joblib.dump(df, f'../raw_data/pandas_dataframes/simple_dataset/{name}')
    if mode == 'concatenate':
        pass
    del([X, y, pitches, velocities, df])

In [24]:
# X, y = create_simple_dataset(examples_files[0])

In [25]:
# /!\ KERNEL RAM ISSUE

for i, file in enumerate(examples_files):
    create_nparray_dataset(file, directory='simple_dataset', name=f'nparray{i}')

In [26]:
# df = joblib.load('../raw_data/pandas_dataframes/simple_dataset/sample0')

In [None]:
# df