#### Import Statements

In [None]:
# Install MuseScore through apt and the arvo package through pip.
# NOTE(review): `sudo` asks for a password, which a notebook cell cannot answer
# interactively, and `apt` only exists on Debian/Ubuntu-style systems — confirm
# the target environment before relying on this cell.
!sudo apt update
!sudo apt install musescore -y
!pip install arvo

Password:

In [3]:
# Print the working directory; the relative data/ paths used below resolve against it.
!pwd

/Users/jessicakalip/Desktop/Apps/piano_transcription/src/notebooks


In [1]:
%load_ext autoreload
%autoreload 2
    
from mido import MidiFile, MidiTrack, Message, MetaMessage
from os import listdir
from os.path import isfile, split, join
import argparse
from thepkg.ml_logic.preprocessor import convert_midi_to_wav, spectogram_stft, spectogram_cqt
from midi2audio import FluidSynth
import matplotlib.pyplot as plt
import librosa
import numpy as np
import pandas as pd
import os
import librosa
import string
from music21 import converter, corpus, instrument, midi, note, chord, pitch
from music21 import * # import everything from music21
from arvo import tools, isorhythm, minimalism, tintinnabuli
from sklearn.model_selection import train_test_split




ModuleNotFoundError: No module named 'thepkg'

# Data Preprocessing

## Download 100 MIDI files, split them into 15-second chunks, and save them in data/short_midis

In [None]:
def split_midi_into_15_second_parts(src_midi_path='data/midis/enchanted.mid',
                                    dest_midi_base_path='data/short_midis/short_midis',
                                    segment_seconds=15):
    """Split a MIDI file into consecutive chunks of about ``segment_seconds`` seconds.

    All tracks of the source file are merged into a single track in playback
    order, then the messages are copied into successive single-track files.
    Chunk ``n`` is written to ``f'{dest_midi_base_path}_part_{n}.mid'``.

    Parameters
    ----------
    src_midi_path : str
        Path of the MIDI file to split.
    dest_midi_base_path : str
        Path prefix for the chunk files (the directory must already exist).
    segment_seconds : float, default 15
        Target chunk duration in seconds.

    Returns
    -------
    None
        The chunks are written to disk.

    Notes
    -----
    * A chunk is closed as soon as the next message would push its accumulated
      delta time past the limit; that message becomes the first message of the
      next chunk and keeps its original delta time.
    * Messages are copied unchanged, so a note that is sounding at a chunk
      boundary is not closed in the earlier chunk.
    """
    # Local import: mido is already a dependency of this notebook, but
    # merge_tracks is not part of the top-level import cell.
    from mido import merge_tracks

    src_midi = MidiFile(src_midi_path)
    ticks_per_beat = src_midi.ticks_per_beat

    # MIDI default tempo: 500,000 microseconds per beat (120 BPM).
    default_tempo = 500000
    current_tempo = default_tempo

    def ticks_per_segment(tempo):
        # tempo is microseconds per beat, so beats/second = 1e6 / tempo and
        # ticks/second = ticks_per_beat * 1e6 / tempo.
        return ticks_per_beat * 1000000 / tempo * segment_seconds

    def start_segment(tempo):
        # Fresh single-track file that inherits the source resolution and,
        # when it differs from the MIDI default, the tempo currently in force.
        midi_file = MidiFile()
        midi_file.ticks_per_beat = ticks_per_beat
        track = MidiTrack()
        midi_file.tracks.append(track)
        if tempo != default_tempo:
            track.append(MetaMessage('set_tempo', tempo=tempo, time=0))
        return midi_file, track

    segment_ticks = ticks_per_segment(current_tempo)
    current_segment = 0
    current_ticks_in_segment = 0
    segment_midi, new_track = start_segment(current_tempo)

    # Tracks of a multi-track file play simultaneously; merging them first
    # yields one stream of messages in playback order with correct delta times.
    for msg in merge_tracks(src_midi.tracks):
        # A tempo change alters how many ticks fit into one segment.
        if msg.type == 'set_tempo':
            current_tempo = msg.tempo
            segment_ticks = ticks_per_segment(current_tempo)

        # Close the current chunk if this message would exceed its time budget.
        if current_ticks_in_segment + msg.time > segment_ticks:
            segment_midi.save(f'{dest_midi_base_path}_part_{current_segment}.mid')
            current_segment += 1
            current_ticks_in_segment = 0
            segment_midi, new_track = start_segment(current_tempo)

        new_track.append(msg)
        current_ticks_in_segment += msg.time

    # Save the last chunk if it has any content.
    if current_ticks_in_segment > 0:
        segment_midi.save(f'{dest_midi_base_path}_part_{current_segment}.mid')

In [None]:
## TODO: Iterate over 100 MIDI files (currently only the first 10) and call split_midi_into_15_second_parts()
# Sorted and filtered so hidden/non-MIDI files are skipped and the order is repeatable.
midis = sorted(
    name for name in os.listdir('data/midis')
    if name.lower().endswith(('.mid', '.midi'))
)

# The loop variable is deliberately not called `midi`: that name is the music21
# module imported at the top of the notebook and used again by open_midi().
for midi_filename in midis[0:10]:
    midi_path = os.path.join('data/midis', midi_filename)
    # One output prefix per source file; with a shared prefix every file would
    # overwrite the previous file's *_part_N.mid chunks.
    chunk_prefix = os.path.join('data/short_midis', os.path.splitext(midi_filename)[0])
    split_midi_into_15_second_parts(midi_path, chunk_prefix)

## X_train: convert short MIDI to wav, and then to STFT array

In [None]:
## TODO: run spectogram_stft() on all short_midis and append to array X
# spectogram_stft is imported from thepkg.ml_logic.preprocessor and is called here
# with the short-MIDI directory and a soundfont path.
# NOTE(review): its return type and ordering are not visible in this notebook —
# confirm that X lines up index-for-index with y built further down.
X = spectogram_stft('./data/short_midis/', "./soundfont/FluidR3_GM.sf2")

In [None]:
# Number of items returned by spectogram_stft().
len(X)

In [None]:
# Container type of X (needed to decide how to reshape it below).
type(X)

In [None]:
# Shape of the first item in X.
X[0].shape

In [None]:
## Attempt to reshape X before inputting to model
# NOTE(review): the hard-coded (100, 1025, 500) only succeeds when X holds exactly
# 100 * 1025 * 500 values — check it against len(X) and X[0].shape above.
reshaped_X = np.array(X).reshape(100, 1025, 500)
reshaped_X.shape

## y_train: convert short MIDIs to a number array

### midi to array function

In [None]:
def msg2dict(msg):
    """Parse the string form of a MIDI message into its numeric fields.

    Parameters
    ----------
    msg : str
        Text such as ``'note_on channel=0 note=60 velocity=64 time=120'``.

    Returns
    -------
    list
        ``[fields, on_]``. ``fields`` always contains ``'time'`` and, for note
        messages, also ``'note'`` and ``'velocity'`` (all ints). ``on_`` is
        ``True`` for ``note_on``, ``False`` for ``note_off`` and ``None`` for
        any other message.
    """
    drop_punctuation = str.maketrans({symbol: None for symbol in string.punctuation})

    def read_int(key):
        # Take the text from the last occurrence of `key` up to the next space,
        # keep what follows '=', and strip stray punctuation such as ')'.
        token = msg[msg.rfind(key):].split(' ')[0]
        return int(token.split('=')[1].translate(drop_punctuation))

    if 'note_on' in msg:
        on_ = True
    elif 'note_off' in msg:
        on_ = False
    else:
        on_ = None

    fields = {'time': read_int('time')}
    if on_ is not None:
        for key in ('note', 'velocity'):
            fields[key] = read_int(key)
    return [fields, on_]

def switch_note(last_state, note, velocity, on_=True):
    """Return a copy of the 88-key state with one key switched on or off.

    The piano has 88 keys mapped to MIDI note ids 21..108; a note outside that
    range leaves the copied state untouched.

    Parameters
    ----------
    last_state : list or None
        Previous per-key velocities; ``None`` means all keys silent.
    note : int
        MIDI note id.
    velocity : int
        Velocity stored for the key when ``on_`` is truthy.
    on_ : bool, default True
        Whether the key is pressed (store ``velocity``) or released (store 0).

    Returns
    -------
    list
        A new 88-element list; ``last_state`` itself is not modified.
    """
    if last_state is None:
        state = [0] * 88
    else:
        state = last_state.copy()

    if note < 21 or note > 108:
        return state

    state[note - 21] = velocity if on_ else 0
    return state

def get_new_state(new_msg, last_state):
    """Apply one MIDI message to the key state.

    Parameters
    ----------
    new_msg : object
        A message (or its string form); it is converted with ``str`` and
        parsed by ``msg2dict``.
    last_state : list
        Per-key velocities before the message.

    Returns
    -------
    list
        ``[new_state, time]`` where ``time`` is the message's parsed time
        field. For non-note messages ``new_state`` is ``last_state`` itself.
    """
    fields, on_ = msg2dict(str(new_msg))
    if on_ is None:
        new_state = last_state
    else:
        new_state = switch_note(last_state, note=fields['note'], velocity=fields['velocity'], on_=on_)
    return [new_state, fields['time']]
def track2seq(track):
    """Expand a MIDI track into one 88-key state per tick.

    The piano has 88 keys (note ids 21..108); notes outside that range are
    ignored by the state helpers. The first message only seeds the state; for
    every later message with a positive time, the state that was in force
    before it is repeated once per tick of that time.

    Parameters
    ----------
    track : sequence
        Messages of one track; must contain at least one message.

    Returns
    -------
    list
        List of 88-element states, one per elapsed tick.
    """
    frames = []
    current_state, _ = get_new_state(str(track[0]), [0]*88)
    for message in track[1:]:
        next_state, elapsed = get_new_state(message, current_state)
        if elapsed > 0:
            frames.extend([current_state] * elapsed)
        current_state = next_state
    return frames

def mid2arry(mid, min_msg_pct=0.1):
    """Convert a MIDI file object into a (ticks, 88) array of key velocities.

    Every track with more than ``min_msg_pct`` times the message count of the
    longest track is expanded with ``track2seq``. The per-track sequences are
    padded with silence to a common length, combined with an element-wise
    maximum, and leading/trailing all-silent ticks are trimmed.

    Parameters
    ----------
    mid : object
        Object exposing ``tracks``, a sequence of message sequences.
    min_msg_pct : float, default 0.1
        Tracks with at most this fraction of the longest track's message
        count are skipped.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_ticks, 88)``. An empty ``(0, 88)`` array is
        returned when the file has no tracks, no track yields any ticks, or
        every tick is silent.
    """
    silence = np.zeros((0, 88), dtype=int)

    tracks_len = [len(tr) for tr in mid.tracks]
    if not tracks_len:
        return silence
    min_n_msg = max(tracks_len) * min_msg_pct

    # Convert each sufficiently long track to a nested list of per-tick states.
    all_arys = []
    for track in mid.tracks:
        if len(track) > min_n_msg:
            ary = track2seq(track)
            if len(ary) > 0:
                all_arys.append(ary)
    if not all_arys:
        return silence

    # Pad every sequence with silent ticks so all tracks have the same length.
    # This happens after the loop so that every qualifying track is merged,
    # not just the first one.
    max_len = max(len(ary) for ary in all_arys)
    padded = [ary + [[0] * 88] * (max_len - len(ary)) for ary in all_arys]
    merged = np.array(padded).max(axis=0)

    # Trim: remove consecutive silent ticks at the beginning and at the end.
    sums = merged.sum(axis=1)
    ends = np.where(sums > 0)[0]
    if len(ends) == 0:
        return merged[0:0]
    # +1 keeps the last sounding tick (slice ends are exclusive).
    return merged[ends.min(): ends.max() + 1]

### Convert short_midis to midi_num_arrays

In [None]:
# Convert every short MIDI chunk into its note-velocity array with mid2arry().
# Sorted and filtered so hidden/non-MIDI files are skipped and the order is repeatable.
short_midis = sorted(
    name for name in os.listdir('data/short_midis')
    if name.lower().endswith(('.mid', '.midi'))
)

midi_arrays = []

# The loop variable is deliberately not called `midi`: that name is the music21
# module imported at the top of the notebook and used again by open_midi().
for short_midi_name in short_midis[0:100]:
    thefile = './data/short_midis/' + short_midi_name
    mid = MidiFile(thefile, clip=True)
    myarray = mid2arry(mid)
    midi_arrays.append(myarray)

y = midi_arrays
# Show the count only; displaying y itself prints every array in full.
len(y)

In [None]:
# Sanity check: number of label arrays and the shape of the first one.
print('len y:', len(y))
y[0].shape

In [None]:
## Notes: X is an STFT dataframe (for the GRU/LSTM model) or a spectrogram (for the CNN model); y is the numeric array of a MIDI
## TODO: Ensure that X and y are of the same length
# Hold out 30% of the samples; random_state fixes the shuffle so the split is repeatable.
# NOTE(review): train_test_split pairs X[i] with y[i] — confirm both were built in the same file order.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

# Modeling (GRU)

In [None]:
# The GRU architecture
# NOTE(review): Sequential, GRU, Dropout, Dense and SGD are not imported in any
# visible cell of this notebook — add the Keras imports to the import cell.
# NOTE(review): X_train.shape requires an array; confirm X is converted to an
# ndarray before the split. `lr`/`decay` are legacy SGD argument names in newer
# Keras releases — confirm against the installed version.
model = Sequential()
# First GRU layer with Dropout regularisation
model.add(GRU(units=50, return_sequences=True, input_shape=(X_train.shape[1],1), activation='tanh'))
model.add(Dropout(0.2))
# Second GRU layer
model.add(GRU(units=50, return_sequences=True, input_shape=(X_train.shape[1],1), activation='tanh'))
model.add(Dropout(0.2))
# Third GRU layer
model.add(GRU(units=50, return_sequences=True, input_shape=(X_train.shape[1],1), activation='tanh'))
model.add(Dropout(0.2))
# Fourth GRU layer
model.add(GRU(units=50, activation='tanh'))
model.add(Dropout(0.2))
# The output layer
model.add(Dense(units=1))
# Compiling the RNN
model.compile(optimizer=SGD(lr=0.01, decay=1e-7, momentum=0.9, nesterov=False),loss='mean_squared_error')
# Fitting to the training set
model.fit(X_train,y_train,epochs=50,batch_size=150)

# Modeling (LSTM)

# Result: Converting y_pred MIDI to Music Score

In [None]:
## TODO: return a y_pred (number array midi) from our model and convert to MIDI, then convert to music score

In [None]:
def arry2mid(ary, tempo=500000):
    """Convert a (ticks, 88) array of key velocities into a single-track MIDI file.

    Row ``i`` of ``ary`` is the state of the 88 piano keys (note ids 21..108)
    during tick ``i``. A message is emitted whenever a key's value differs from
    the previous row: ``note_on`` with the new velocity when the key is
    sounding, ``note_off`` when it has dropped to silence.

    Parameters
    ----------
    ary : array-like
        Per-tick key velocities, 88 columns.
    tempo : int, default 500000
        Value written to the initial ``set_tempo`` meta message.

    Returns
    -------
    MidiFile
        New file with one track. Keys still sounding in the last row are not
        closed with a ``note_off``.
    """
    # Prepend a silent row so the first real row is diffed against silence.
    new_ary = np.concatenate([np.array([[0] * 88]), np.array(ary)], axis=0)
    states = new_ary[1:]
    changes = states - new_ary[:-1]

    # Create a MIDI file with a single track that starts with the tempo.
    mid_new = MidiFile()
    track = MidiTrack()
    mid_new.tracks.append(track)
    track.append(MetaMessage('set_tempo', tempo=tempo, time=0))

    # Ticks elapsed since the last emitted message.
    last_time = 0
    for ch, cur in zip(changes, states):
        if not ch.any():  # no change in this tick
            last_time += 1
            continue

        # Velocity comes from the new state, not from the size of the change;
        # a key whose velocity changes while still sounding is re-struck
        # rather than released.
        on_notes = np.where((ch != 0) & (cur > 0))[0]
        off_notes = np.where((ch < 0) & (cur <= 0))[0]

        first_ = True
        for n in on_notes:
            new_time = last_time if first_ else 0
            track.append(Message('note_on', note=int(n) + 21, velocity=int(cur[n]), time=new_time))
            first_ = False
        for n in off_notes:
            new_time = last_time if first_ else 0
            track.append(Message('note_off', note=int(n) + 21, velocity=0, time=new_time))
            first_ = False

        if first_:
            # Nothing was emitted for this row; keep accumulating time.
            last_time += 1
        else:
            # The next row is one tick after the messages just emitted.
            last_time = 1
    return mid_new

In [None]:
# NOTE(review): `result_array` is not defined in any visible cell — presumably the
# model's predicted note array from the TODO above; define it before running this cell.
# 545455 is passed as arry2mid's `tempo` argument (used for the set_tempo message).
mid_new = arry2mid(result_array, 545455)
mid_new.save('mid_new.mid')

In [None]:
# NOTE(review): hard-coded Homebrew path — only valid on machines where MuseScore
# is installed at this location; consider reading it from configuration.
environment.set("musescoreDirectPNGPath", '/opt/homebrew/bin/mscore') # tell music21 where MuseScore is installed

def open_midi(midi_path):
    """Read a MIDI file with music21's low-level reader and return it as a stream.

    Parameters
    ----------
    midi_path : str
        Path of the MIDI file to read.

    Returns
    -------
    The music21 stream produced by ``midi.translate.midiFileToStream``.
    """
    # Import under a private alias: other cells in this notebook use `midi` as a
    # loop variable, which rebinds the module-level name to a filename string.
    from music21 import midi as m21_midi

    mf = m21_midi.MidiFile()
    mf.open(midi_path)
    try:
        mf.read()
    finally:
        # Release the file handle even when parsing fails.
        mf.close()

    return m21_midi.translate.midiFileToStream(mf)
    
# NOTE(review): "ahmed2.mid" is resolved relative to the working directory and is
# not produced by any visible cell (the cell above writes 'mid_new.mid') — confirm the file.
base_midi = open_midi("ahmed2.mid")
base_midi

In [None]:
## Works on Linux...not Mac, should run hopefully :) 
# First call uses music21's default show() output; the second requests the 'midi' format.
base_midi.show()
base_midi.show('midi')