In [70]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import mido
import time

model_name = "kobimusic/esecutore-4-0619"
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'mps'

  from .autonotebook import tqdm as notebook_tqdm


In [71]:
model = AutoModelForCausalLM.from_pretrained(model_name)
model.to('mps')
tokenizer = AutoTokenizer.from_pretrained(model_name)



Just collect and write all the notes in a relative way (simplifies code a lot)

In [None]:
notes = []
rel_time = True

try:
    with mido.open_input('Launchkey Mini MK3 MIDI Port') as inport:
        start_time = time.time()
        for msg in inport:
            if msg.type == 'note_on' and msg.velocity > 0:
                time_ = time.time() - start_time
                notes.append(mido.Message(
                    'note_on',
                    note=msg.note, 
                    velocity=msg.velocity, 
                    channel=0,
                    time = time_
                ))
                if rel_time:
                    start_time = time.time()
            elif msg.type == 'note_off'  or (msg.type == 'note_on' and msg.velocity == 0):
                time_ = time.time() - start_time
                notes.append(mido.Message(
                    'note_off',
                    note=msg.note, 
                    velocity=msg.velocity, 
                    channel=0,
                    time = time_
                ))
                if rel_time:
                    start_time = time.time()
except KeyError as e:
    print(e)
    pass

Convert the relative inputs into inputs for the AI

In [10]:
def conv_channel(channel):
    channel_map = {
        0: "%",
        1: "^",
        2: "&",
        3: "*",
        4: ";",
        5: ":",
        6: "'",
        7: '"',
        9: ")",
        10: "{",
        11: "}",
        12: "[",
        13: "]",
        14: "(",
    }

    if isinstance(channel, int):
        if channel in channel_map.keys():
            return channel_map[channel]
        return "%"
    elif isinstance(channel, str):
        # Invert the mapping
        channel_map = {
            v: k for k, v in channel_map.items()
        }
        if channel in channel_map.keys():
            return channel_map[channel]
        return 0
    else:
        raise "Wrong type for channel"

def conv_velocity(velocity):
    velocity_map = {
        48: "!",
        60: "@",
        100: "#",
    }
    if isinstance(velocity, int):
        for i in velocity_map.keys():
            if velocity <= i:
                return velocity_map[i]
        return "@"
    elif isinstance(velocity, str):
        # Invert the mapping
        velocity_map = {
            v: k for k, v in velocity_map.items()
        }
        return velocity_map[velocity]
    else:
        raise "Wrong type for velocity"
    
from dataclasses import dataclass

@dataclass
class Note():
    start: float
    end: float
    pitch: int
    velocity: int
    channel: int


In [None]:
import pickle as pkl

def conv_pm_to_str(midi_):
    ticks_per_beat = 480 / 24
    bpm = ((60) / 120) *  1000000

    rel_notes = []
    last_time = 0

    for msg in midi_:
        delta = msg.end - msg.start
        rel_notes.append(
            str(mido.second2tick(msg.start - last_time, ticks_per_beat, bpm)) +
            str(conv_velocity(msg.velocity)) +
            str(mido.second2tick(delta, ticks_per_beat, bpm)) +
            str(conv_channel(msg.channel)) +
            str(msg.pitch) + "|"
        )

        last_time = msg.start

    str_conv = ''.join(rel_notes)
    return str_conv

with open('./test.pkl', 'rb') as f:
    piano, click = pkl.load(f)

midi_ = []
for note in piano:
    midi_.append(Note(
        start = note.start,
        end = note.end,
        pitch = note.pitch,
        velocity = note.velocity,
        channel = 0
    ))
for note in click:
    midi_.append(Note(
        start = note.start,
        end = note.end,
        pitch = note.pitch,
        velocity = note.velocity,
        channel = 9
    ))

midi_.sort(key = lambda x: x.start)
inp_str = conv_pm_to_str(midi_)

In [149]:
str_conv = ''.join(inp_str)
formatted = f". classical |{str_conv}"
formatted

'. classical |100@73%48|0#19)75|1!14%63|0!10%60|9@13%55|10#19)56|1@11%60|0!11%63|8@14%55|10#19)56|1@11%60|0!11%63|10!12%55|8#19)56|1!12%63|0!9%60|8!14%55|9!73%50|1#19)75|1#13%65|0#10%60|9@14%55|9#19)56|1#13%65|1!11%60|8!15%55|10#19)56|1#15%65|1@12%60|9#13%55|9#19)56|1#11%65|1!10%60|7#8%55|10@72%48|0@13%58|0@12%63|1#19)75|10@11%55|9@9%63|0!10%58|1#19)56|8@11%55|10@12%58|0@9%63|1#19)56|8#8%55|11#19)56|1@7%58|0@7%63|9#6%55|0!5%58|9#19)75|'

In [None]:
formatted = '. classical |0#240%62|0#240*74|240#240%64|0#240*76|0#240%65|0#240*77|240#240%67|0#240*79|240#40%62|0#40*74|40#440%64|0#440*76|440#240%60|0#240^56|0#240^53|0#240^51|0#240*72|240#120%62|0#120^58|0#120^53|0#120^48|0#120*74|240#80&70|80#80&72|80#80&70|80#80&69|80#80&70|80#80&69|80#80&68|160#80&63|80#80&62|80#80&63|80#80&62|80#80&57|80#80&58|80#80&60|80#80&65|80#80&69|80#80^58|0#80^62|0#80^65|0#80^69|0#80&72|80#320^57|0#320^64|0#320^60|0#320^67|0#320^71|0#320&76|480#240%60|0#240*72|240#240%62|0#240*74|240#240%63|0#240*75|240#240%65|0#240*77|240#40%60|0#40*72|40#440%62|0#440*74|440#240%58|0#240^54|0#240^51|0#240^49|0#240*70|240#120%60|0#120^56|0#120^51|0#120^46|0#120*72|'

In [150]:
ins = tokenizer.encode(formatted)
ins = torch.tensor([ins], device='mps')
decs = []
for i in range(10):
    res = model.generate(
        ins,
        use_cache=False,
        max_new_tokens=6,
        do_sample=True,
        temperature=0.89,
        top_p=1.0,
        num_return_sequences=1,
    )
    decs.append(tokenizer.batch_decode(res[:, ins.shape[1]:])[0])
    ins = torch.cat((ins, res[:, ins.shape[1]:]), dim=1)
print(decs)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151645 for open-end gene

['6#12)56|', '2@12%63|', '6@12%60|', '0!12%55|', '7@7%55|', '9#14)56|', '4@13%65|', '0@19%58|', '19#19)56|', '0!10%58|']


In [2]:
decs = ['6#12)56|', '2@12%63|', '6@12%60|', '0!12%55|', '7@7%55|', '9#14)56|', '4@13%65|', '0@19%58|', '19#19)56|', '0!10%58|']

In [11]:
import re
pattern = re.compile(r"(\d+)(\D)(\d+)(\D)(\d+)\|")
m = pattern.match(decs[0])

Note(
    start = int(m.group(1)),
    velocity = conv_velocity(m.group(2)),
    end = int(m.group(1)) + int(m.group(3)),
    channel = conv_channel(m.group(4)),
    pitch = int(m.group(5))
)

Note(start=6, end=18, pitch=56, velocity=100, channel=9)

In [155]:
import re
pattern = re.compile(r"(\d+)(\D)(\d+)(\D)(\d+)\|")
notes = []
prev_start = 0
for note in decs:
    m = pattern.match(note)
    notes.append(Note(
        start = int(m.group(1)) + prev_start,
        velocity = conv_velocity(m.group(2)),
        end = int(m.group(1)) + int(m.group(3)),
        channel = conv_channel(m.group(4)),
        pitch = int(m.group(5))
    ))
    prev_start += int(m.group(1))

In [1]:
notes

NameError: name 'notes' is not defined

In [None]:
notes

In [73]:
decoded = tokenizer.batch_decode(res)
decoded

['. classical |42$71%48|1@11%63|1!7%60|7!11%55|10@9%60|0#12%63|9@11%55|10!11%63|0!14%60|11!9%55|8@11%60|1!9%63|7@10%55|8$54%50|3@10%60|0!10%65|7#13%55|11#11%60|0#12%65|8#13%55|10@11%60|0@12%65|9#7%55|9$5%58|0$6%48|1$4%63|8#5%50|0#6%55|1#4%60|1#3%63|4$15%56']

In [21]:
import mido

# Create a new MIDI file with 480 ticks per beat
mid = mido.MidiFile(ticks_per_beat=480)
track = mido.MidiTrack()
mid.tracks.append(track)

# Set tempo (500000 µs per beat = 120 BPM) and time signature
track.append(mido.MetaMessage('set_tempo', tempo=500000, time=0))
track.append(mido.MetaMessage('time_signature', numerator=4, denominator=4, time=0))

# Start two notes simultaneously at time 0
# Note 60 will last 3 seconds (6 beats * 480 = 2880 ticks)
# Note 64 will last 3.5 seconds (7 beats * 480 = 3360 ticks)
track.append(mido.Message('note_on', note=60, velocity=64, time=0))
track.append(mido.Message('note_on', note=64, velocity=64, time=0))
track.append(mido.Message('note_on', note=50, velocity=64, time=200))
track.append(mido.Message('note_on', note=70, velocity=64, time=200))
track.append(mido.Message('note_on', note=80, velocity=64, time=400))
track.append(mido.Message('note_on', note=81, velocity=64, time=400))

track.append(mido.Message('note_off', note=64, velocity=64, time=480))
track.append(mido.Message('note_off', note=60, velocity=64, time=2880))
track.append(mido.Message('note_off', note=70, velocity=64, time=240))
track.append(mido.Message('note_off', note=50, velocity=64, time=4000))
track.append(mido.Message('note_off', note=80, velocity=64, time=600))
track.append(mido.Message('note_off', note=81, velocity=64, time=800))

# Mark end of track
track.append(mido.MetaMessage('end_of_track', time=8000))

# Save the MIDI file
mid.save('long_notes.mid')