# Generating Music with an Anticipatory Music Transformer

## Set up the runtime environment

In [1]:
import os
import sys,time

import midi2audio
import transformers
from transformers import AutoModelForCausalLM

from IPython.display import Audio

from anticipation import ops
from anticipation.sample import generate
from anticipation.tokenize import extract_instruments
from anticipation.convert import events_to_midi,midi_to_events
from anticipation.visuals import visualize
from anticipation.config import *
from anticipation.vocab import *

from mlc_chat import ChatModule

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# Set up Git LFS before cloning the model repository in the next cell
# (that clone reports roughly 1.5 GiB of filtered content).
!git lfs install

Updated Git hooks.
Git LFS initialized.


In [8]:
# Clone the MLC model repository from Hugging Face into the `params` directory
# that the ChatModule `model=` argument points at.
# NOTE(review): the `.so` file passed to ChatModule as `model_lib_path` sits one
# directory above `params` and is not created by this clone -- presumably it has
# to be built or downloaded separately; confirm.
!git clone https://huggingface.co/mlc-ai/mlc-chat-stanford-crfm-music-medium-800k-q0f32 ./mlc_music_models/music-medium-800k-q0f32/params

Cloning into './mlc_music_models/music-medium-800k-q0f32/params'...
remote: Enumerating objects: 71, done.[K
remote: Counting objects: 100% (68/68), done.[K
remote: Compressing objects: 100% (68/68), done.[K
remote: Total 71 (delta 0), reused 0 (delta 0), pack-reused 3[K
Unpacking objects: 100% (71/71), 15.25 KiB | 1.09 MiB/s, done.
Filtering content: 100% (65/65), 1.54 GiB | 107.52 MiB/s, done.


In [2]:
# Model and soundfont locations.
# Each location can be overridden with an environment variable so the notebook
# is not tied to one machine's directory layout; when a variable is unset the
# default is exactly the path that was previously hard-coded here.
SMALL_MODEL = os.environ.get('MUSIC_SMALL_MODEL', '/ssd1/cfruan/models/music-small-800k')     # faster inference, worse sample quality
MEDIUM_MODEL = os.environ.get('MUSIC_MEDIUM_MODEL', '/ssd1/cfruan/models/music-medium-800k')   # slower inference, better sample quality

# Directory holding the MLC artifacts: a `params/` sub-directory and the
# compiled model library next to it.
MLC_MODEL_DIR = os.environ.get('MLC_MUSIC_MODEL_DIR', './mlc_music_models/music-medium-800k-q0f32')

# SoundFont handed to FluidSynth for MIDI rendering.
SOUNDFONT_PATH = os.environ.get('SOUNDFONT_PATH', '/usr/share/sounds/sf2/FluidR3_GM.sf2')

# Hugging Face model, moved to the GPU.
model_hf = AutoModelForCausalLM.from_pretrained(MEDIUM_MODEL).cuda()

# MLC model. The trailing '' in os.path.join keeps the trailing slash that the
# original `model=` path carried.
model = ChatModule(
    model=os.path.join(MLC_MODEL_DIR, 'params', ''),
    device="cuda",
    model_lib_path=os.path.join(MLC_MODEL_DIR, 'music-medium-800k-q0f32.so'),
    use_tokenizer=False
)

# a MIDI synthesizer
fs = midi2audio.FluidSynth(SOUNDFONT_PATH)

# the MIDI synthesis script
def synthesize(fs, tokens, midi_path='tmp.mid', wav_path='tmp.wav'):
    """Render a token sequence to an audio file and return that file's path.

    The tokens are converted with `events_to_midi`, the result is saved to
    `midi_path`, and `fs.midi_to_audio` then renders that MIDI file to
    `wav_path`.

    Args:
        fs: Synthesizer object exposing `midi_to_audio(midi_path, wav_path)`;
            the notebook passes a `midi2audio.FluidSynth` instance.
        tokens: Event tokens in whatever form `events_to_midi` accepts.
        midi_path: Where the intermediate MIDI file is written. Defaults to
            the previously hard-coded 'tmp.mid'.
        wav_path: Where the rendered audio is written. Defaults to the
            previously hard-coded 'tmp.wav'.

    Returns:
        `wav_path`, so the result can be passed straight to
        `IPython.display.Audio`.

    Note:
        With the default paths every call reuses the same two file names;
        pass distinct paths to keep several renders side by side.
    """
    mid = events_to_midi(tokens)
    mid.save(midi_path)
    fs.midi_to_audio(midi_path, wav_path)
    return wav_path

## Simple Interaction with the Anticipatory Music Transformer

### Unconditional generation

Let's begin by generating some unconditional music from the model. We achieve this by calling the `generate` function with `start_time=0` and `end_time=10`, asking the model to generate 10 seconds of music starting from time 0. The `top_p` nucleus sampling parameter controls how conservative sampling will be: lower values will tend to promote more boring, repetitive generation whereas higher values might encourage the model to be too experimental. The two cells below run the same generation twice: first with the Hugging Face model (`use_MLC=False`) and then with the MLC-compiled model (`use_MLC=True`).

In [3]:
# Unconditional sampling through the Hugging Face model (`model_hf`).
length = 10  # passed as end_time; the text above describes this as 10 seconds of music
unconditional_tokens = generate(
    model_hf,
    start_time=0,
    end_time=length,
    top_p=.98,
    use_MLC=False,
)
# Render the tokens to a WAV file and show an inline audio player.
Audio(synthesize(fs, unconditional_tokens))

 98%|█████████▊| 979/1000 [00:32<00:00, 30.38it/s]


FluidSynth runtime version 2.1.1
Copyright (C) 2000-2020 Peter Hanappe and others.
Distributed under the LGPL license.
SoundFont(R) is a registered trademark of E-mu Systems, Inc.

Rendering audio to file 'tmp.wav'..


In [4]:
# Unconditional sampling through the MLC ChatModule (`model`).
length = 10  # passed as end_time; the text above describes this as 10 seconds of music
unconditional_tokens = generate(
    model,
    start_time=0,
    end_time=length,
    top_p=.98,
    use_MLC=True,
)
# Render the tokens to a WAV file and show an inline audio player.
Audio(synthesize(fs, unconditional_tokens))

  1%|▏         | 13/1000 [00:00<00:23, 41.20it/s]


FluidSynth runtime version 2.1.1
Copyright (C) 2000-2020 Peter Hanappe and others.
Distributed under the LGPL license.
SoundFont(R) is a registered trademark of E-mu Systems, Inc.

Rendering audio to file 'tmp.wav'..
