In [None]:
from audiocraft.models import MusicGen

# The `small` checkpoint is loaded here; `medium` or `large` would give
# better results.
model = MusicGen.get_pretrained("small")

In [None]:
# Generation settings shared by every example below: sampling enabled with
# top_k=250, and duration=5 (presumably seconds — confirm against the
# MusicGen documentation).
model.set_generation_params(duration=5, use_sampling=True, top_k=250)

### Unconditional Generation

In [None]:
from audiocraft.utils.notebook import display_audio

# Request two samples with no text or audio prompt, then render them
# in the notebook.
output = model.generate_unconditional(
    num_samples=2,
    progress=True,
)
display_audio(output, 32000)

### Music Continuation

In [None]:
import math
import torchaudio
import torch
from audiocraft.utils.notebook import display_audio

def get_bip_bip(bip_duration=0.125, frequency=440,
                duration=0.5, sample_rate=32000, device="cuda"):
    """Generates a series of bip bip at the given frequency.

    The signal is a cosine tone gated by a square on/off envelope: each
    period lasts `2 * bip_duration` seconds, silent for its first half and
    sounding for its second half.

    Args:
        bip_duration: Length in seconds of each bip (and of the silence that
            precedes it).
        frequency: Frequency of the cosine tone, in Hz.
        duration: Total length of the signal, in seconds.
        sample_rate: Number of samples per second.
        device: Torch device on which the signal is created.

    Returns:
        Float tensor of shape [1, int(duration * sample_rate)].
    """
    # Time axis in seconds, one entry per sample. The `device` argument is
    # honoured here (it was previously ignored in favour of a literal "cuda").
    t = torch.arange(
        int(duration * sample_rate), device=device, dtype=torch.float) / sample_rate
    # Use the requested `frequency` (previously a hard-coded 440 Hz).
    wav = torch.cos(2 * math.pi * frequency * t)[None]
    # Position inside the current silence+bip period, normalised to [0, 1).
    tp = (t % (2 * bip_duration)) / (2 * bip_duration)
    # 0 during the first half of each period, 1 during the second half.
    envelope = (tp >= 0.5).float()
    return wav * envelope


In [None]:
# A synthetic bip-bip signal is used as the audio prompt so that it sets both
# the tonality and the BPM of the generated audio. The same prompt is
# expanded to a batch of two, one entry per text description.
res = model.generate_continuation(
    get_bip_bip(0.125).expand(2, -1, -1),
    32000,
    [
        'Jazz jazz and only jazz',
        'Heartful EDM with beautiful synths and chords',
    ],
    progress=True,
)
display_audio(res, sample_rate=32000)

In [None]:
# Any audio file can serve as the prompt as well. Long files should be
# trimmed first: only the first `prompt_duration` seconds are kept here.
prompt_duration = 2
prompt_waveform, prompt_sr = torchaudio.load("./assets/bach.mp3")
prompt_waveform = prompt_waveform[..., :int(prompt_duration * prompt_sr)]
output = model.generate_continuation(
    prompt_waveform,
    prompt_sample_rate=prompt_sr,
    progress=True,
)
display_audio(output, 32000)

### Text-conditional Generation

In [None]:
from audiocraft.utils.notebook import display_audio

# One clip is requested per text description, then rendered in the notebook.
output = model.generate(
    progress=True,
    descriptions=[
        '80s pop track with bassy drums and synth',
        '90s rock song with loud guitars and heavy drums',
    ],
)
display_audio(output, 32000)