<a href="https://colab.research.google.com/github/AinzOwl/mysticai-colab/blob/main/MusicGen_Mystic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the packages used by the cells below (shell magic, runs in the
# notebook's runtime). Versions are not pinned here.
!pip install pipeline-ai torch audiocraft torchaudio

In [None]:


# Stand-alone demo: generate a track longer than one generation call by
# stitching overlapping continuations together.
#
# The notebook's import cell comes later; repeat the imports this cell needs
# so it also works when the cells are executed top to bottom.
from tempfile import NamedTemporaryFile

import torch
from audiocraft.data.audio import audio_write
from audiocraft.models import MusicGen

# Melody checkpoint, the same one MusicgenModel.load() uses below.
model = MusicGen.get_pretrained('facebook/musicgen-melody')

segment_duration = 30  # longest clip (seconds) requested from a single call
total_duration = 40    # target length (seconds) of the final track
overlap = 5            # seconds of audio fed back as the continuation prompt

desc = ['80s pop track with bassy drums and synth']

# First chunk: never ask for more than the target length.
first_duration = min(segment_duration, total_duration)
model.set_generation_params(
    use_sampling=True,
    top_k=250,
    duration=first_duration,
)
segment = model.generate(descriptions=desc, progress=True)

remaining = total_duration - first_duration
while remaining > 0:
    # The continuation includes the `overlap` seconds of prompt, so it only
    # adds (duration - overlap) seconds of new audio. Size it *before*
    # generating so the final track does not overshoot the target.
    continuation_duration = min(segment_duration, remaining + overlap)
    model.set_generation_params(
        use_sampling=True,
        top_k=250,
        duration=continuation_duration,
    )

    overlap_samples = overlap * model.sample_rate
    last_sec = segment[:, :, -overlap_samples:]
    next_segment = model.generate_continuation(
        last_sec, model.sample_rate, descriptions=desc, progress=True)
    # Drop the overlapping tail; the continuation starts with that audio.
    segment = torch.cat([segment[:, :, :-overlap_samples], next_segment], 2)
    remaining -= continuation_duration - overlap

# Keep the first (only) batch item and write it as a loudness-normalised WAV.
output = segment.detach().cpu().float()[0]
with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
    audio_write(
        file.name, output, model.sample_rate, strategy="loudness",
        loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
    print(f'Saved to {file.name}')

In [None]:
key = input("Enter your mystic.ai API key: ")
!pipeline cluster login catalystapi {key} -u https://www.mystic.ai -a

In [None]:
import torch
import torchaudio
import subprocess

from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write
from audiocraft.utils.notebook import display_audio
from tempfile import NamedTemporaryFile

from pipeline import Pipeline, Variable, entity, pipe
from pipeline.cloud import compute_requirements, environments, pipelines
from pipeline.objects import File


In [None]:
@entity
class MusicgenModel:
    """Pipeline entity that generates music conditioned on text and a melody.

    ``load`` fetches the pretrained melody checkpoint once at start-up and
    ``predict`` produces a single WAV file of the requested length.
    """

    def __init__(self):
        ...

    @pipe(on_startup=True, run_once=True)
    def load(self):
        """Load the pretrained melody checkpoint into ``self.model``."""
        from audiocraft.models import MusicGen

        self.model = MusicGen.get_pretrained('facebook/musicgen-melody')

    @pipe
    def predict(self, prompt: str, duration: int, samples: int, melodySample: File) -> File:
        """Generate ``duration`` seconds of music and return it as a WAV file.

        Args:
            prompt: Text description of the music to generate.
            duration: Target length of the track in seconds (must be > 0).
            samples: Accepted for interface compatibility. Only one track is
                returned, so this value does not affect the output.
            melodySample: WAV file whose melody conditions the first segment.

        Returns:
            A ``File`` pointing at the generated, loudness-normalised WAV.

        Raises:
            ValueError: If ``duration`` is not positive.
        """
        # Imported here so the method does not depend on notebook globals.
        from tempfile import NamedTemporaryFile

        import torch
        import torchaudio
        from audiocraft.data.audio import audio_write

        if duration <= 0:
            raise ValueError(f"duration must be positive, got {duration}")

        max_segment = 30  # longest clip (seconds) requested from a single call
        overlap = 5       # seconds of audio fed back as the continuation prompt
        sample_rate = self.model.sample_rate
        # The generation calls take one description per batch item.
        descriptions = [prompt]

        first_duration = min(duration, max_segment)
        self.model.set_generation_params(
            use_sampling=True,
            top_k=250,
            duration=first_duration,
        )

        # Batch of one melody to match the single description.
        melody, sr = torchaudio.load(str(melodySample.path), format="wav")
        segment = self.model.generate_with_chroma(descriptions, melody[None], sr)

        remaining = duration - first_duration
        while remaining > 0:
            # The continuation includes the `overlap` seconds of prompt, so it
            # only adds (duration - overlap) seconds of new audio. Size it
            # before generating so the track does not overshoot the target.
            continuation_duration = min(max_segment, remaining + overlap)
            self.model.set_generation_params(
                use_sampling=True,
                top_k=250,
                duration=continuation_duration,
            )

            overlap_samples = overlap * sample_rate
            last_sec = segment[:, :, -overlap_samples:]
            next_segment = self.model.generate_continuation(
                last_sec, sample_rate, descriptions=descriptions, progress=True)
            # Drop the overlapping tail; the continuation starts with it.
            segment = torch.cat([segment[:, :, :-overlap_samples], next_segment], 2)
            remaining -= continuation_duration - overlap

        # Keep the first (only) batch item as a CPU float tensor.
        output = segment.detach().cpu().float()[0]

        # Reserve a persistent temp path, then write after the handle closes.
        with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
            file_path = file.name
        audio_write(
            file_path, output, sample_rate, strategy="loudness",
            loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)

        return File(path=file_path, allow_out_of_context_creation=True)

In [None]:
 @entity
# class MusicgenModel:
#     def __init__(self):
#         ...

#     @pipe(on_startup=True, run_once=True)
#     def load(self):
#         import torchaudio
#         from audiocraft.models import MusicGen

#         self.model = MusicGen.get_pretrained("facebook/musicgen-melody")

     @pipe
#     def predict(self, prompt: str, duration: int, samples: int, melodySample: File) -> File:
#         from audiocraft.data.audio import audio_write

#         self.model.set_generation_params(duration=duration)
#         wav = self.model.generate_unconditional(samples)
#         descriptions = prompt
#         wav = self.model.generate(descriptions)

#         melody, sr = torchaudio.load(str(melodySample.path), format="wav")
#         wav = self.model.generate_with_chroma(descriptions, melody[None].expand(duration, -1, -1), sr)

#         for idx, one_wav in enumerate(wav):
#             file_path = f"/tmp/{idx}"
#             # Will save under {idx}.wav, with loudness normalization at -14 db LUFS.
#             audio_write(
#                 file_path,
#                 one_wav.cpu(),
#                 self.model.sample_rate,
#                 strategy="loudness",
#                 loudness_compressor=True,
#             )

#         output_file = File(path=file_path + ".wav", allow_out_of_context_creation=True)
#         return output_file

In [None]:
# Declare the pipeline graph: four inputs feed MusicgenModel.predict, whose
# File result is the single pipeline output. Variables are declared in the
# same order the run cell passes its arguments.
with Pipeline() as builder:
    prompt = Variable(
        str,
        title="Prompt",
        description=(
            "Describe the music to be generated, "
            'e.g. "rock song with a long guitar solo"'
        ),
    )
    melodySample = Variable(
        File,
        title="Melody Sample",
        description=(
            "The audio melody that will be passed to the model; "
            "the file must be in WAV format."
        ),
    )
    duration = Variable(
        int,
        title="Duration",
        description=(
            "Length of the music in seconds; "
            "generation can take long, so keep numbers low."
        ),
    )
    samples = Variable(
        int,
        title="Samples number",
        description="Number of samples to generate.",
    )

    model = MusicgenModel()

    model.load()

    output = model.predict(prompt, duration, samples, melodySample)

    builder.output(output)

# Materialise the graph once the builder context has closed.
my_pl = builder.get_pipeline()

# Register the environment the uploaded pipeline is attached to.
environments.create_environment(
    "Ainzoil/musicgen",
    python_requirements=[
        "torch==2.0.1",
        "git+https://github.com/facebookresearch/audiocraft#egg=audiocraft",
        "torchaudio==2.0.2",
    ],
)


In [None]:
# Upload the pipeline built above under the name "Ainzoil/musicgen_large",
# attached to the "Ainzoil/musicgen" environment, requesting 30,000 MB of GPU
# memory on an NVIDIA A100.
# NOTE(review): the name says "large" but MusicgenModel.load() fetches
# 'facebook/musicgen-melody' — confirm the intended pipeline name.
remote_pipeline = pipelines.upload_pipeline(
    my_pl,
    "Ainzoil/musicgen_large",
    environment_id_or_name="Ainzoil/musicgen",
    required_gpu_vram_mb=30_000,
    accelerators=[
        compute_requirements.Accelerator.nvidia_a100,
    ],
)

In [None]:
# Show the GPU(s) available to this runtime before running the pipeline locally.
!nvidia-smi

In [None]:
# Run the pipeline locally. Arguments follow the order in which the
# Variables were declared: prompt, melody sample, duration (s), samples.
# NOTE(review): the prompt below reads like an image prompt rather than a
# music description — confirm it is the intended test input.
output = my_pl.run(
    "mj, cinematic close up photo of an ethereal neural network organism, divine woman, anatomical face, biomechanical details",
    File(path="CantinaBand60.wav"),
    45,
    1,
)

# predict() returns a File, while display_audio() needs a waveform tensor:
# load the generated WAV and use its own sample rate instead of a constant.
# The run result may be a list of outputs, so unwrap the first one if so.
result_file = output[0] if isinstance(output, (list, tuple)) else output
generated_wav, generated_sr = torchaudio.load(str(result_file.path))
display_audio(generated_wav, generated_sr)