# Music Generation using Meta's MusicGen 

### Library imports

In [1]:
from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write
import torchaudio
import gradio as gr
import warnings
warnings.filterwarnings('ignore')

    PyTorch 2.0.1+cu118 with CUDA 1108 (you have 2.0.1+cpu)
    Python  3.9.13 (you have 3.9.8)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details


### Model loading

In [None]:
# Load the pretrained 'melody' MusicGen checkpoint. In the recorded run the
# weights were downloaded at this point (see the progress output below).
model = MusicGen.get_pretrained('melody')

Downloading (…)ssion_state_dict.bin:   0%|          | 0.00/236M [00:00<?, ?B/s]

Downloading state_dict.bin:   0%|          | 0.00/2.77G [00:00<?, ?B/s]

Downloading: "https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/955717e8-8726e21a.th" to C:\Users\rjret/.cache\torch\hub\checkpoints\955717e8-8726e21a.th
100%|██████████| 80.2M/80.2M [01:38<00:00, 851kB/s] 


Downloading (…)ve/main/spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

In [None]:
# Initial generation settings: sampling enabled, top_k=250, duration=5.
# genfromtext / genfromaudio call set_generation_params again on each request.
model.set_generation_params(duration=5, top_k=250, use_sampling=True)

### Text-conditional Generation

In [None]:
def genfromtext(description, duration):
    """Generate a music clip from a text prompt.

    Args:
        description: Text prompt passed to the model as the only description.
        duration: Clip length forwarded to ``model.set_generation_params``
            (seconds, per the MusicGen API).

    Returns:
        A ``(sample_rate, data)`` tuple suitable for ``gr.Audio(type="numpy")``.
        ``data`` is a NumPy array laid out as ``(samples, channels)``.
    """
    model.set_generation_params(use_sampling=True, duration=duration)
    audio_values = model.generate(descriptions=[description])
    # generate() yields a (batch, channels, samples) tensor per the audiocraft
    # docs. gr.Audio expects 1-D or (samples, channels) data, so take the only
    # batch item and transpose instead of passing the raw 3-D array.
    samples = audio_values[0].detach().cpu().numpy().T
    # Ask the model for its output rate rather than hard-coding 32000.
    return (model.sample_rate, samples)

### Melody-conditional Generation

In [None]:
def genfromaudio(melody, description, duration):
    """Generate a music clip conditioned on a melody file and a text prompt.

    Args:
        melody: Path of the uploaded audio file (the Gradio input uses
            ``type="filepath"``); ``None`` when nothing was uploaded.
        description: Text prompt passed to the model as the only description.
        duration: Clip length forwarded to ``model.set_generation_params``
            (seconds, per the MusicGen API).

    Returns:
        A ``(sample_rate, data)`` tuple suitable for ``gr.Audio(type="numpy")``.
        ``data`` is a NumPy array laid out as ``(samples, channels)``.

    Raises:
        gr.Error: If no melody file was provided.
    """
    if melody is None:
        # Without this guard torchaudio.load(None) fails with an opaque error.
        raise gr.Error("Please upload a melody audio file.")

    model.set_generation_params(use_sampling=True, duration=duration)
    melody_waveform, sr = torchaudio.load(melody)
    audio_values = model.generate_with_chroma(
        descriptions=[description],
        # Add a leading batch dimension of size 1 (one melody per description).
        melody_wavs=melody_waveform.unsqueeze(0),
        melody_sample_rate=sr,
        progress=True,
    )
    # generate_with_chroma() yields a (batch, channels, samples) tensor per the
    # audiocraft docs; gr.Audio expects 1-D or (samples, channels) data.
    samples = audio_values[0].detach().cpu().numpy().T
    # Ask the model for its output rate rather than hard-coding 32000.
    return (model.sample_rate, samples)

### Gradio Interface

In [None]:
# Text-conditional tab: prompt + duration in, generated audio out.
interface1 = gr.Interface(
    fn=genfromtext,
    inputs=[
        gr.Text(label="Input Text", interactive=True),
        gr.Number(
            label="Duration",
            value=5,
            minimum=1,
            maximum=10,
            interactive=True,
        ),
    ],
    outputs=[
        gr.Audio(label="Generated Music", type="numpy"),
    ],
    description="Text-Conditional Generation",
)

In [None]:
# Melody-conditional tab: uploaded melody + prompt + duration in, audio out.
interface2 = gr.Interface(
    fn=genfromaudio,
    inputs=[
        gr.Audio(
            label="Melody (Upload an audio with a max duration of 30sec)",
            source="upload",
            type="filepath",
            interactive=True,
        ),
        gr.Text(label="Input Text", interactive=True),
        gr.Number(
            label="Duration",
            value=5,
            minimum=1,
            maximum=10,
            interactive=True,
        ),
    ],
    outputs=[
        gr.Audio(label="Generated Music", type="numpy"),
    ],
    description="Melody-Conditional Generation",
)

In [None]:
# Combine both interfaces into one tabbed app and launch it in the browser.
title = "Music Generation using Meta's MusicGen"
final_interface = gr.TabbedInterface(
    [interface1, interface2],
    [
        "Text-Conditional",
        "Melody-Conditional (Upload mp3 audio not exceeding 30sec)",
    ],
    title=title,
    theme=gr.themes.Default(),
)
final_interface.launch(inbrowser=True)