# AudioLDM
Basado en:
* https://huggingface.co/docs/diffusers/api/pipelines/audioldm
* https://huggingface.co/cvssp
* https://huggingface.co/spaces/haoheliu/audioldm-text-to-audio-generation

## Preparación del entorno

In [None]:
# Install the Python packages this notebook needs, one pip invocation per package.
# diffusers is pinned to 0.16.0; transformers and accelerate are left unpinned,
# so pip picks whatever release is current when the cell runs.
# NOTE(review): torch and scipy are imported in the next cell but are not
# installed here -- presumably the Colab runtime already ships them; confirm.
!pip install diffusers==0.16.0
!pip install transformers
!pip install accelerate

In [None]:
from diffusers import AudioLDMPipeline, DPMSolverMultistepScheduler
import torch
from scipy.io.wavfile import write
from IPython.display import Audio
from google.colab import files

# Run on the GPU in half precision when CUDA is available; otherwise stay on
# the CPU in full precision.
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# Checkpoint to load. Exactly one assignment should be active; the others are
# kept as ready-to-use alternatives.
#repo_id = "cvssp/audioldm" # audioldm-s-full
repo_id = "cvssp/audioldm-s-full-v2"
#repo_id = "cvssp/audioldm-m-full"
#repo_id = "cvssp/audioldm-l-full"

# Load the pipeline in the selected dtype and move it to the selected device,
# then replace its scheduler with a DPMSolverMultistepScheduler built from the
# configuration of the scheduler the checkpoint came with.
pipe = AudioLDMPipeline.from_pretrained(repo_id, torch_dtype=torch_dtype).to(device)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)

# Random generator living on the same device; the generation cell seeds it
# before every run.
generator = torch.Generator(device)

## Generación

In [None]:
# Generation parameters, exposed as Colab form fields through the #@param
# annotations. Keep each annotation on the same line as its assignment.
prompt = "trance swell, swelling, crescendo" #@param {type:"string"}
audio_length = 10 #@param{type:"slider", min:5.0, max:30.0, step:5.0}
inference_steps = 10 #@param {type:"slider", min:10, max:50, step:10}
# NOTE(review): the slider minimum used to be the invalid token `q.5`;
# 0.5 is assumed here (1.5 is the other plausible reading) -- confirm.
guidance_scale = 2.5 #@param{type:"slider", min:0.5, max:5.0, step:0.5}
negative_prompt = "low quality, average quality"# @param {type:"string"}
num_waveforms = 2 #@param{type:"slider", min:1, max:16, step:1}
seed = 3849653847 #@param{type:"number"}

# Local import: the archive is built in Python instead of shelling out to
# `zip`, so prompt-derived file names never pass through a shell.
import zipfile

# Sample rate (Hz) used both for the WAV files and for the inline player.
sample_rate = 16000

# Re-seed the shared generator on every run so that identical parameters
# reproduce identical audio. `seed` goes through int() because the form
# field is a generic number.
audiodata = pipe(
    prompt=prompt,
    audio_length_in_s=audio_length,
    num_inference_steps=inference_steps,
    guidance_scale=guidance_scale,
    negative_prompt=negative_prompt,
    num_waveforms_per_prompt=num_waveforms,
    generator=generator.manual_seed(int(seed)),
).audios

# Write one WAV file per generated waveform and remember exactly which files
# belong to this run.
audiofilenames = []
for index, a in enumerate(audiodata):
    audiofilename = f"audio_{index+1}.wav"
    write(audiofilename, sample_rate, a.astype('float32'))
    audiofilenames.append(audiofilename)

# Name the archive after the first 40 characters of the prompt. Spaces become
# underscores as before; '/' is also replaced because it would be read as a
# directory separator. An empty prompt falls back to a fixed name.
zipstem = prompt[:40].replace(' ', '_').replace('/', '_') or "audio"
zipfilename = f"{zipstem}.zip"

# Mode "w" starts a fresh archive, and only this run's files are added, so
# leftover WAV files from earlier runs are never bundled by accident.
with zipfile.ZipFile(zipfilename, "w", zipfile.ZIP_DEFLATED) as archive:
    for audiofilename in audiofilenames:
        archive.write(audiofilename)
files.download(zipfilename)

# Last expression of the cell: renders an inline player for the first waveform.
Audio(data=audiodata[0], rate=sample_rate)



In [None]:
!rm *.zip
!rm *.wav