In [None]:
!git clone https://github.com/facebookresearch/audiocraft.git
%cd audiocraft
!pip install -e .
!pip install dora-search
!pip install numba

fatal: destination path 'audiocraft' already exists and is not an empty directory.
/content/audiocraft
Obtaining file:///content/audiocraft
  Preparing metadata (setup.py) ... [?25l[?25hdone
Installing collected packages: audiocraft
  Attempting uninstall: audiocraft
    Found existing installation: audiocraft 1.4.0a1
    Uninstalling audiocraft-1.4.0a1:
      Successfully uninstalled audiocraft-1.4.0a1
  Running setup.py develop for audiocraft
Successfully installed audiocraft-1.4.0a1


## **Load Dataset into MusicGen**

In [None]:
# Build the train/eval JSONL manifests from the .mp3 clips in `dataset_folder`.
# With use_existing_json=True each clip's "<name>_metadata.json" is used as its
# manifest entry; otherwise a minimal entry is built from the audio file itself.
use_existing_json = True

import os
import json
import random
import librosa

os.makedirs("/content/audiocraft/egs/train", exist_ok=True)
os.makedirs("/content/audiocraft/egs/eval", exist_ok=True)

dataset_folder = "/content/drive/MyDrive/preprocessed_dataset"
train_manifest_path = "/content/audiocraft/egs/train/data.jsonl"
eval_manifest_path = "/content/audiocraft/egs/eval/data.jsonl"

# Share of clips written to the train manifest; the rest go to eval.
train_fraction = 0.85
# A dedicated, seeded generator plus a sorted file listing make the split
# identical on every run.
split_seed = 0
split_rng = random.Random(split_seed)

dataset_len = 0  # every .mp3 seen, including clips skipped for missing metadata
train_len = 0
eval_len = 0

# `with` guarantees both manifests are flushed and closed even if a clip fails to load.
with open(train_manifest_path, 'w') as train_file, open(eval_manifest_path, 'w') as eval_file:
    for filename in sorted(os.listdir(dataset_folder)):
        if not filename.endswith(".mp3"):
            continue
        dataset_len += 1
        audio_path = os.path.join(dataset_folder, filename)

        if use_existing_json:
            base_filename = os.path.splitext(filename)[0]
            json_filepath = os.path.join(dataset_folder, base_filename + "_metadata.json")
            if not os.path.exists(json_filepath):
                print(f'Error loading JSON: could not find {json_filepath}')
                continue
            with open(json_filepath, 'r') as json_file:
                entry = json.load(json_file)
            entry["path"] = audio_path
        else:
            # sr=None keeps the file's native sampling rate, so the rate recorded
            # below is the real one rather than an assumed constant.
            y, sr = librosa.load(audio_path, sr=None)
            length = librosa.get_duration(y=y, sr=sr)

            entry = {
                "key": "",
                "artist": "",
                "sample_rate": int(sr),
                # NOTE(review): the clips are .mp3 but this says "wav" — confirm what reads it.
                "file_extension": "wav",
                "description": "",
                "keywords": "",
                "duration": length,
                "bpm": "",
                "genre": "cailuong",
                "title": "",
                "name": "",
                "instrument": "Mix",
                "moods": [],
                "path": audio_path,
            }

        if split_rng.random() < train_fraction:
            train_len += 1
            train_file.write(json.dumps(entry) + '\n')
        else:
            eval_len += 1
            eval_file.write(json.dumps(entry) + '\n')

print(f'dataset length: {dataset_len} audio clips')
print(f'train length: {train_len} audio clips')
print(f'eval length: {eval_len} audio clips')

Error loading JSON: could not find /content/drive/MyDrive/preprocessed_dataset/A5_metadata.json
Error loading JSON: could not find /content/drive/MyDrive/preprocessed_dataset/CH_metadata.json
Error loading JSON: could not find /content/drive/MyDrive/preprocessed_dataset/HE_metadata.json
Error loading JSON: could not find /content/drive/MyDrive/preprocessed_dataset/G6_metadata.json
Error loading JSON: could not find /content/drive/MyDrive/preprocessed_dataset/LB_metadata.json
Error loading JSON: could not find /content/drive/MyDrive/preprocessed_dataset/MB_metadata.json
Error loading JSON: could not find /content/drive/MyDrive/preprocessed_dataset/2C_metadata.json
Error loading JSON: could not find /content/drive/MyDrive/preprocessed_dataset/7W_metadata.json
dataset length: 808 audio clips
train length: 684 audio clips
eval length: 116 audio clips


In [None]:
# Write the dataset config that points the train/valid/evaluate/generate splits
# at the manifests under egs/.
config_path = "/content/audiocraft/config/dset/audio/train.yaml"

data_path = "egs/train"
eval_data_path = "egs/eval"

# The keyword of the header directive is interpolated rather than written literally.
# NOTE(review): presumably to keep Colab from treating the header as a form field — confirm.
package = "package"
yaml_lines = [
    f"#@{package} __global__",
    "",
    "datasource:",
    "  max_channels: 2",
    "  max_sample_rate: 44100",
    "",
    f"  evaluate: {eval_data_path}",
    f"  generate: {data_path}",
    f"  train: {data_path}",
    f"  valid: {eval_data_path}",
]
yaml_contents = "\n".join(yaml_lines) + "\n"

with open(config_path, 'w') as yaml_file:
    yaml_file.write(yaml_contents)

## **Training MusicGen**

In [None]:
%env USER=lyra

command = (
    "dora -P audiocraft run"
    " solver=musicgen/musicgen_base_32khz"
    " model/lm/model_scale=small"
    " continue_from=//pretrained/facebook/musicgen-small"
    " conditioner=text2music"
    " dset=audio/train"
    " dataset.num_workers=2"
    " dataset.valid.num_samples=1"
    " dataset.batch_size=2"
    " schedule.cosine.warmup=8"
    " optim.optimizer=adamw"
    " optim.lr=1e-4"
    " optim.epochs=10"
    " optim.updates_per_epoch=2000"
    " optim.adam.weight_decay=0.01"
    " generate.lm.prompted_samples=False"
    " generate.lm.gen_gt_samples=True"
)

!{command}

env: USER=lyra
2024-09-25 16:54:46.169274: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-25 16:54:46.491686: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-25 16:54:46.577620: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-25 16:54:47.080355: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Dora directory: /tmp/audiocraf

In [None]:
# Reset the CUDA device currently selected by numba.
# NOTE(review): presumably run after training to release the GPU memory this
# process still holds before a model is loaded again below — confirm the intent.
from numba import cuda
device = cuda.get_current_device()
device.reset()

## **Save, Load, and Export**

In [None]:
from audiocraft.utils import export
from audiocraft import train

# Signature of the training run whose checkpoint is exported.
sig = "aec0903f"
# Folder that receives both exported state dicts.
export_dir = '/content/checkpoints/finetune'

# from https://github.com/facebookresearch/audiocraft/blob/main/docs/MUSICGEN.md#importing--exporting-models
xp = train.main.get_xp_from_sig(sig)
checkpoint_file = xp.folder / 'checkpoint.th'
export.export_lm(checkpoint_file, f'{export_dir}/state_dict.bin')
export.export_pretrained_compression_model(
    'facebook/encodec_32khz',
    f'{export_dir}/compression_state_dict.bin',
)

In [None]:
from audiocraft.models import MusicGen
# Load MusicGen from the directory the export cell wrote state_dict.bin and
# compression_state_dict.bin into.
musicgen = MusicGen.get_pretrained('/content/checkpoints/finetune')

In [None]:
sig = "aec0903f"

command = (
    "dora run solver=musicgen/musicgen_base_32khz"
    " model/lm/model_scale=small"

    f" continue_from=//SIG/{sig}"

    f" continue_from=/tmp/audiocraft_lyra/xps/{sig}/checkpoint.th"

    " conditioner=text2music"
    " dset=audio/train"
    " dataset.num_workers=2"
    " dataset.valid.num_samples=1"
    " dataset.batch_size=2"
    " schedule.cosine.warmup=8"
    " optim.optimizer=adamw"
    " optim.lr=1e-4"
    " optim.epochs=5"
    " optim.adam.weight_decay=0.01"
)

!{command}

In [None]:
import shutil

# Signature of the run whose checkpoint is backed up.
sig = "aec0903f"

# Copy the run's checkpoint from the local dora folder to Google Drive.
destination_path = '/content/drive/MyDrive/musicgen_finetunes/checkpoints/new'
source_path = f'/tmp/audiocraft_lyra/xps/{sig}/checkpoint.th'

os.makedirs(destination_path, exist_ok=True)
shutil.copy(source_path, destination_path)

In [None]:
sig = "aec0903f"

try:
    !{command}
except:
    import shutil
    source_path = f'/tmp/audiocraft_lyra/xps/{sig}/checkpoint.th'
    destination_path = '/content/drive/MyDrive/musicgen_finetunes/checkpoints/'
    os.makedirs(destination_path, exist_ok=True)
    shutil.copy(source_path, destination_path)

## **Generate Samples**

In [None]:
from audiocraft.data.audio import audio_write
import IPython.display as ipd
from audiocraft.models import MusicGen
import numpy as np

# Load the exported fine-tuned model and set the generation duration to 16
# (seconds, per the MusicGen API) for the cells below.
musicgen = MusicGen.get_pretrained('/content/checkpoints/finetune')
musicgen.set_generation_params(duration=16)

In [None]:
# Generate four samples without text conditioning, then save and play each one.
wavs = musicgen.generate_unconditional(4)

for idx, one_wav in enumerate(wavs):
    one_wav = one_wav.cpu()  # move to host once; both calls below use the CPU copy
    audio_write(f'{idx}', one_wav, musicgen.sample_rate, strategy="loudness", loudness_compressor=True)
    # Play back at the model's own rate, the same one the file was written with.
    ipd.display(ipd.Audio(one_wav, rate=musicgen.sample_rate))

In [None]:
# Generate one sample per text prompt, then save and play each one.
wavs = musicgen.generate([
    'disco',
    'slide guitar bluegrass',
    'breakbeat, amen break',
    'epic orchestral strings'
])

for idx, one_wav in enumerate(wavs):
    one_wav = one_wav.cpu()  # move to host once; both calls below use the CPU copy
    audio_write(f'{idx}', one_wav, musicgen.sample_rate, strategy="loudness", loudness_compressor=True)
    # Play back at the model's own rate, the same one the file was written with.
    ipd.display(ipd.Audio(one_wav, rate=musicgen.sample_rate))

In [None]:
# RUN THIS BEFORE RUNNING THE NEXT CELLS!
import julius, torch

def normalize_audio(audio_data):
    """Peak-normalize a waveform so its largest absolute sample becomes 1.

    Args:
        audio_data: torch.Tensor of audio samples, any shape.

    Returns:
        A new tensor holding ``audio_data`` divided by its peak absolute value.
        The input tensor is never modified. Empty or all-zero (silent) input is
        returned as an unchanged copy instead of being divided by zero.
    """
    if audio_data.numel() == 0:
        return audio_data.clone()
    max_value = torch.max(torch.abs(audio_data))
    if max_value == 0:
        # Silent clip: dividing would fill the tensor with NaN.
        return audio_data.clone()
    # Out-of-place division leaves the caller's tensor untouched.
    return audio_data / max_value

def convert_audio_channels(wav: torch.Tensor, channels: int = 2) -> torch.Tensor:
    """Return `wav` with `channels` channels on its second-to-last dimension.

    The last two dimensions of `wav` are read as (channels, length); any leading
    dimensions are kept as they are.

    - Same channel count: `wav` is returned unchanged.
    - One channel requested: the channels are averaged.
    - Mono input: the single channel is expanded to `channels`.
    - More channels than requested: the first `channels` are kept.

    Raises:
        ValueError: if the input has fewer channels than requested and is not mono.
    """
    *lead_dims, n_src, n_samples = wav.shape

    if n_src == channels:
        return wav
    if channels == 1:
        # Downmix by averaging across the channel dimension.
        return wav.mean(dim=-2, keepdim=True)
    if n_src == 1:
        # Broadcast the mono channel to the requested count.
        return wav.expand(*lead_dims, channels, n_samples)
    if n_src < channels:
        raise ValueError('The audio file has less channels than requested but is not mono.')
    return wav[..., :channels, :]

def convert_audio(wav: torch.Tensor, from_rate: float, to_rate: float, to_channels: int) -> torch.Tensor:
    """Resample `wav` from `from_rate` to `to_rate`, then convert it to `to_channels` channels.

    Both rates are truncated to integers before being handed to the resampler.
    """
    src_rate, dst_rate = int(from_rate), int(to_rate)
    resampled = julius.resample_frac(wav, src_rate, dst_rate)
    return convert_audio_channels(resampled, to_channels)

def generate_audio_continuation(musicgen, sample, generation_length, segment_length=60, overlap=10):
    """Extend a mono clip by repeatedly asking the model to continue its tail.

    Each round feeds the last `overlap` seconds of the audio produced so far as a
    prompt and appends only the newly generated part of the returned segment.

    Args:
        musicgen: model exposing `sample_rate`, `set_generation_params(duration=...)`
            and `generate_continuation(prompt=..., prompt_sample_rate=...)`.
        sample: torch.Tensor holding the starting clip at the model's sample rate;
            it must squeeze to a 1-D (mono) waveform.
        generation_length: minimum total length of the result, in seconds.
        segment_length: duration requested from the model per round, in seconds.
        overlap: seconds of existing audio used as the prompt of each round.

    Returns:
        1-D float32 NumPy array containing the starting clip followed by the
        generated audio. Rounds stop as soon as `generation_length` is reached,
        so the result can be longer than requested; it is not trimmed.

    Raises:
        ValueError: if `overlap` is not in (0, segment_length), or `sample` is not mono.
        RuntimeError: if the model returns no audio beyond the prompt.
    """
    # Without this check every round would add nothing and the loop would never end.
    if not 0 < overlap < segment_length:
        raise ValueError('overlap must be positive and smaller than segment_length.')

    # Use the model's own rate instead of assuming 32 kHz.
    sample_rate = int(musicgen.sample_rate)
    overlap_samples = int(overlap * sample_rate)
    target_samples = generation_length * sample_rate

    output = sample.detach().cpu().squeeze().numpy().astype(np.float32)
    if output.ndim != 1:
        raise ValueError('sample must squeeze to a 1-D mono waveform.')

    while len(output) < target_samples:
        musicgen.set_generation_params(duration=segment_length)
        # Shape (1, T). T is shorter than overlap_samples while the clip is still short.
        prompt = torch.tensor(output[-overlap_samples:], dtype=torch.float32).unsqueeze(0)
        res = musicgen.generate_continuation(prompt=prompt, prompt_sample_rate=sample_rate)
        res = res.detach().cpu().squeeze().numpy().astype(np.float32)
        # Drop exactly the samples that were supplied as the prompt, not a fixed
        # overlap, so a short first prompt does not swallow newly generated audio.
        new_audio = res[prompt.shape[-1]:]
        if len(new_audio) == 0:
            raise RuntimeError('generate_continuation returned no audio beyond the prompt.')
        output = np.concatenate((output, new_audio))

    return output

In [None]:
from google.colab import files
import torch
import torchaudio  # needed for torchaudio.load below

# Upload a clip, bring it to mono at the model's sample rate, and extend it to 60 s.
uploaded = files.upload()

input_audio_filename = next(iter(uploaded.keys()))
sample, sample_sr = torchaudio.load(input_audio_filename)
sample = normalize_audio(sample)
sample = convert_audio(sample, sample_sr, musicgen.sample_rate, 1)

wav = generate_audio_continuation(musicgen, sample, 60)

# generate_audio_continuation returns a 1-D NumPy array, so wrap it as a
# (1, samples) float tensor like the tensors the other cells pass to audio_write.
wav_tensor = torch.as_tensor(wav, dtype=torch.float32).unsqueeze(0)
audio_write('continuation', wav_tensor, musicgen.sample_rate, strategy="loudness", loudness_compressor=True)
ipd.display(ipd.Audio(wav, rate=musicgen.sample_rate))

In [None]:
# Generate four unconditional seeds and extend each one to 60 s.
wavs = musicgen.generate_unconditional(4)

for idx, seed_wav in enumerate(wavs):
    extended = generate_audio_continuation(musicgen, seed_wav, 60)

    # The continuation is a 1-D NumPy array (it has no .cpu()); wrap it as a
    # (1, samples) float tensor before handing it to audio_write.
    extended_tensor = torch.as_tensor(extended, dtype=torch.float32).unsqueeze(0)
    audio_write(f'{idx}', extended_tensor, musicgen.sample_rate, strategy="loudness", loudness_compressor=True)
    ipd.display(ipd.Audio(extended, rate=musicgen.sample_rate))

In [None]:
from audiocraft.models import MusicGen, MultiBandDiffusion

# Decode the same generated tokens twice — with the model's default decoder and
# with MultiBand Diffusion — and save/play both versions for comparison.
mbd = MultiBandDiffusion.get_mbd_musicgen()

wavs, tokens = musicgen.generate_unconditional(4, return_tokens=True)
wavs_diffusion = mbd.tokens_to_wav(tokens)

for idx, (one_wav, one_wav_diffusion) in enumerate(zip(wavs, wavs_diffusion)):
    # Move each waveform to host memory once and reuse the CPU copy.
    one_wav = one_wav.cpu()
    one_wav_diffusion = one_wav_diffusion.cpu()

    audio_write(f'{idx}', one_wav, musicgen.sample_rate, strategy="loudness", loudness_compressor=True)
    audio_write(f'{idx}_diffusion', one_wav_diffusion, musicgen.sample_rate, strategy="loudness", loudness_compressor=True)

    # Play back at the same rate the files were written with.
    print('default decoder:')
    ipd.display(ipd.Audio(one_wav, rate=musicgen.sample_rate))
    print('multiband diffusion:')
    ipd.display(ipd.Audio(one_wav_diffusion, rate=musicgen.sample_rate))

In [None]:
# Stereo inference: generate from the pretrained stereo checkpoint and play the results.
from audiocraft.models import MusicGen
import IPython.display as ipd

model = MusicGen.get_pretrained("facebook/musicgen-stereo-medium")
model.set_generation_params(duration=8)

# One text description per sample to generate.
stereo_prompts = [
    'disco',
    'slide guitar bluegrass',
    'breakbeat, amen break',
    'epic orchestral strings',
]
wavs = model.generate(stereo_prompts)

for idx, one_wav in enumerate(wavs):
    player = ipd.Audio(one_wav.cpu(), rate=model.sample_rate)
    ipd.display(player)
