In [None]:
# Google Drive를 마운트하여 데이터셋 파일에 접근
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
cd /content/drive/MyDrive/projects/carecruise_intern/audiocraft

/content/drive/MyDrive/projects/carecruise_intern/audiocraft


In [None]:
# Install necessary system dependencies
!sudo apt-get update && sudo apt-get install -y ffmpeg libsndfile1

!pip install -e .

0% [Working]            Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:6 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Hit:7 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:8 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Get:9 http://security.ubuntu.com/ubuntu jammy-security/main amd64 Packages [2,454 kB]
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:11 https://r2u.stat.illinois.edu/ubuntu jammy/main amd64 Packages [2,619 kB]
Get:12 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Get:13 http://archive.ubuntu.com/ubuntu jammy-u

In [None]:
%env USER=choihj
from audiocraft.utils import export
from audiocraft import train



# 모델 내보내기
export.export_lm('/content/drive/MyDrive/projects/carecruise_intern/audiocraft/logs/xps/efa9cb0e/checkpoint.th', '/content/drive/MyDrive/projects/carecruise_intern/audiocraft/logs/efa9cb0e/checkpoints/finetune/state_dict.bin')
export.export_pretrained_compression_model('facebook/encodec_32khz', '/content/drive/MyDrive/projects/carecruise_intern/audiocraft/logs/efa9cb0e/checkpoints/finetune/compression_state_dict.bin')

env: USER=choihj


In [None]:
from audiocraft.data.audio import audio_write
import IPython.display as ipd
from audiocraft.models import MusicGen
import numpy as np

# load your finetune
musicgen = MusicGen.get_pretrained('/content/drive/MyDrive/projects/carecruise_intern/audiocraft/logs/efa9cb0e/checkpoints/finetune')
musicgen.set_generation_params(duration=30)



wavs = musicgen.generate([
    'A peaceful melody blending gentle ocean waves and soft birdsong at dawn. Subtle piano and string harmonies play in the background, creating a calming and serene atmosphere. Extremely slow tempo and soft tones ensure deep relaxation and restful sleep.',
    'Low, warm synthesizer pads and soft piano melodies designed for deep breathing and meditation. Repetitive and slow rhythms provide a sense of security, while ambient wind-like synthesizer effects quietly flow in the background.'
])

# 샘플 저장 및 출력
for idx, one_wav in enumerate(wavs):
    audio_write(f'{idx}', one_wav.cpu(), musicgen.sample_rate, strategy="loudness", loudness_compressor=True)
    ipd.display(ipd.Audio(one_wav.cpu(), rate=32000))

Output hidden; open in https://colab.research.google.com to view.

In [7]:
from audiocraft.models import MusicGen
import torch
import IPython.display as ipd
from audiocraft.data.audio import audio_write

# load your fine-tuned model
musicgen = MusicGen.get_pretrained('/content/drive/MyDrive/projects/carecruise_intern/audiocraft/logs/efa9cb0e/checkpoints/finetune')

# MusicGen 객체 내부의 모든 텐서를 CPU로 이동
if hasattr(musicgen, 'devices'):
    musicgen.devices = ['cpu']  # 내부 디바이스 설정을 CPU로 변경
else:
    print("Warning: Could not set devices attribute. Ensure model is CPU-compatible.")

# 생성 파라미터 설정
musicgen.set_generation_params(duration=30)

# 텍스트 프롬프트 설정
prompts = [
    'A peaceful melody blending gentle ocean waves and soft birdsong at dawn. Subtle piano and string harmonies play in the background, creating a calming and serene atmosphere. Extremely slow tempo and soft tones ensure deep relaxation and restful sleep.',
    'Low, warm synthesizer pads and soft piano melodies designed for deep breathing and meditation. Repetitive and slow rhythms provide a sense of security, while ambient wind-like synthesizer effects quietly flow in the background.'
]

# 추론 수행
with torch.no_grad():  # 추론 시 그래디언트 비활성화
    wavs = musicgen.generate(prompts)

# 결과 저장 및 출력
for idx, one_wav in enumerate(wavs):
    one_wav = one_wav.cpu()  # 텐서를 CPU로 이동
    audio_write(f'{idx}', one_wav, musicgen.sample_rate, strategy="loudness", loudness_compressor=True)
    ipd.display(ipd.Audio(one_wav.numpy(), rate=32000))

Output hidden; open in https://colab.research.google.com to view.

In [8]:
import json

# wandb 설정 데이터를 Python 딕셔너리로 변환
musicgen_config = {
    "transformer_lm.norm_first": {"value": True},
    "wandb.with_media_logging": {"value": True},
    "generate.lm.prompt_duration": {"value": "None"},
    "slurm.time": {"value": 3600},
    "fuser.cross": {"value": "['description']"},
    "fsdp.per_block": {"value": True},
    "fsdp.buffer_dtype": {"value": "float32"},
    "autocast": {"value": True},
    "fsdp.param_dtype": {"value": "float16"},
    "optim.eager_sync": {"value": True},
    "transformer_lm.emb_lr": {"value": "None"},
    "channels": {"value": 1},
    "optim.ema.use": {"value": True},
    "dataset.shuffle": {"value": False},
    "generate.every": {"value": 25},
    "codebooks_pattern.modeling": {"value": "delay"},
    "metrics.text_consistency.clap.model_arch": {"value": "HTSAT-base"},
    "generate.audio.loudness_headroom_db": {"value": 14},
    "fuser.sum": {"value": "[]"},
    "conditioners.description.t5.word_dropout": {"value": 0.3},
    "dora.dir": {"value": "/checkpoint/choihj/experiments/audiocraft/outputs"},
    "tensorboard.with_media_logging": {"value": True},
    "generate.audio.format": {"value": "wav"},
    "logging.level": {"value": "INFO"},
    "slurm.gpus": {"value": 4},
    "dataset.min_segment_ratio": {"value": 0.8},
    "interleave_stereo_codebooks.use": {"value": False},
    "codebooks_pattern.unroll.flattening": {"value": "[0, 1, 2, 3]"},
    "transformer_lm.two_step_cfg": {"value": False},
    "optim.updates_per_epoch": {"value": 100},
    "transformer_lm.depthwise_init": {"value": "current"},
    "transformer_lm.past_context": {"value": "None"},
    "metrics.chroma_cosine.chroma_base.sample_rate": {"value": 32000},
    "fuser.cross_attention_pos_emb_scale": {"value": 1},
    "optim.epochs": {"value": 100},
    "transformer_lm.bias_attn": {"value": False},
    "datasource.valid": {"value": "/content/drive/MyDrive/projects/carecruise_intern/audiocraft/egs/eval"},
    "tensorboard.sub_dir": {"value": "None"},
    "generate.num_workers": {"value": 5},
    "metrics.fad.tf.bin": {"value": "None"},
    "fsdp.reduce_dtype": {"value": "float32"},
    "dataset.train.merge_text_p": {"value": 0.25},
    "schedule.step.gamma": {"value": "None"},
    "transformer_lm.kv_repeat": {"value": 1},
    "wandb.group": {"value": "None"},
    "cache.write": {"value": False},
    "transformer_lm.causal": {"value": True},
    "generate.lm.remove_prompts": {"value": False},
    "metrics.fad.tf.model_path": {"value": "//reference/fad/vggish_model.ckpt"},
    "evaluate.metrics.base": {"value": False},
    "generate.num_samples": {"value": 5},
    "autocast_dtype": {"value": "float16"},
    "classifier_free_guidance.inference_coef": {"value": 3},
    "codebooks_pattern.delay.flatten_first": {"value": 0},
    "dataset.segment_duration": {"value": 30},
    "slurm.mem_per_gpu": {"value": 40},
    "datasource.train": {"value": "/content/drive/MyDrive/projects/carecruise_intern/audiocraft/egs/train"},
    "transformer_lm.layer_scale": {"value": "None"},
    "num_threads": {"value": 1},
    "optim.ema.device": {"value": "cuda"},
    "metrics.text_consistency.use_gt": {"value": False},
    "schedule.inverse_sqrt.warmup_init_lr": {"value": 0},
    "evaluate.metrics.text_consistency": {"value": False},
    "schedule.polynomial_decay.end_lr": {"value": 0},
    "transformer_lm.num_heads": {"value": 16},
    "metrics.chroma_cosine.chroma_base.n_chroma": {"value": 12},
    "dtype": {"value": "float32"},
    "metrics.kld.model": {"value": "passt"},
    "evaluate.truncate_audio": {"value": "None"},
    "checkpoint.save_last": {"value": True},
    "evaluate.metrics.kld": {"value": False},
    "optim.optimizer": {"value": "adamw"},
    "dataset.train.drop_other_p": {"value": 0.5},
    "transformer_lm.activation": {"value": "gelu"},
    "evaluate.every": {"value": 25},
    "fsdp.use": {"value": False},
    "tokens.padding_with_special_token": {"value": False},
    "transformer_lm.qk_layer_norm": {"value": False},
    "device": {"value": "cuda"},
    "fsdp.sharding_strategy": {"value": "shard_grad_op"},
    "dataset.train.shuffle": {"value": True},
    "optim.adam.betas": {"value": "[0.9, 0.95]"},
    "metrics.kld.use_gt": {"value": False},
    "dataset.generate.return_info": {"value": True},
    "dataset.batch_size": {"value": 1},
    "dataset.sample_on_duration": {"value": False},
    "schedule.inverse_sqrt.warmup": {"value": "None"},
    "fuser.prepend": {"value": "[]"},
    "efficient_attention_backend": {"value": "torch"},
    "codebooks_pattern.unroll.delays": {"value": "[0, 0, 0, 0]"},
    "schedule.cosine.warmup": {"value": 8},
    "schedule.lr_scheduler": {"value": "cosine"},
    "dataset.valid.num_samples": {"value": 1},
    "transformer_lm.hidden_scale": {"value": 4},
    "schedule.exponential.lr_decay": {"value": "None"},
    "show": {"value": False},
    "transformer_lm.card": {"value": 2048},
    "fuser.cross_attention_pos_emb": {"value": False},
    "conditioners.description.model": {"value": "t5"},
    "generate.path": {"value": "samples"},
    "codebooks_pattern.delay.delays": {"value": "[0, 1, 2, 3]"},
    "transformer_lm.xpos": {"value": False},
    "logging.log_tensorboard": {"value": True},
    "benchmark_no_load": {"value": False},
    "schedule.cosine.lr_min_ratio": {"value": 0},
    "transformer_lm.custom": {"value": False},
    "evaluate.metrics.chroma_cosine": {"value": False},
    "cache.write_shard": {"value": 0},
    "schedule.polynomial_decay.power": {"value": 1},
    "generate.audio.strategy": {"value": "loudness"},
    "transformer_lm.dim": {"value": 1024},
    "compression_model_checkpoint": {"value": "//pretrained/facebook/encodec_32khz"}
}

# JSON으로 저장
with open("/content/drive/MyDrive/projects/carecruise_intern/audiocraft/logs/efa9cb0e/checkpoints/finetune/config.json", "w") as f:
    json.dump(musicgen_config, f, indent=4)

In [16]:
import locale
import os

# 강제로 UTF-8 로케일 설정
os.environ["LC_ALL"] = "C.UTF-8"
os.environ["LANG"] = "C.UTF-8"
locale.setlocale(locale.LC_ALL, "C.UTF-8")

# 확인
print(locale.getpreferredencoding())

ANSI_X3.4-1968


In [17]:
!pip install huggingface_hub

NotImplementedError: A UTF-8 locale is required. Got ANSI_X3.4-1968

In [11]:
import locale
import os

# 강제로 UTF-8 로케일 설정
os.environ["LC_ALL"] = "C.UTF-8"
os.environ["LANG"] = "C.UTF-8"
locale.setlocale(locale.LC_ALL, "C.UTF-8")

# 확인
print(locale.getpreferredencoding())

ANSI_X3.4-1968


In [12]:
!huggingface-cli login

NotImplementedError: A UTF-8 locale is required. Got ANSI_X3.4-1968

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

In [None]:
from huggingface_hub import HfApi, HfFolder, Repository

# Hugging Face 로그인 (필요하면 실행)
!huggingface-cli login

# Hugging Face에 저장소 생성
repository_name = "your_model_name"  # 저장소 이름 지정
api = HfApi()
repo_url = api.create_repo(repo_id=repository_name, private=False)

# 저장소 클론
repo = Repository(local_dir=repository_name, clone_from=repo_url)

# 필요한 파일 복사
import shutil

files_to_upload = [
    '/content/drive/MyDrive/projects/carecruise_intern/audiocraft/logs/efa9cb0e/checkpoints/finetune/state_dict.bin',
    '/content/drive/MyDrive/projects/carecruise_intern/audiocraft/logs/efa9cb0e/checkpoints/finetune/compression_state_dict.bin'
]
for file_path in files_to_upload:
    shutil.copy(file_path, repository_name)

# config.json 생성 (필요한 경우 수정)
config = {
    "model_type": "MusicGen",
    "sample_rate": 32000,
    "description": "Fine-tuned MusicGen model with Encodec compression for high-quality audio generation.",
    "generation_params": {"duration": 30}
}

with open(f"{repository_name}/config.json", "w") as f:
    import json
    json.dump(config, f)

# Hugging Face에 파일 업로드
repo.push_to_hub(commit_message="Add fine-tuned MusicGen model and compression model")

In [None]:
from huggingface_hub import HfApi, Repository

repo_name = "musicgen-finetuned"
api = HfApi()
repo_url = api.create_repo(repo_id=repo_name, private=False)

repo = Repository(local_dir=repo_name, clone_from=repo_url)

# 필요한 파일 복사
import shutil
shutil.copy("path/to/state_dict.bin", repo_name)
shutil.copy("path/to/compression_state_dict.bin", repo_name)
with open(f"{repo_name}/config.json", "w") as f:
    f.write(config_json)  # 위에서 작성한 config.json 문자열
with open(f"{repo_name}/preprocessor_config.json", "w") as f:
    f.write(preprocessor_config_json)  # preprocessor_config.json 문자열

# 업로드
repo.push_to_hub(commit_message="Upload fine-tuned MusicGen model")