In [1]:
import torch
from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write
import os # 导入 os 模块用于文件路径操作

# 1. Select the compute device: use the GPU when CUDA is available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")


A matching Triton is not available, some optimizations will not be enabled.
Error caught was: No module named 'triton'
  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda


In [4]:
try:
    # 2. Load the pretrained model.
    #    Other checkpoints can be substituted, e.g. 'facebook/musicgen-small',
    #    'facebook/musicgen-medium' or 'facebook/musicgen-melody'.
    print("Loading MusicGen model (this may take a moment)...")
    model = MusicGen.get_pretrained('facebook/musicgen-small', device=device)
    print("Model loaded successfully.")

    # 3. Configure generation; `duration` is the length of each clip in seconds.
    model.set_generation_params(duration=8)
    print("Set generation parameters.")

    # 4. Text prompts. One clip is generated (and saved) per entry.
    descriptions = [
        'An 80s anime music',
        # 'A cheerful country song with acoustic guitars and a steady beat',
        # 'epic orchestral score with heroic brass and soaring strings'
    ]
    print(f"Generating audio for: {'; '.join(descriptions)}")

    # Text-conditioned generation; the result is indexed per prompt below.
    output = model.generate(
        descriptions=descriptions,
        progress=True,  # show a progress bar
        return_tokens=False
    )
    # First generated sample, kept under its original name in case later cells use `wav`.
    wav = output[0]

    print("Audio generation complete.")

    # 5. Save every generated clip.
    output_folder = 'musicgen_output'
    os.makedirs(output_folder, exist_ok=True)  # no-op when the folder already exists

    saved_files = []
    for index, (description, sample) in enumerate(zip(descriptions, output)):
        # Build a filesystem-safe stem from the prompt: keep letters, digits, '-' and '_',
        # turn everything else (spaces, slashes, colons, ...) into '_', and cap the length
        # at 30 characters. Fall back to a fixed stem if the prompt is empty.
        file_stem = ''.join(
            ch if (ch.isalnum() or ch in '-_') else '_' for ch in description
        )[:30] or 'sample'

        print(f"Saving audio to {output_folder}/{file_stem}_{index}.wav ...")
        # audio_write(stem_name, wav, sample_rate, ...) appends the file suffix itself and
        # is documented to return the path it wrote (a single path, not a list).
        actual_filename = audio_write(
            os.path.join(output_folder, f'{file_stem}_{index}'),  # stem name, without extension
            sample.cpu(),                                         # waveform moved to CPU for writing
            model.sample_rate,                                    # sample rate of the model output
            strategy="loudness",
            loudness_compressor=True
        )

        # Defensive check kept from the original cell.
        if actual_filename:
            saved_files.append(actual_filename)
            print(f"Audio saved successfully as {actual_filename}")
        else:
            print("Error saving audio or audio_write returned an empty list.")


except Exception as e:
    # Best-effort cell: report the failure with a full traceback instead of aborting the notebook run.
    print(f"An error occurred: {e}")
    import traceback
    traceback.print_exc()


Loading MusicGen model (this may take a moment)...




Model loaded successfully.
Set generation parameters.
Generating audio for: An 80s anime music
Audio generation complete.
Saving audio to musicgen_output/An_80s_anime_music_0.wav ...
Audio saved successfully as musicgen_output\An_80s_anime_music_0.wav


In [1]:
import os
import soundfile as sf # 用于读取音频文件信息

def get_audio_info(filepath):
    """
    Read basic metadata of an audio file via ``sf.info``.

    Args:
        filepath: Path of the audio file to inspect.

    Returns:
        A ``(duration, sample_rate, channels)`` tuple taken from the ``duration``,
        ``samplerate`` and ``channels`` attributes of the info object. If anything
        goes wrong while reading, an error line is printed and
        ``(None, None, None)`` is returned instead.
    """
    try:
        meta = sf.info(filepath)
        result = (meta.duration, meta.samplerate, meta.channels)
    except Exception as e:
        # Report the failure and signal it to the caller with a triple of None.
        print(f"错误：无法读取文件 {os.path.basename(filepath)} 的信息。原因: {e}")
        result = (None, None, None)
    return result

def process_directory_and_print_info(audio_directory):
    """
    Print a table with duration, sample rate and channel count for every
    ``.wav`` file directly inside ``audio_directory``.

    The directory is validated before anything else is printed, so an invalid
    path produces only the error line (no empty table header). Files are listed
    in sorted order so the output is reproducible across runs and platforms.

    Args:
        audio_directory: Path of the directory to scan (not recursive).

    Returns:
        None. All results are written to stdout.
    """
    # Validate first: printing a table header for a path that cannot be scanned is misleading.
    if not os.path.isdir(audio_directory):
        print(f"错误：提供的路径 '{audio_directory}' 不是一个有效的目录或不存在。")
        return

    print(f"正在扫描目录: {audio_directory}\n")
    print(f"{'文件名':<60} | {'时长 (秒)':<15} | {'采样率 (Hz)':<15} | {'声道数':<10}")
    print("-" * 100)

    # os.listdir returns entries in arbitrary order; sort for a stable table.
    wav_names = sorted(
        name for name in os.listdir(audio_directory) if name.lower().endswith(".wav")
    )

    if not wav_names:
        print(f"在目录 '{audio_directory}' 中没有找到 .wav 文件。")
        return

    for filename in wav_names:
        filepath = os.path.join(audio_directory, filename)
        duration, sample_rate, channels = get_audio_info(filepath)
        # get_audio_info reports unreadable files itself and returns None values; skip those rows.
        if duration is not None:
            print(f"{filename:<60} | {duration:<15.3f} | {sample_rate:<15} | {channels:<10}")
# Directory containing the dataset clips to inspect.
# NOTE(review): this is an absolute path on the author's machine; adjust it before running elsewhere.
audio_folder_path = (
    r"C:\Users\Zhang\Desktop\assignment2\audiocraft\dataset\music"
)
process_directory_and_print_info(audio_folder_path)

正在扫描目录: C:\Users\Zhang\Desktop\assignment2\audiocraft\dataset\music

文件名                                                          | 时长 (秒)          | 采样率 (Hz)        | 声道数       
----------------------------------------------------------------------------------------------------
Avid.wav                                                     | 30.037          | 48000           | 2         
Bios.wav                                                     | 30.037          | 48000           | 2         
Butter-Fly.wav                                               | 30.037          | 48000           | 2         
Merry-go-round-of-life.wav                                   | 30.037          | 48000           | 2         
One-Last-Kiss.wav                                            | 30.037          | 48000           | 2         
This-Game.wav                                                | 30.037          | 48000           | 2         
zenzenzense.wav                                             

In [2]:
import glob
import json

def reencode_json_as_utf8(json_path):
    """
    Rewrite one JSON file in place as BOM-less UTF-8 with non-ASCII characters
    stored literally (``ensure_ascii=False``) and a 2-space indent.

    Args:
        json_path: Path of the JSON file to rewrite.

    Raises:
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # 'utf-8-sig' reads plain UTF-8 and also strips a leading byte-order mark;
    # with plain 'utf-8' the BOM reaches json.load, which rejects it.
    with open(json_path, 'r', encoding='utf-8-sig') as f:
        data = json.load(f)
    # Serialize before opening the file for writing, so a serialization error
    # cannot leave a truncated file behind.
    text = json.dumps(data, ensure_ascii=False, indent=2)
    with open(json_path, 'w', encoding='utf-8') as f:
        f.write(text)


# Re-encode all JSON metadata files to UTF-8 (sorted for a deterministic processing order).
json_files = sorted(glob.glob('dataset/music/*.json'))
for json_file in json_files:
    reencode_json_as_utf8(json_file)

if json_files:
    print("Re-encoded all JSON metadata files to UTF-8")
else:
    # Do not claim success when the pattern matched nothing (e.g. wrong working directory).
    print("No JSON metadata files found matching dataset/music/*.json")

Re-encoded all JSON metadata files to UTF-8
