In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os
import pandas as pd
import torchaudio
from torchaudio.transforms import Resample
import torch

# 입력 및 출력 경로
MAESTRO_DIR = "/content/drive/MyDrive/프로젝트/케어크루즈 인턴/data/maestro_v3/maestro-v3.0.0"
PROCESSED_DIR = "/content/drive/MyDrive/프로젝트/케어크루즈 인턴/data/maestro_v3/processed_musicgen"
METADATA_FILE = "/content/drive/MyDrive/프로젝트/케어크루즈 인턴/data/maestro_v3/maestro-v3.0.0/maestro-v3.0.0.csv"

# 변환 설정
TARGET_SAMPLE_RATE = 16000
FIXED_LENGTH_SEC = 10  # 오디오 길이를 10초로 고정

# 출력 디렉토리 생성
os.makedirs(PROCESSED_DIR, exist_ok=True)

def preprocess_audio(file_path, output_path, target_sample_rate, fixed_length_sec):
    """
    오디오 파일을 MusicGen 형식으로 전처리합니다.

    Args:
        file_path (str): 원본 오디오 파일 경로.
        output_path (str): 저장할 경로.
        target_sample_rate (int): 목표 샘플링 레이트.
        fixed_length_sec (int): 고정 길이(초 단위).
    """
    # 오디오 로드
    waveform, sample_rate = torchaudio.load(file_path)

    # 채널 변환 (스테레오 → 모노)
    if waveform.size(0) > 1:
        waveform = torch.mean(waveform, dim=0, keepdim=True)

    # 리샘플링
    if sample_rate != target_sample_rate:
        resampler = Resample(orig_freq=sample_rate, new_freq=target_sample_rate)
        waveform = resampler(waveform)

    # 길이 고정 (패딩 또는 자르기)
    num_samples = target_sample_rate * fixed_length_sec
    if waveform.size(1) > num_samples:  # 자르기
        waveform = waveform[:, :num_samples]
    elif waveform.size(1) < num_samples:  # 패딩
        padding = num_samples - waveform.size(1)
        waveform = torch.nn.functional.pad(waveform, (0, padding))

    # 저장
    torchaudio.save(output_path, waveform, target_sample_rate)

# 메타데이터 로드
metadata = pd.read_csv(METADATA_FILE)

# 처리된 데이터 저장
processed_metadata = []
for idx, row in metadata.iterrows():
    audio_filename = row['audio_filename']
    audio_path = os.path.join(MAESTRO_DIR, audio_filename)
    output_path = os.path.join(PROCESSED_DIR, audio_filename)

    # 오디오 파일 처리
    try:
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        preprocess_audio(audio_path, output_path, TARGET_SAMPLE_RATE, FIXED_LENGTH_SEC)
        processed_metadata.append({
            "audio": output_path,
            "text": f"Composer: {row['canonical_composer']}, Title: {row['canonical_title']}"
        })
        print(f"Processed: {audio_filename}")
    except Exception as e:
        print(f"Failed to process {audio_filename}: {e}")

# 처리된 메타데이터 저장
processed_metadata_file = os.path.join(PROCESSED_DIR, "processed_metadata.csv")
pd.DataFrame(processed_metadata).to_csv(processed_metadata_file, index=False)
print(f"Processed metadata saved to {processed_metadata_file}")

Processed: 2018/MIDI-Unprocessed_Chamber3_MID--AUDIO_10_R3_2018_wav--1.wav
Processed: 2008/MIDI-Unprocessed_03_R2_2008_01-03_ORIG_MID--AUDIO_03_R2_2008_wav--2.wav
Processed: 2017/MIDI-Unprocessed_066_PIANO066_MID--AUDIO-split_07-07-17_Piano-e_3-02_wav--3.wav
Processed: 2004/MIDI-Unprocessed_XP_21_R1_2004_01_ORIG_MID--AUDIO_21_R1_2004_01_Track01_wav.wav
Processed: 2006/MIDI-Unprocessed_17_R1_2006_01-06_ORIG_MID--AUDIO_17_R1_2006_04_Track04_wav.wav
Processed: 2009/MIDI-Unprocessed_07_R1_2009_04-05_ORIG_MID--AUDIO_07_R1_2009_07_R1_2009_04_WAV.wav
Processed: 2009/MIDI-Unprocessed_11_R1_2009_06-09_ORIG_MID--AUDIO_11_R1_2009_11_R1_2009_07_WAV.wav
Processed: 2013/ORIG-MIDI_03_7_8_13_Group__MID--AUDIO_19_R2_2013_wav--3.wav
Processed: 2009/MIDI-Unprocessed_02_R1_2009_03-06_ORIG_MID--AUDIO_02_R1_2009_02_R1_2009_04_WAV.wav
Processed: 2009/MIDI-Unprocessed_02_R1_2009_03-06_ORIG_MID--AUDIO_02_R1_2009_02_R1_2009_05_WAV.wav
Processed: 2011/MIDI-Unprocessed_15_R1_2011_MID--AUDIO_R1-D6_09_Track09_wav.w

In [None]:
import os
import pandas as pd
import torchaudio
from torchaudio.transforms import Resample
import torch
from tqdm import tqdm  # tqdm 추가

# 입력 및 출력 경로
MAESTRO_DIR = "/content/drive/MyDrive/프로젝트/케어크루즈 인턴/data/maestro_v3/maestro-v3.0.0"
PROCESSED_DIR = "/content/drive/MyDrive/프로젝트/케어크루즈 인턴/data/maestro_v3/processed_musicgen"
METADATA_FILE = "/content/drive/MyDrive/프로젝트/케어크루즈 인턴/data/maestro_v3/maestro-v3.0.0/maestro-v3.0.0.csv"

# 변환 설정
TARGET_SAMPLE_RATE = 16000
FIXED_LENGTH_SEC = 10  # 오디오 길이를 10초로 고정

# 출력 디렉토리 생성
os.makedirs(PROCESSED_DIR, exist_ok=True)

def preprocess_audio(file_path, output_path, target_sample_rate, fixed_length_sec):
    """
    오디오 파일을 MusicGen 형식으로 전처리합니다.

    Args:
        file_path (str): 원본 오디오 파일 경로.
        output_path (str): 저장할 경로.
        target_sample_rate (int): 목표 샘플링 레이트.
        fixed_length_sec (int): 고정 길이(초 단위).
    """
    # 오디오 로드
    waveform, sample_rate = torchaudio.load(file_path)

    # 채널 변환 (스테레오 → 모노)
    if waveform.size(0) > 1:
        waveform = torch.mean(waveform, dim=0, keepdim=True)

    # 리샘플링
    if sample_rate != target_sample_rate:
        resampler = Resample(orig_freq=sample_rate, new_freq=target_sample_rate)
        waveform = resampler(waveform)

    # 길이 고정 (패딩 또는 자르기)
    num_samples = target_sample_rate * fixed_length_sec
    if waveform.size(1) > num_samples:  # 자르기
        waveform = waveform[:, :num_samples]
    elif waveform.size(1) < num_samples:  # 패딩
        padding = num_samples - waveform.size(1)
        waveform = torch.nn.functional.pad(waveform, (0, padding))

    # 저장
    torchaudio.save(output_path, waveform, target_sample_rate)

# 메타데이터 로드
metadata = pd.read_csv(METADATA_FILE)

# 처리된 데이터 저장
processed_metadata = []

# tqdm을 사용한 진행 바
with tqdm(total=len(metadata), desc="Processing audio files") as pbar:
    for idx, row in metadata.iterrows():
        audio_filename = row['audio_filename']
        audio_path = os.path.join(MAESTRO_DIR, audio_filename)
        output_path = os.path.join(PROCESSED_DIR, audio_filename)

        # 오디오 파일 처리
        try:
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            preprocess_audio(audio_path, output_path, TARGET_SAMPLE_RATE, FIXED_LENGTH_SEC)
            processed_metadata.append({
                "audio": output_path,
                "text": f"Composer: {row['canonical_composer']}, Title: {row['canonical_title']}"
            })
            pbar.set_postfix({"Last Processed": audio_filename})
        except Exception as e:
            print(f"Failed to process {audio_filename}: {e}")
        finally:
            pbar.update(1)

# 처리된 메타데이터 저장
processed_metadata_file = os.path.join(PROCESSED_DIR, "processed_metadata.csv")
pd.DataFrame(processed_metadata).to_csv(processed_metadata_file, index=False)
print(f"Processed metadata saved to {processed_metadata_file}")

Downloading Part 1: 100%|██████████| 36.0G/36.0G [08:19<00:00, 72.1MB/s]

Part 1 downloaded: /content/drive/MyDrive/프로젝트/케어크루즈 인턴/data/maestro-v3.0.0.part1





In [7]:
import os

def count_wav_files_in_subdirectories(directory):
    """
    각 서브디렉토리에서 .wav 파일만 계산하고 출력합니다.

    Args:
        directory (str): 최상위 디렉토리 경로.
    """
    for subdir, _, files in os.walk(directory):
        # .wav 파일만 필터링
        wav_files = [file for file in files if file.endswith(".wav")]
        file_count = len(wav_files)
        print(f"Directory: {subdir}, WAV File Count: {file_count}")

# 데이터 디렉토리 경로 설정
MAESTRO_DIR = "/content/drive/MyDrive/프로젝트/케어크루즈 인턴/data/maestro_v3/maestro-v3.0.0"
PROCESSED_DIR = "/content/drive/MyDrive/프로젝트/케어크루즈 인턴/data/maestro_v3/processed_musicgen"

# 원본 데이터 .wav 파일 개수 출력
print("Original Dataset (.wav files only) File Counts:")
count_wav_files_in_subdirectories(MAESTRO_DIR)

# 처리된 데이터 .wav 파일 개수 출력
print("\nProcessed Dataset (.wav files only) File Counts:")
count_wav_files_in_subdirectories(PROCESSED_DIR)

Original Dataset (.wav files only) File Counts:
Directory: /content/drive/MyDrive/프로젝트/케어크루즈 인턴/data/maestro_v3/maestro-v3.0.0, WAV File Count: 0
Directory: /content/drive/MyDrive/프로젝트/케어크루즈 인턴/data/maestro_v3/maestro-v3.0.0/2004, WAV File Count: 264
Directory: /content/drive/MyDrive/프로젝트/케어크루즈 인턴/data/maestro_v3/maestro-v3.0.0/2006, WAV File Count: 230
Directory: /content/drive/MyDrive/프로젝트/케어크루즈 인턴/data/maestro_v3/maestro-v3.0.0/2009, WAV File Count: 250
Directory: /content/drive/MyDrive/프로젝트/케어크루즈 인턴/data/maestro_v3/maestro-v3.0.0/2011, WAV File Count: 326
Directory: /content/drive/MyDrive/프로젝트/케어크루즈 인턴/data/maestro_v3/maestro-v3.0.0/2008, WAV File Count: 294
Directory: /content/drive/MyDrive/프로젝트/케어크루즈 인턴/data/maestro_v3/maestro-v3.0.0/2014, WAV File Count: 210
Directory: /content/drive/MyDrive/프로젝트/케어크루즈 인턴/data/maestro_v3/maestro-v3.0.0/2013, WAV File Count: 254
Directory: /content/drive/MyDrive/프로젝트/케어크루즈 인턴/data/maestro_v3/maestro-v3.0.0/2015, WAV File Count: 258
Directory: /co

In [8]:
original_files = set(
    os.path.relpath(os.path.join(root, file), MAESTRO_DIR)
    for root, _, files in os.walk(MAESTRO_DIR)
    for file in files if file.endswith(".wav")
)
processed_files = set(
    os.path.relpath(os.path.join(root, file), PROCESSED_DIR)
    for root, _, files in os.walk(PROCESSED_DIR)
    for file in files if file.endswith(".wav")
)

missing_files = original_files - processed_files
print(f"Missing WAV files: {len(missing_files)}")
for file in sorted(missing_files):
    print(file)

Missing WAV files: 1276
2004/._MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.wav
2004/._MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_06_Track06_wav.wav
2004/._MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_08_Track08_wav.wav
2004/._MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_10_Track10_wav.wav
2004/._MIDI-Unprocessed_SMF_05_R1_2004_01_ORIG_MID--AUDIO_05_R1_2004_02_Track02_wav.wav
2004/._MIDI-Unprocessed_SMF_05_R1_2004_01_ORIG_MID--AUDIO_05_R1_2004_03_Track03_wav.wav
2004/._MIDI-Unprocessed_SMF_05_R1_2004_02-03_ORIG_MID--AUDIO_05_R1_2004_06_Track06_wav.wav
2004/._MIDI-Unprocessed_SMF_07_R1_2004_01_ORIG_MID--AUDIO_07_R1_2004_02_Track02_wav.wav
2004/._MIDI-Unprocessed_SMF_07_R1_2004_01_ORIG_MID--AUDIO_07_R1_2004_04_Track04_wav.wav
2004/._MIDI-Unprocessed_SMF_07_R1_2004_01_ORIG_MID--AUDIO_07_R1_2004_06_Track06_wav.wav
2004/._MIDI-Unprocessed_SMF_07_R1_2004_01_ORIG_MID--AUDIO_07_R1_2004_12_Track12_w

In [10]:
!pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [11]:
from pydub import AudioSegment
from pydub.playback import play

def play_audio(file_path):
    """
    주어진 경로의 오디오 파일을 재생합니다.
    :param file_path: 오디오 파일 경로
    """
    try:
        # 파일 로드
        print(f"파일을 로드 중입니다: {file_path}")
        audio = AudioSegment.from_file(file_path)

        # 재생
        print("오디오를 재생합니다...")
        play(audio)
        print("재생 완료.")
    except FileNotFoundError:
        print(f"파일을 찾을 수 없습니다: {file_path}")
    except Exception as e:
        print(f"오디오를 재생하는 중 오류가 발생했습니다: {str(e)}")

# 특정 파일 경로
file_path = "/content/drive/MyDrive/프로젝트/케어크루즈 인턴/data/maestro_v3/maestro-v3.0.0/2004/._MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.wav"

# 재생 함수 호출
play_audio(file_path)

파일을 로드 중입니다: /content/drive/MyDrive/프로젝트/케어크루즈 인턴/data/maestro_v3/maestro-v3.0.0/2004/._MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.wav
오디오를 재생하는 중 오류가 발생했습니다: Decoding failed. ffmpeg returned error code: 1

Output from ffmpeg/avlib:

ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmp

In [13]:
import os


# MAESTRO_DIR에서 리소스 포크 파일 무시하고 WAV 파일 목록 가져오기
original_files = set(
    os.path.relpath(os.path.join(root, file), MAESTRO_DIR)
    for root, _, files in os.walk(MAESTRO_DIR)
    for file in files if file.endswith(".wav") and not file.startswith("._")
)

# PROCESSED_DIR에서 리소스 포크 파일 무시하고 WAV 파일 목록 가져오기
processed_files = set(
    os.path.relpath(os.path.join(root, file), PROCESSED_DIR)
    for root, _, files in os.walk(PROCESSED_DIR)
    for file in files if file.endswith(".wav") and not file.startswith("._")
)

# 누락된 파일 계산
missing_files = original_files - processed_files

# 결과 출력
print(f"Missing WAV files: {len(missing_files)}")
for file in sorted(missing_files):
    print(file)

Missing WAV files: 0
