In [6]:
# This scipt is used to generate the music noise background 

import os
import pandas as pd
from pydub import AudioSegment

# Define paths
metadata_path = r"D:\DNN\Training_data\Noise\Extracted_FMA_Instrumental\track_genre.csv"
audio_base_path = r"D:\DNN\Training_data\fma_large"
output_folder = r"D:\DNN\Training_data\Noise\Extracted_FMA_Instrumental"

# Ensure output directory exists
os.makedirs(output_folder, exist_ok=True)

# Load the track metadata (first column = track ID, second column = genre)
df = pd.read_csv(metadata_path, header=None, names=["track_id", "genre"])

# Filter for instrumental tracks
instrumental_genres = ['Classical', 'Jazz', 'Ambient', 'Electronic', 'Instrumental', 'Soundtrack']
df_filtered = df[df["genre"].isin(instrumental_genres)]

# Extract track IDs
track_ids = df_filtered["track_id"].astype(str).tolist()

# Prepare a list to store valid file paths
valid_audio_files = []

# Find matching audio files
for track_id in track_ids:
    folder = track_id.zfill(6)[:3]  # FMA organizes files in subfolders based on track_id
    file_path = os.path.join(audio_base_path, folder, f"{track_id}.mp3")
    if os.path.exists(file_path):
        valid_audio_files.append(file_path)

# Process each valid file to extract 2-second clips
clip_duration_ms = 2000  # 2 seconds
num_clips = 4000  # Define how many clips to extract
bad_files = []  # List to track corrupt MP3s

processed_files = []
for i, file_path in enumerate(valid_audio_files[:num_clips]):
    try:
        # Attempt to load the MP3 file
        audio = AudioSegment.from_file(file_path, format="mp3")
        if len(audio) < clip_duration_ms:
            print(f"Skipping {file_path}: too short!")
            continue  # Skip short files

        # Extract a 2-second clip from the start
        start_time = 0  
        clip = audio[start_time:start_time + clip_duration_ms]
        
        # Save the extracted clip
        output_filename = f"instrumental_clip_{i+1}.wav"
        output_path = os.path.join(output_folder, output_filename)
        clip.export(output_path, format="wav")
        processed_files.append(output_filename)

        if i % 100 == 0:
            print(f"Processed: {output_filename} ({i}/{num_clips})")

    except Exception as e:
        print(f"Skipping {file_path}: {e}")
        bad_files.append(file_path)

# Save a log of bad files for review
bad_files_log = os.path.join(output_folder, "bad_mp3_files.txt")
with open(bad_files_log, "w") as f:
    for bad_file in bad_files:
        f.write(bad_file + "\n")

print(f"All 2-second instrumental clips have been extracted.")
print(f"{len(bad_files)} corrupted MP3 files were skipped. Check the log: {bad_files_log}")




Processed: instrumental_clip_1.wav (0/4000)
Processed: instrumental_clip_101.wav (100/4000)
Processed: instrumental_clip_201.wav (200/4000)
Skipping D:\DNN\Training_data\fma_large\106\106628.mp3: Decoding failed. ffmpeg returned error code: 3199971767

Output from ffmpeg/avlib:

ffmpeg version N-117286-g262e6f8430-20241001 Copyright (c) 2000-2024 the FFmpeg developers
  built with gcc 14.2.0 (crosstool-NG 1.26.0.106_ed12fa6)
  configuration: --prefix=/ffbuild/prefix --pkg-config-flags=--static --pkg-config=pkg-config --cross-prefix=x86_64-w64-mingw32- --arch=x86_64 --target-os=mingw32 --enable-gpl --enable-version3 --disable-debug --disable-w32threads --enable-pthreads --enable-iconv --enable-zlib --enable-libfreetype --enable-libfribidi --enable-gmp --enable-libxml2 --enable-lzma --enable-fontconfig --enable-libharfbuzz --enable-libvorbis --enable-opencl --disable-libpulse --enable-libvmaf --disable-libxcb --disable-xlib --enable-amf --enable-libaom --enable-libaribb24 --enable-avisyn