In [1]:
import os
import re
from pydub import AudioSegment

# Root directory containing all dataset folders. It is walked recursively below,
# and every folder that directly contains a "CEO" subfolder is processed.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
root_dir = r"C:\Users\welin\Documents\Thesis\Research\ACL19_Release"

def extract_numbers(filename):
    """Build a two-integer sort key from the digit runs in an audio filename.

    The name is lower-cased and every ".mp3" / ".wav" occurrence is removed
    before searching, so the "3" in "mp3" is never counted as a number.

    Args:
        filename: File name (not a full path) of an audio clip.

    Returns:
        A tuple (first, second) of ints taken from the first two digit runs.
        A missing number is replaced by 0, so a name with a single number
        yields (n, 0) and a name without digits yields (0, 0).
    """
    stem = filename.lower()
    for extension in (".mp3", ".wav"):
        stem = stem.replace(extension, "")

    # Keep at most the first two digit runs, then pad with zeros up to two values.
    leading = [int(digits) for digits in re.findall(r'\d+', stem)[:2]]
    leading += [0] * (2 - len(leading))
    return leading[0], leading[1]

# Walk through all subdirectories and, for every folder that directly contains a
# "CEO" subfolder, merge that subfolder's audio clips into a single mp3.
for dirpath, dirnames, _filenames in os.walk(root_dir):
    if "CEO" not in dirnames:  # Only folders holding a "CEO" folder are processed
        continue

    ceo_folder_path = os.path.join(dirpath, "CEO")

    # The folder holding "CEO" is the company folder, NOT "CEO" itself;
    # its name (spaces replaced by underscores) is used for the output file.
    parent_folder_path = dirpath
    parent_folder_name = os.path.basename(parent_folder_path).replace(" ", "_")

    # Save the merged file inside the parent folder (NOT inside CEO), so the
    # result is never listed as an input clip on a later run.
    output_audio_path = os.path.join(parent_folder_path, f"{parent_folder_name}_merged.mp3")

    # Get and sort all audio files numerically
    audio_files = sorted(
        (f for f in os.listdir(ceo_folder_path) if f.lower().endswith((".mp3", ".wav"))),
        key=extract_numbers,
    )

    if not audio_files:
        continue

    # Load and concatenate the audio files, counting how many were actually merged
    combined_audio = AudioSegment.empty()
    merged_count = 0
    for audio_file in audio_files:
        file_path = os.path.join(ceo_folder_path, audio_file)

        try:
            audio_segment = AudioSegment.from_file(file_path)
            combined_audio += audio_segment
            merged_count += 1
        except Exception as e:
            # Best effort: keep going, but report the gap so an incomplete
            # merged file does not go unnoticed.
            print(f"Skipped unreadable file: {file_path} ({e})")

    if merged_count == 0:
        # Nothing could be decoded; do not write an empty "merged" file.
        print(f"No readable audio in: {ceo_folder_path} (nothing exported)")
        continue

    # Export merged file
    try:
        combined_audio.export(output_audio_path, format="mp3")
        print(f"Saved merged file: {output_audio_path}")
    except Exception as e:
        # Report the failure and move on to the next company folder.
        print(f"Failed to export: {output_audio_path} ({e})")


Saved merged file: C:\Users\welin\Documents\Thesis\Research\ACL19_Release\3M Company_20170425\3M_Company_20170425_merged.mp3
Saved merged file: C:\Users\welin\Documents\Thesis\Research\ACL19_Release\3M Company_20170725\3M_Company_20170725_merged.mp3
Saved merged file: C:\Users\welin\Documents\Thesis\Research\ACL19_Release\A.O. Smith Corp_20170726\A.O._Smith_Corp_20170726_merged.mp3
Saved merged file: C:\Users\welin\Documents\Thesis\Research\ACL19_Release\Abbott Laboratories_20171018\Abbott_Laboratories_20171018_merged.mp3
Saved merged file: C:\Users\welin\Documents\Thesis\Research\ACL19_Release\AbbVie Inc._20170427\AbbVie_Inc._20170427_merged.mp3
Saved merged file: C:\Users\welin\Documents\Thesis\Research\ACL19_Release\AbbVie Inc._20170728\AbbVie_Inc._20170728_merged.mp3
Saved merged file: C:\Users\welin\Documents\Thesis\Research\ACL19_Release\AbbVie Inc._20171027\AbbVie_Inc._20171027_merged.mp3
Saved merged file: C:\Users\welin\Documents\Thesis\Research\ACL19_Release\ABIOMED Inc_20170

In [1]:
from pydub import AudioSegment

import os

# Manually set the FFmpeg path that pydub should use
AudioSegment.converter = r"C:\ffmpeg\bin\ffmpeg.exe"

print(f"🔍 Using FFmpeg at: {AudioSegment.converter}")

# Test loading an audio file
file_path = r"C:\Users\welin\Documents\Thesis\Research\ACL19_Release\3M Company_20170425\CEO\Nicholas C. Gangestad_1_1.mp3"
if not os.path.isfile(file_path):
    # Check the sample first: otherwise a missing test file would be reported
    # below as an FFmpeg problem and send debugging in the wrong direction.
    print(f"❌ Test file not found: {file_path}")
else:
    try:
        audio = AudioSegment.from_file(file_path)
        print("✅ FFmpeg is working with pydub!")
    except Exception as e:
        print(f"❌ FFmpeg error: {e}")


🔍 Using FFmpeg at: C:\ffmpeg\bin\ffmpeg.exe
✅ FFmpeg is working with pydub!
