In [1]:
%pip install pydub

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
from pydub import AudioSegment

In [3]:
def split_audio_file(file_path, output_directory, part_number=1, max_duration=30*60*1000):
    """Split an audio file into consecutive parts no longer than ``max_duration``.

    The file is loaded with ``AudioSegment.from_file``. If its length (in
    milliseconds, as reported by ``len()``) does not exceed ``max_duration``,
    nothing is written. Otherwise it is cut into back-to-back slices of
    ``max_duration`` ms (the last slice may be shorter) and every slice is
    exported to ``output_directory`` as ``<base>_part<N><ext>``, where ``N``
    starts at ``part_number`` and increases by one per exported slice.

    Args:
        file_path: Path of the audio file to inspect/split.
        output_directory: Directory that receives the exported parts; it is
            created if it does not exist yet.
        part_number: Number used for the first exported part.
        max_duration: Maximum length of a part in milliseconds
            (default: 30 minutes).

    Returns:
        The next unused part number: ``part_number`` unchanged when the file
        was not split, otherwise ``part_number`` plus the number of parts
        written.

    Raises:
        ValueError: If ``max_duration`` is not positive.
    """
    # A zero step makes range() raise and a negative one silently yields no
    # parts at all, so reject both up front with a clear message.
    if max_duration <= 0:
        raise ValueError("max_duration must be a positive number of milliseconds")

    audio = AudioSegment.from_file(file_path)
    file_length = len(audio)

    if file_length <= max_duration:
        print(f"{file_path} is within the allowed duration.")
        return part_number

    print(f"{file_path} exceeds the allowed duration. Splitting...")

    base_name, ext = os.path.splitext(os.path.basename(file_path))
    # Drop the leading dot and normalise the case so that e.g. ".MP3" is
    # exported with the format name "mp3"; the file name keeps the original
    # extension spelling.
    export_format = ext[1:].lower()

    # Allow the function to be used on its own, without a caller having
    # prepared the output directory beforehand.
    os.makedirs(output_directory, exist_ok=True)

    # Each slice spans at most max_duration milliseconds, so a part never
    # needs to be split again; no recursive re-check is required.
    for start in range(0, file_length, max_duration):
        part = audio[start:start + max_duration]
        part_file_name = f"{base_name}_part{part_number}{ext}"
        part_file_path = os.path.join(output_directory, part_file_name)
        part.export(part_file_path, format=export_format)
        print(f"Exported {part_file_path}")
        part_number += 1

    return part_number

def process_directory(input_directory, output_directory):
    """Run ``split_audio_file`` on every audio file below ``input_directory``.

    The input tree is walked recursively. Every file whose name ends in one
    of the known audio extensions (compared case-insensitively) is handed to
    ``split_audio_file`` together with ``output_directory``. All parts are
    written into that single, flat output directory.

    Args:
        input_directory: Root directory that is searched for audio files.
        output_directory: Directory that receives the exported parts; it is
            created if it does not exist yet.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_directory, exist_ok=True)

    audio_extensions = ('.mp3', '.wav', '.ogg', '.flac')  # Add other audio file extensions if needed
    output_key = os.path.normcase(os.path.abspath(output_directory))

    for root, dirs, files in os.walk(input_directory):
        # Prune the output directory from the walk (in place, as os.walk
        # requires) so freshly exported parts are not picked up again when
        # the output lives inside the input tree. Sorting keeps the
        # traversal order deterministic.
        dirs[:] = sorted(
            name for name in dirs
            if os.path.normcase(os.path.abspath(os.path.join(root, name))) != output_key
        )
        for file in sorted(files):
            # Lower-case the name so "Song.MP3" is treated like "song.mp3".
            if file.lower().endswith(audio_extensions):
                file_path = os.path.join(root, file)
                split_audio_file(file_path, output_directory)

# Example usage
# NOTE: these are machine-specific absolute paths; adjust them before running
# this cell on another machine.
input_directory_path = 'C:\\Users\\engma\\OneDrive\\Desktop\\UmKalthoum Dataset'
output_directory_path = 'C:\\Users\\engma\\OneDrive\\Desktop\\um kalthom splitted'

# os.walk() ignores errors by default, so a missing or mistyped input path
# would create an empty output folder and report nothing. Check up front
# and say so instead.
if os.path.isdir(input_directory_path):
    process_directory(input_directory_path, output_directory_path)
else:
    print(f"Input directory not found: {input_directory_path}")

C:\Users\engma\OneDrive\Desktop\UmKalthoum Dataset\01.Alqalb Ye3shaq Kol Gameel.mp3 exceeds the allowed duration. Splitting...
Exported C:\Users\engma\OneDrive\Desktop\um kalthom splitted\01.Alqalb Ye3shaq Kol Gameel_part1.mp3
Exported C:\Users\engma\OneDrive\Desktop\um kalthom splitted\01.Alqalb Ye3shaq Kol Gameel_part2.mp3
C:\Users\engma\OneDrive\Desktop\UmKalthoum Dataset\01.Gaddet Hobak.mp3 exceeds the allowed duration. Splitting...
Exported C:\Users\engma\OneDrive\Desktop\um kalthom splitted\01.Gaddet Hobak_part1.mp3
Exported C:\Users\engma\OneDrive\Desktop\um kalthom splitted\01.Gaddet Hobak_part2.mp3
C:\Users\engma\OneDrive\Desktop\UmKalthoum Dataset\01.Hadeeth El Roh.mp3 is within the allowed duration.
C:\Users\engma\OneDrive\Desktop\UmKalthoum Dataset\02.Gamal El Donia.mp3 is within the allowed duration.
C:\Users\engma\OneDrive\Desktop\UmKalthoum Dataset\02.Ghareeb Ala Bab Al Ragaa.mp3 is within the allowed duration.
C:\Users\engma\OneDrive\Desktop\UmKalthoum Dataset\03.Hob Eh