In [1]:
import os
import librosa
import numpy as np
import soundfile as sf
import glob

In [7]:
def split_audio_into_chunks_from_folder(folder_path, chunk_length=60, min_last_chunk_duration=30):
    """
    Split every MP3 and WAV file in a folder into fixed-length WAV chunks.

    Each file is loaded with librosa at its original sample rate and cut into consecutive
    pieces of ``chunk_length`` seconds. A trailing piece that is shorter than a full chunk is
    zero-padded to the full chunk length, unless it is shorter than
    ``min_last_chunk_duration`` seconds, in which case it is discarded.

    Chunks are written to ``<folder_path>/chunks`` as ``<file stem>_chunk_<n>.wav`` with ``n``
    starting at 1. Only files directly inside ``folder_path`` are processed (no recursion).

    Note: chunk names are built from the file stem only, so ``a.wav`` and ``a.mp3`` in the same
    folder map to the same chunk names and the file processed later overwrites the earlier one.

    :param folder_path: Path to the folder containing MP3 or WAV files
    :param chunk_length: Length of each chunk in seconds; must be positive and may be fractional (default: 60 seconds)
    :param min_last_chunk_duration: Minimum duration in seconds for the last chunk. If the last chunk is shorter than this, it will be discarded (default: 30 seconds).
    :raises ValueError: If ``chunk_length`` is not positive.
    """
    # Fail fast: a non-positive length would otherwise cause a division by zero below.
    if chunk_length <= 0:
        raise ValueError(f"chunk_length must be positive, got {chunk_length!r}")

    # Collect both formats promised by the docstring; sort so the processing order is reproducible.
    audio_files = sorted(
        path
        for pattern in ('*.wav', '*.mp3')
        for path in glob.glob(os.path.join(folder_path, pattern))
    )
    if not audio_files:
        print(f"No MP3 or WAV files found in {folder_path}")
        return

    # The output directory is the same for every file, so create it once up front.
    output_dir = os.path.join(folder_path, 'chunks')
    os.makedirs(output_dir, exist_ok=True)

    for file_path in audio_files:
        print(f"Processing {file_path}...")

        # sr=None keeps the file's original sample rate instead of resampling.
        y, sr = librosa.load(file_path, sr=None)

        # Slice bounds must be integers, so round explicitly to support a fractional
        # chunk_length; clamp to one sample so the step below can never be zero.
        chunk_samples = max(1, int(round(chunk_length * sr)))

        # Stem of the source file, used to keep chunk names unique per input file.
        base_filename = os.path.splitext(os.path.basename(file_path))[0]

        # Walk the signal in chunk-sized steps; chunk numbering is 1-based.
        for chunk_number, start_sample in enumerate(range(0, len(y), chunk_samples), start=1):
            chunk = y[start_sample:start_sample + chunk_samples]

            # Only the final chunk can be shorter than a full chunk.
            if len(chunk) < chunk_samples:
                last_chunk_duration = len(chunk) / sr

                # Too short to be worth keeping: skip it rather than pad mostly silence.
                if last_chunk_duration < min_last_chunk_duration:
                    print(f"Chunk {chunk_number} is shorter than {min_last_chunk_duration} seconds and will be discarded.")
                    continue

                # Zero-pad at the end so every saved chunk has exactly chunk_samples samples.
                pad_length = chunk_samples - len(chunk)
                chunk = np.pad(chunk, (0, pad_length), mode='constant', constant_values=0)

            output_path = os.path.join(output_dir, f'{base_filename}_chunk_{chunk_number}.wav')

            # Chunks are always written as WAV, regardless of the input format.
            sf.write(output_path, chunk, sr)
            print(f"Saved chunk {chunk_number} to {output_path}")



In [None]:
# Example usage: split every supported audio file found in the folder below.
# Chunks are written to a "chunks" sub-folder inside it.
folder_path = "wavs/"
split_audio_into_chunks_from_folder(folder_path=folder_path)
