In [5]:
import os
import librosa
from pydub import AudioSegment
from pydub.utils import make_chunks
from pydub.silence import split_on_silence

# Define a function to normalize a chunk to a target amplitude.
def match_target_amplitude(aChunk, target_dBFS):
    """Return ``aChunk`` with gain applied so that its level equals ``target_dBFS``.

    Args:
        aChunk: audio object exposing a ``dBFS`` attribute and an
            ``apply_gain(dB)`` method (a pydub ``AudioSegment`` in this notebook).
        target_dBFS: desired loudness in dBFS.

    Returns:
        The result of ``aChunk.apply_gain(target_dBFS - aChunk.dBFS)``, or
        ``aChunk`` itself, unchanged, when its level is ``-inf`` dBFS.
    """
    current_dBFS = aChunk.dBFS
    # pydub reports -inf dBFS for a chunk with no signal at all. The gain
    # needed to reach the target would then be +inf, so leave it untouched.
    if current_dBFS == float("-inf"):
        return aChunk
    return aChunk.apply_gain(target_dBFS - current_dBFS)

# Load your audio.
# strip() drops stray whitespace around the typed path; expanduser() lets
# "~/..." paths work.
audio_path = os.path.expanduser(input("Input audio path").strip())  # path refers to the file
audio_dir = os.path.dirname(audio_path)  # dir refers to the folder ("" for a bare file name)
# NOTE: the input is always decoded as m4a, whatever the file extension is.
loaded_audio = AudioSegment.from_file(audio_path, format="m4a")

# Convert stereo to a mono channel, and normalize the sampling rate.
loaded_audio = loaded_audio.set_channels(1)
loaded_audio = loaded_audio.set_frame_rate(22050)

# Get the base audio name.
base_name = os.path.basename(audio_path)  # 'file.ext'
base_name_wo_ext = os.path.splitext(base_name)[0]  # 'file' only

# Make a new directory, named after the audio file, to store the exported chunks.
# os.path.join keeps the folder next to the audio file even when audio_dir is
# empty; plain "/" concatenation would point at the filesystem root instead.
new_dir = os.path.join(audio_dir, base_name_wo_ext)
try:
    os.makedirs(new_dir, exist_ok=False)
    print("New folder sucesfully created in: " + new_dir)
except FileExistsError:
    # Only an already existing *directory* is safe to reuse. Any other failure
    # (a regular file with that name, missing permissions, ...) must surface
    # here rather than later as a confusing export error.
    if not os.path.isdir(new_dir):
        raise
    print("File already exists. Making directory skipped")

# Cut the recording into chunks at every pause that lasts at least
# `min_silence_len` milliseconds.
chunks = split_on_silence(
    loaded_audio,
    # a pause must last this many ms before it counts as a split point
    min_silence_len=4000,
    # audio quieter than this level (in dBFS) is treated as silence;
    # a lower value makes less of the audio count as silence
    silence_thresh=-30,
    # keep_silence is not passed, so the library default is used
)

# Minimum length, in milliseconds, of every exported chunk.
target_length = 5000

# Normalize each chunk, pad the short ones with trailing silence, and export.
for number, chunk in enumerate(chunks, start=1):
    # Bring the chunk to -20 dBFS before exporting.
    leveled_chunk = match_target_amplitude(chunk, -20.0)

    chunk_ms = len(chunk)
    missing_ms = target_length - chunk_ms

    if missing_ms > 0:
        print("Chunk length: {0}ms is under target length. Padding {1}ms of silence...".format(chunk_ms, missing_ms))
        # Append exactly enough silence to reach the target length.
        output_chunk = leveled_chunk + AudioSegment.silent(duration=missing_ms)
        output_name = "{0}-{1}-pad.wav".format(base_name_wo_ext, number)
    else:
        print("Chunk length: {0}ms is over target length".format(chunk_ms))
        output_chunk = leveled_chunk
        output_name = "{0}-{1}.wav".format(base_name_wo_ext, number)

    print("Exporting as: " + output_name + ".")
    output_chunk.export(new_dir + "/" + output_name, format="wav")

        

2
44100
1
22050
New folder sucesfully created in: /Users/michaelnguyen/Projects/environments/audio_segmentation/TEEEEST2
Chunk length: 8682ms is over target length
Exporting as: TEEEEST2-1.wav.
Chunk length: 6630ms is over target length
Exporting as: TEEEEST2-2.wav.
Chunk length: 958ms is under target length. Padding 4042ms of silence...
Exporting as: TEEEEST2-3-pad.wav.
Chunk length: 422ms is under target length. Padding 4578ms of silence...
Exporting as: TEEEEST2-4-pad.wav.


In [2]:
import os
from pydub import AudioSegment
from pydub.utils import make_chunks

# Length, in milliseconds, of every clip produced by this cell.
defined_length_ms = 5 * 1000

# `new_dir` is the chunk folder created by the segmentation cell.
# sorted() makes the processing order reproducible; os.listdir order is arbitrary.
for filename in sorted(os.listdir(new_dir)):
    # Only audio files (.wav or .mp3) are processed; everything else is skipped.
    if not filename.endswith((".wav", ".mp3")):
        continue

    # Full path of the file handled in this iteration.
    audio_path = os.path.join(new_dir, filename)
    print("Iterating files: " + audio_path)

    # Directory of the file; the spliced clips are exported next to it.
    audio_dir = os.path.dirname(audio_path)

    # Base name of the file without its extension, used to name the clips.
    base_name = os.path.basename(audio_path)
    base_name_wo_ext = os.path.splitext(base_name)[0]

    loaded_audio = AudioSegment.from_file(audio_path)
    # len() is the duration in whole milliseconds, the same unit that
    # make_chunks and the padding check below work in.
    audio_length_ms = len(loaded_audio)

    if audio_length_ms <= defined_length_ms:
        print(base_name_wo_ext + " is shorter or equal to " + str(defined_length_ms) + "ms")
        continue

    print(base_name_wo_ext + " is longer than " + str(defined_length_ms) + "ms")
    # Splice the audio into consecutive clips of defined_length_ms each;
    # only the last clip can come out shorter.
    chunks = make_chunks(loaded_audio, defined_length_ms)

    for i, chunk in enumerate(chunks):
        chunk_name = base_name_wo_ext + "-{0}.wav".format(i+1)
        padded_chunk_name = base_name_wo_ext + "-{0}_pad.wav".format(i+1)
        chunk_length_ms = len(chunk)

        # Compare against defined_length_ms, the same constant the padding
        # amount is computed from, so threshold and padding cannot drift apart.
        if chunk_length_ms < defined_length_ms:
            padding_ms = defined_length_ms - chunk_length_ms
            print('Chunk length: ' + str(chunk_length_ms) + 'ms is under target length. Padding '+ str(padding_ms) + 'ms of silence...')
            # Append trailing silence so the clip reaches defined_length_ms.
            silence_chunk = AudioSegment.silent(duration=padding_ms)
            padded_normalized_chunk = chunk + silence_chunk
            print ("Padding and exporting as:", padded_chunk_name)
            padded_normalized_chunk.export(os.path.join(audio_dir, padded_chunk_name), format = "wav")
        else:
            print ("Splicing and exporting as:", chunk_name)
            chunk.export(os.path.join(audio_dir, chunk_name), format="wav")
 



   


 or equal to 5000ms
Iterating files: /Users/michaelnguyen/Library/Mobile Documents/com~apple~CloudDocs/Dataset/Masaki Tanaka/1_Masaki/1_Masaki195_pad.wav
1_Masaki195_pad is shorter or equal to 5000ms
Iterating files: /Users/michaelnguyen/Library/Mobile Documents/com~apple~CloudDocs/Dataset/Masaki Tanaka/1_Masaki/1_Masaki30_pad.wav
1_Masaki30_pad is shorter or equal to 5000ms
Iterating files: /Users/michaelnguyen/Library/Mobile Documents/com~apple~CloudDocs/Dataset/Masaki Tanaka/1_Masaki/1_Masaki99_pad.wav
1_Masaki99_pad is shorter or equal to 5000ms
Iterating files: /Users/michaelnguyen/Library/Mobile Documents/com~apple~CloudDocs/Dataset/Masaki Tanaka/1_Masaki/1_Masaki207_pad.wav
1_Masaki207_pad is shorter or equal to 5000ms
Iterating files: /Users/michaelnguyen/Library/Mobile Documents/com~apple~CloudDocs/Dataset/Masaki Tanaka/1_Masaki/1_Masaki217_pad.wav
1_Masaki217_pad is shorter or equal to 5000ms
Iterating files: /Users/michaelnguyen/Library/Mobile Documents/com~apple~CloudDocs/D