In [184]:
##Segment data

import os
import librosa
import itertools
from math import ceil
from pydub import AudioSegment
from pydub.utils import make_chunks
from pydub.silence import detect_silence
from pydub.silence import split_on_silence
from pydub.silence import detect_nonsilent


#/Users/michael/Desktop/test_demo.m4a


def detect_nonsilent_modified(audio_segment, min_silence_len=1000, silence_thresh=-16, target_length=1000, seek_step=1):
    silent_ranges = detect_silence(audio_segment, min_silence_len, silence_thresh, seek_step)
    len_seg = len(audio_segment)

    # if there is no silence, the whole thing is nonsilent
    if not silent_ranges:
        return [[0, len_seg]]

    # short circuit when the whole audio segment is silent
    if silent_ranges[0][0] == 0 and silent_ranges[0][1] == len_seg:
        return []

    #If non silent audio > target length, it will split into several chunks with specified target_length
    prev_end_i = 0
    nonsilent_ranges = []
    for start_i, end_i in silent_ranges:
        nonsilent_len = start_i - prev_end_i

        if nonsilent_len < target_length:
            nonsilent_ranges.append([prev_end_i, start_i])
            prev_end_i = end_i

        else:
            num_of_chunks = ceil(nonsilent_len / target_length) 
            for i in range(int(num_of_chunks)):
                          
                spliced_start = prev_end_i + target_length 
                if spliced_start < nonsilent_len: 
                    nonsilent_ranges.append([prev_end_i, spliced_start]) 
                    prev_end_i = spliced_start
                    print("loop: " + str(nonsilent_ranges)) 
                    
                else:
                    nonsilent_ranges.append([prev_end_i, start_i]) 
                    prev_end_i = end_i
                    spliced_start = end_i
                    print("end: " + str(nonsilent_ranges))
            

    #Appends the len_seg to complete the non silent array. 
    #ALso checks if non silent audio > target length
    if end_i != len_seg:
        last_nonsilent_len = len_seg - end_i

        spliced_start = prev_end_i + target_length # increments by 5000
        if spliced_start < last_nonsilent_len:
            nonsilent_ranges.append([prev_end_i, spliced_start]) 
            prev_end_i = spliced_start
            print("Audio over target length. Splicing.. ") 
            print("loop: " + str(nonsilent_ranges))
        else:
            nonsilent_ranges.append([prev_end_i, len_seg])
            prev_end_i = 0
            spliced_start = 0
            print("end: " + str(nonsilent_ranges))

    if nonsilent_ranges[0] == [0, 0]:
        nonsilent_ranges.pop(0)

    return nonsilent_ranges

#-----------------------------------------------------------------------------------------------------

# Adds the functionality of keep silence within the specified length
def split_on_silence_modified(audio_segment, min_silence_len=1000, silence_thresh=-16, target_length=1000,
                     seek_step=1):

  # from the itertools documentation
    def pairwise(iterable):
        "s -> (s0,s1), (s1,s2), (s2, s3), ..."
        a, b = itertools.tee(iterable)
        next(b, None)
        return zip(a, b)
    
    #keep silence if silence threshold is smaller than 2500 (target length = 5000/2)
    #For example, 7600ms audio file is dected as non silent, but 5400ms is silent. (total 13,000ms)
    #The audio file will be trimmed as 10,000ms. With 7600ms audio, 2400ms as silent
    def keep_silence(end, target_length):

        silence_threshold = target_length/2
        audio_len_per_target_len = end % target_length 
        silence_len_per_target_len = target_length - audio_len_per_target_len

        if silence_len_per_target_len < silence_threshold:
            return (target_length * round(end/target_length))
        else:
            return 0

    output_ranges = [
        [ start, end + int(keep_silence(end, target_length))]
        for (start,end)
            in detect_nonsilent(audio_segment, min_silence_len, silence_thresh, target_length, seek_step)
    ]

    print("---------------------------------------------------")
    print(output_ranges)

    for range_i, range_ii in pairwise(output_ranges):
        last_end = range_i[1]
        print("last_end: " + str(last_end))
        next_start = range_ii[0]
        print("next_start: " + str(next_start))
        print("---------------------------------------------------")
        if next_start < last_end:
            print("next start is less than last end")
            range_i[1] = (last_end+next_start)//2
            print(range_i[1])
            range_ii[0] = range_i[1]

    return [
        audio_segment[ max(start,0) : min(end,len(audio_segment)) ]
        for start,end in output_ranges
    ]

#-----------------------------------------------------------------------------------------------------

# Define a function to normalize a chunk to a target amplitude.
def match_target_amplitude(aChunk, target_dBFS):
    ''' Normalize given audio chunk '''
    change_in_dBFS = target_dBFS - aChunk.dBFS
    return aChunk.apply_gain(change_in_dBFS)

# Load your audio.
audio_path = input("Input audio path") #path refers to the file
audio_dir = os.path.dirname(audio_path) #dir refers to the folder
loaded_audio = AudioSegment.from_file(audio_path, format="m4a")

# convert stero to mono channel, and normalize sampling rate.
loaded_audio = loaded_audio.set_channels(1)
loaded_audio = loaded_audio.set_frame_rate(22050)

#Get the base audio name.
base_name = os.path.basename(audio_path) #outputs 'file.ext'
#base_name_array = os.path.splitext(base_name) #outputs ('file', '.ext')
base_name_wo_ext = os.path.splitext(base_name)[0] #outputs 'file' only

#make a new directory to store exported audio chunks
new_dir =  audio_dir + "/" + base_name_wo_ext
try:
    os.makedirs(new_dir, exist_ok=False) #Will not remake directory if directory exists
    print("New folder sucesfully created in: " + new_dir)
except OSError as error:
    print("File already exists. Making directory skipped")

# Split track where the silence is longer than "min_silence_len" ms

#-----------------------------------------------------------------------------------------------------
#-----------------------------------------------------------------------------------------------------
#-----------------------------------------------------------------------------------------------------

# Monkey patches pydub split_on_silence
detect_nonsilent = detect_nonsilent_modified
split_on_silence = split_on_silence_modified

chunks = split_on_silence (
    loaded_audio, 

    min_silence_len = 4 * 1000,

    # anything under the defined dBFS is considered silence
    # the lower the silence threshold, the less sensitive it can pick silence
    silence_thresh = -30,

    # keep 200 ms of leading/trailing silence
    target_length = 4 * 1000
)

# target length defines how many seconds
# target_length = 5 * 1000
# Process each chunk with your parametersi
for i, chunk in enumerate(chunks):
    # Create a silence chunk that's 0.5 seconds (or 500 ms) long for padding.
    #silence_chunk = AudioSegment.silent(duration=0)

    # Add the padding chunk to beginning and end of the entire chunk.
    #audio_chunk = silence_chunk + chunk + silence_chunk

    # Normalize the entire chunk.
    normalized_chunk = match_target_amplitude(chunk, -20.0)

    #If chunk is below target_length, adds silence padding to meet the target_length
    if len(chunks[i]) < target_length:
        print('Chunk length: ' + str(len(chunks[i])) + 'ms is under target length.')
        # Export the chunk
        print("Exporting as: " + base_name_wo_ext + "-{0}-leftover.wav.".format(i+1))
        normalized_chunk.export(new_dir + "/" + base_name_wo_ext + "-{0}-leftover.wav".format(i+1), format = "wav")

    else:
        print('Chunk length: ' + str(len(chunks[i])) + 'ms is over target length')
        print("Exporting as: " + base_name_wo_ext + "-{0}.wav.".format(i+1))
        normalized_chunk.export(new_dir + "/" + base_name_wo_ext + "-{0}.wav".format(i+1), format = "wav")
2839

#-----------------------------------------------------------------------------------------------------


    

        

File already exists. Making directory skipped
loop: [[0, 4000]]
loop: [[0, 4000], [4000, 8000]]
end: [[0, 4000], [4000, 8000], [8000, 11732]]
end: [[0, 4000], [4000, 8000], [8000, 11732], [18032, 21242], [29089, 33007], [41382, 44756]]
---------------------------------------------------
[[0, 4000], [4000, 8000], [8000, 23732], [18032, 21242], [29089, 33007], [41382, 44756]]
last_end: 4000
next_start: 4000
---------------------------------------------------
last_end: 8000
next_start: 8000
---------------------------------------------------
last_end: 23732
next_start: 18032
---------------------------------------------------
next start is less than last end
last_end: 21242
next_start: 29089
---------------------------------------------------
last_end: 33007
next_start: 41382
---------------------------------------------------
Chunk length: 4000ms is under target length.
Exporting as: test_demo-1-leftover.wav.
Chunk length: 4000ms is under target length.
Exporting as: test_demo-2-leftover

2839

In [None]:
#Normalize d

import os
from pydub import AudioSegment
from pydub.utils import make_chunks

defined_length_ms = 5 * 1000

for file in os.listdir(new_dir):
     filename = os.fsdecode(file)
     # only picks up files ending with wav.
     if filename.endswith(".wav") or filename.endswith(".mp3"): 
         
        # get file path name for iteration
        audio_path = os.path.join(new_dir, filename) 
        print("Iterating files: " + audio_path)

        #get directory of file - to export files
        audio_dir = os.path.dirname(audio_path)
     
        #Get just the base name of the files
        base_name = os.path.basename(audio_path) #replaces base_name from new dir
        base_name_wo_ext = os.path.splitext(base_name)[0] #replaced base_name_wo_ext with new audio
      
       

        #Assign directory to Audio segment
        loaded_audio = AudioSegment.from_file(audio_path)
        #checks the length of audio
        audio_length_ms = loaded_audio.duration_seconds * 1000 

        if audio_length_ms > defined_length_ms:
            print(base_name_wo_ext + " is longer than " + str(defined_length_ms) + "ms")
            #splices audio files into chunks with the defined length ms
            chunks = make_chunks(loaded_audio, defined_length_ms) 

            #Exports the spliced audio files
            for i, chunk in enumerate(chunks):
                chunk_name = base_name_wo_ext + "-{0}.wav".format(i+1)
                padded_chunk_name = base_name_wo_ext + "-{0}_pad.wav".format(i+1)
                #If chunk is below target_length, adds silence padding to meet the target_length
                if len(chunks[i]) < target_length:
                    print('Chunk length: ' + str(len(chunks[i])) + 'ms is under target length. Padding '+ str(defined_length_ms-len(chunks[i])) + 'ms of silence...')
                    # Create a silence chunk. The duration is target_length - chunk length.
                    silence_chunk = AudioSegment.silent(duration=defined_length_ms-len(chunks[i]))
                    # Add the silence chunk to the end of the entire chunk.
                    padded_normalized_chunk = chunk + silence_chunk
                    # Export the chunk
                    print ("Padding and exporting as:", padded_chunk_name)
                    padded_normalized_chunk.export(audio_dir + "/" + padded_chunk_name, format = "wav")


                else:
                    print ("Splicing and exporting as:", chunk_name)
                    chunk.export(audio_dir + "/" + chunk_name, format="wav")

        else:
            print(base_name_wo_ext + " is shorter or equal to " + str(defined_length_ms) + "ms")
    

        continue
 



   
