In [None]:
import os
import pandas as pd
from pydub import AudioSegment
from pydub.utils import make_chunks
from tqdm import tqdm

#Input directory example (a folder that contains the audio files, not a single file)
#/Users/michael/Desktop/test_demo


In [None]:
#Converts every .mp4 file in the given folder into a mono, 22050 Hz .wav file
#written next to the original.
#Please run this as a separate cell


folder_path = input("Input audio path")

# List the folder once so the progress-bar total and the loop see the same entries
entries = os.listdir(folder_path)
total = len(entries)

# The context manager closes the progress bar even if a conversion raises
with tqdm(total = total) as pbar:
    for file in entries:
        base_name_wo_ext, extension = os.path.splitext(file)
        # Compare the real extension (case-insensitively); a substring test would
        # also pick up names such as "clip.mp4.bak"
        if extension.lower() == ".mp4":
            sound = AudioSegment.from_file(os.path.join(folder_path, file), format="mp4")
            sound = sound.set_frame_rate(22050)
            sound = sound.set_channels(1)
            sound.export(os.path.join(folder_path, base_name_wo_ext + ".wav"), format="wav")
        # Every entry advances the bar, converted or skipped
        pbar.update(1)
print("Conversion complete")


In [None]:
#Normalizes the converted audio's amplitude and splits it into 5-second intervals
#Please run this as a separate cell

#Input directory example
#/Users/michael/Desktop/test_demo

folder_path = input("Input audio path")
#Strip trailing slashes first, THEN take the last path component. Doing it the
#other way round turns "/a/b/" into "." and the output folder collapses onto
#the input folder itself.
base_folder_name = os.path.basename(os.path.normpath(folder_path))

#New directory (inside the input folder) that stores the exported audio chunks
new_dir = os.path.join(folder_path, base_folder_name)
try:
    os.makedirs(new_dir, exist_ok=False) #Will not remake directory if directory exists
    print("New folder sucesfully created in: " + new_dir)
except FileExistsError:
    #Only the "already there" case is expected; any other OSError (permissions,
    #missing parent, ...) propagates instead of being reported as "exists".
    print("File already exists. Skipped making directory")

#Calculates the total number of entries in the folder (drives the progress bar)
total = len(os.listdir(folder_path))
pbar = tqdm(total = total)

chunk_length_ms = 5000 # pydub calculates in millisec
target_dBFS = -20.0 # loudness every exported chunk is normalized to

#Normalizes amplitude
def match_target_amplitude(aChunk, target_dBFS):
    '''Normalize the given audio chunk to ``target_dBFS``.

    aChunk      -- object exposing ``dBFS`` and ``apply_gain(gain_in_dB)``
                   (a pydub AudioSegment in this notebook).
    target_dBFS -- desired loudness in dBFS.

    Returns the result of ``aChunk.apply_gain`` with the gain needed to reach
    the target. A chunk whose ``dBFS`` is negative infinity (pure silence) is
    returned unchanged, because the required gain would be infinite.
    '''
    current_dBFS = aChunk.dBFS
    # Silence has no finite loudness; target - (-inf) would be an infinite gain
    if current_dBFS == float("-inf"):
        return aChunk
    change_in_dBFS = target_dBFS - current_dBFS
    return aChunk.apply_gain(change_in_dBFS)

print("Splicing....")
for file in os.listdir(folder_path):
    base_name_wo_ext, extension = os.path.splitext(file)
    #Only entries whose extension really is .wav are spliced (case-insensitive);
    #a substring test would also match names such as "take.wav.bak".
    if extension.lower() == ".wav":
        myaudio = AudioSegment.from_file(os.path.join(folder_path, file), "wav")
        chunks = make_chunks(myaudio, chunk_length_ms) #Pieces of chunk_length_ms each
        for i, chunk in enumerate(chunks):
            #Normalizes amplitude
            normalized_chunk = match_target_amplitude(chunk, target_dBFS)
            #Full-length chunks keep the plain name; the shorter final piece
            #is tagged as leftover.
            suffix = "" if len(chunk) == chunk_length_ms else "-leftover"
            chunk_name = os.path.join(new_dir, "{0}-{1}{2}.wav".format(base_name_wo_ext, i, suffix))
            print("exporting", chunk_name)
            normalized_chunk.export(chunk_name, format="wav")
    #Every entry advances the bar, spliced or skipped
    pbar.update(1)
pbar.close()
print("Splice complete")

