# Splice Audio
This notebook contains basic functions for splicing a short clip out of a larger audio file,
so that the clip can be run and debugged easily with Kaldi.

In [6]:
# Import the AudioSegment class for processing audio and the 
# split_on_silence function for separating out silent chunks.
from pydub import AudioSegment
from pydub import effects
from pydub.silence import split_on_silence
from pydub.utils import mediainfo

In [7]:
# Define a function to normalize a chunk to a target amplitude and then band-limit it with low-pass and high-pass filters.
def signal_process_for_kaldi(aChunk, target_dBFS=-20, low_pass_hz=1600, high_pass_hz=200):
    """Normalize a chunk to a target loudness, then band-limit it.

    Args:
        aChunk: audio segment to process. It must provide a ``dBFS`` attribute and
            ``apply_gain``, ``low_pass_filter`` and ``high_pass_filter`` methods
            (e.g. a pydub ``AudioSegment``).
        target_dBFS: average level, in dBFS, the chunk is scaled to before filtering.
        low_pass_hz: cutoff value handed to ``low_pass_filter``.
        high_pass_hz: cutoff value handed to ``high_pass_filter``.

    Returns:
        The processed chunk. The filters run after the gain step and the level is
        not re-checked afterwards.
    """

    current_dBFS = aChunk.dBFS

    # A chunk with no signal reports -inf dBFS; the gain needed to reach the
    # target would then be +inf, so normalization is skipped for such chunks.
    if current_dBFS != float("-inf"):
        aChunk = aChunk.apply_gain(target_dBFS - current_dBFS)

    aChunk = aChunk.low_pass_filter(low_pass_hz)
    aChunk = aChunk.high_pass_filter(high_pass_hz)

    return aChunk

In [11]:
# function to display parameters of a given audio segment
def display_sound_file_parameters(aSegment):
    """Print duration, average level, frame rate and channel count of a segment."""

    # Each entry pairs an output template with the way its value is read from
    # the segment; values are read and printed one line at a time, in order.
    report = (
        ("audio length: {} seconds", lambda seg: seg.duration_seconds),
        ("input average dB: {}", lambda seg: seg.dBFS),
        ("frame rate: {} kHz", lambda seg: seg.frame_rate/1000),
        ("channels: {}", lambda seg: seg.channels),
    )

    for template, read_value in report:
        print(template.format(read_value(aSegment)))

In [8]:
# define a function to splice out an appropriate audio chunk and write it to a wav file
def splice_out_audio_chunk_by_time(input_sound_file_name, startMin=0, startSec=0,
    endMin=0, endSec=10, normalize_to_dBFS=-20.0):
    """Splice a time range out of an mp3 file and write it to a wav file.

    Args:
        input_sound_file_name: path of the mp3 file WITHOUT the ``.mp3`` suffix;
            the suffix is appended before loading.
        startMin, startSec: start of the splice (minutes and seconds).
        endMin, endSec: end of the splice (minutes and seconds).
        normalize_to_dBFS: target level handed to ``signal_process_for_kaldi``.

    Returns:
        The processed extract (mono, 16000 frame rate), which is also written to
        ``<input_sound_file_name>--<startMin><startSec>-<endMin><endSec>.wav``
        with each time field zero-padded to two characters.
    """

    # Convert input time to milliseconds
    startTime = startMin*60*1000 + startSec*1000
    endTime = endMin*60*1000 + endSec*1000

    # Load your audio
    print("loading input file: {}".format(input_sound_file_name))
    input_sound_file = AudioSegment.from_mp3(input_sound_file_name + ".mp3")

    # output a few parameters 
    print("input file parameters:")
    display_sound_file_parameters(input_sound_file)

    # do very basic scaling for kaldi 
    input_sound_file = input_sound_file.set_channels(1)
    input_sound_file = input_sound_file.set_frame_rate(16000)

    # do the splice (the segment is sliced by millisecond offsets)
    extract = input_sound_file[startTime:endTime]

    # figure out the output file name - insert start and stop times into the filename
    output_file_name = input_sound_file_name \
        + "--" + str(startMin).zfill(2) + str(startSec).zfill(2) \
        + "-"  + str(endMin).zfill(2)   + str(endSec).zfill(2)

    # Normalize the entire chunk to the level requested by the caller
    # (previously a hard-coded -20.0 was used and the parameter was ignored).
    print("normalizing...")
    extract = signal_process_for_kaldi(extract, normalize_to_dBFS)

    # output the new parameters
    print("output paramaters:")
    display_sound_file_parameters(extract)

    # output the resulting audio file as a wav
    print("writing to disk " + output_file_name + ".wav ...")
    extract.export(output_file_name + ".wav", bitrate = "192k", format="wav")

    return extract

# Run block
Here we try a sample file to see how a smaller segment is spliced out of it for testing, running, and debugging. We specify an exact start and stop time.

In [9]:
# set your user parameters here - really just the filename of a big mp3 file
# then we'll call functions to split it up
# (the name set here is the one actually passed to the splice call below)
input_sound_file_name = "SWAHILI-NEWS-100820" # mp3 file filename - withOUT the .mp3 suffix
exported_chunk = splice_out_audio_chunk_by_time(input_sound_file_name, # mp3 file filename (w/o suffix)
    startMin = 10, # start time for splice
    startSec = 16,
    endMin = 10, # end time for splice
    endSec = 34)

print("done")

loading input file: SWAHILI-NEWS-100820
audio length: 1783.8265759637188 seconds
input average dB: -21.19383463711162
frame rate: 44.1 kHz
channels: 2
new frame rate (khz): 16.0
new channels: 1
normalizing...
writing to disk SWAHILI-NEWS-100820--1016-1034.wav ...
done


In [12]:
# uncomment the two lines below (and fill in the wav file name) to get parameters and information about a wav file

#sf = AudioSegment.from_wav("wave_file_name_here.wav")
#display_sound_file_parameters(sf)

audio length: 5.22 seconds
input average dB: -19.015677414152567
frame rate: 16.0 kHz
channels: 1
