In [1]:
#pip install pydub

## Audio Segmentation

In [1]:
import os
from pydub import AudioSegment
from pydub.silence import split_on_silence

In [2]:
def get_subdirectories_files(root_dir):
    dir_files_dict = {}
    # Iterate over the items in the root directory
    for subdir_name in os.listdir(root_dir):
        subdir_path = os.path.join(root_dir, subdir_name)
        # Check if the item is a directory
        if os.path.isdir(subdir_path):
            file_paths = []
            # Walk through the subdirectory
            for dirpath, dirnames, filenames in os.walk(subdir_path):
                # Collect full file paths
                file_paths.extend([os.path.join(dirpath, f) for f in filenames])
            # Add the subdirectory and its files to the dictionary
            dir_files_dict[subdir_name] = file_paths
    return dir_files_dict

In [3]:
# Define a function to normalize a chunk to a target amplitude.
def match_target_amplitude(aChunk, target_dBFS):
    ''' Normalize given audio chunk '''
    change_in_dBFS = target_dBFS - aChunk.dBFS
    return aChunk.apply_gain(change_in_dBFS)

In [5]:
# Split audio file into smaller multiple files every occurrence of silence
def silence_segment_audio(file_path, output_path):
    song = AudioSegment.from_wav(file_path)

    # Split track where the silence is 2 seconds or more and get chunks using 
    # the imported function.
    chunks = split_on_silence (
        # Use the loaded audio.
        song, 
        # Specify that a silent chunk must be at least 1 seconds or 1000 ms long.
        min_silence_len = 1000,
        # Consider a chunk silent if it's quieter than -16 dBFS.
        # (You may want to adjust this parameter.)
        silence_thresh = -16
    )

    # Process each chunk with your parameters
    for i, chunk in enumerate(chunks):
        # Create a silence chunk that's 0.5 seconds (or 500 ms) long for padding.
        silence_chunk = AudioSegment.silent(duration=500)

        # Add the padding chunk to beginning and end of the entire chunk.
        audio_chunk = silence_chunk + chunk + silence_chunk

        # Normalize the entire chunk.
        normalized_chunk = match_target_amplitude(audio_chunk, -20.0)

        # Export the audio chunk with new bitrate.
        chunk_filename = f"{output_path}_chunk{i}.wav"
        print(f"Exporting {chunk_filename}")
        normalized_chunk.export(
            chunk_filename,
            bitrate = "192k",
            format = "wav"
        )

In [6]:
# Split audio file into smaller multiple files every x seconds
def interval_segment_audio(file_path, output_path, interval):
    song = AudioSegment.from_wav(file_path)

    # Duration of the audio in milliseconds
    duration_ms = len(song)

    # Chunk length in milliseconds (from interval in seconds)
    chunk_length_ms = interval * 1000

    # Calculate the number of chunks needed
    num_chunks = duration_ms // chunk_length_ms
    if duration_ms % chunk_length_ms != 0:
        num_chunks += 1

    # Process each chunk
    for i in range(num_chunks):
        # Calculate start and end positions for the chunk
        start_ms = i * chunk_length_ms
        end_ms = min(start_ms + chunk_length_ms, duration_ms)

        # Extract the chunk from the original audio
        chunk = song[start_ms:end_ms]

        # Create a silence chunk for padding (0.5 seconds)
        silence_chunk = AudioSegment.silent(duration=500)

        # Add padding to the beginning and end of the chunk
        audio_chunk = silence_chunk + chunk + silence_chunk

        # Normalize the chunk to -20 dBFS
        normalized_chunk = match_target_amplitude(audio_chunk, -20.0)

        # Export the chunk as a WAV file with the specified bitrate
        chunk_filename = f"{output_path}_chunk{i}.wav"
        print(f"Exporting {chunk_filename}")
        normalized_chunk.export(
            chunk_filename,
            bitrate="192k",
            format="wav"
        )

In [7]:
audio_dict = get_subdirectories_files('/code/Wav Files')
audio_dict

{'Alice All Around Tarot': ['./Wav Files\\Alice All Around Tarot\\♒️คนแบบนี้แหละคือ เนื้อคู่ ที่แท้จ.wav',
  './Wav Files\\Alice All Around Tarot\\🎉กราฟชีวิตของคุณจะพุ่งทะยานด้านใด.wav',
  './Wav Files\\Alice All Around Tarot\\💖ดวงความรัก ตุลาคม 2024 2567💖 ลัคนา ราศีธ.wav'],
 'Thai Audio Books For All Blinds': ['./Wav Files\\Thai Audio Books For All Blinds\\(หนังสือเสียง) พี่เลี้ยงมหัศจรรย์ บท.wav',
  './Wav Files\\Thai Audio Books For All Blinds\\(หนังสือเสียง) เด็กอัจฉริยะ ปะทะ คนเ.wav'],
 'THE STANDARD': ['./Wav Files\\THE STANDARD\\อีก 20 ปีจากนี้ คุณจะเสียใจกับ ‘สิ่งท.wav',
  './Wav Files\\THE STANDARD\\เมื่อใดทุกข์ใจให้หยุดนิ่งๆ แล้วถาม.wav'],
 'Understand Thai': ['./Wav Files\\Understand Thai\\A Thai Audiobook： Grimms_ Fairy Tales Ep. 1 ｜ Advanced Thai ｜ Understand Thai.wav',
  './Wav Files\\Understand Thai\\A Thai Audiobook： Harry Potter and the Sorcerer_s Stone Pt.1 ｜ Advanced Thai ｜ Understand Thai.wav',
  './Wav Files\\Understand Thai\\A Thai Audiobook： Harry Potter and the 

In [8]:
# Split by 30 seconds interval
for subfolder, audio_lst in audio_dict.items():
    print(f"Segmenting from {subfolder}")
    print("#"*40)
    interval_subfolder = f'/code/Segmented/Interval/{subfolder}'
    os.makedirs(interval_subfolder, exist_ok=True)

    for i, audio_file in enumerate(audio_lst):
        interval_segment_audio(audio_file, os.path.join(interval_subfolder, f'sample{i}'), 30)
        print(f'{audio_file}: completed')
    print("#"*40)

Segmenting from Alice All Around Tarot
########################################
Exporting ./Segmented/Interval/Alice All Around Tarot\sample0_chunk0.wav
Exporting ./Segmented/Interval/Alice All Around Tarot\sample0_chunk1.wav
Exporting ./Segmented/Interval/Alice All Around Tarot\sample0_chunk2.wav
Exporting ./Segmented/Interval/Alice All Around Tarot\sample0_chunk3.wav
Exporting ./Segmented/Interval/Alice All Around Tarot\sample0_chunk4.wav
Exporting ./Segmented/Interval/Alice All Around Tarot\sample0_chunk5.wav
Exporting ./Segmented/Interval/Alice All Around Tarot\sample0_chunk6.wav
Exporting ./Segmented/Interval/Alice All Around Tarot\sample0_chunk7.wav
Exporting ./Segmented/Interval/Alice All Around Tarot\sample0_chunk8.wav
Exporting ./Segmented/Interval/Alice All Around Tarot\sample0_chunk9.wav
Exporting ./Segmented/Interval/Alice All Around Tarot\sample0_chunk10.wav
Exporting ./Segmented/Interval/Alice All Around Tarot\sample0_chunk11.wav
Exporting ./Segmented/Interval/Alice All A

In [9]:
# Split by silence
for subfolder, audio_lst in audio_dict.items():
    print(f"Segmenting from {subfolder}")
    print("#"*40)
    silence_subfolder = f'/code/Segmented/Silence/{subfolder}'
    os.makedirs(silence_subfolder, exist_ok=True)

    for i, audio_file in enumerate(audio_lst):
        silence_segment_audio(audio_file, os.path.join(silence_subfolder, f'sample{i}'))
        print(f'{audio_file}: completed')
    print("#"*40)

Segmenting from Alice All Around Tarot
########################################
./Wav Files\Alice All Around Tarot\♒️คนแบบนี้แหละคือ เนื้อคู่ ที่แท้จ.wav: completed
./Wav Files\Alice All Around Tarot\🎉กราฟชีวิตของคุณจะพุ่งทะยานด้านใด.wav: completed
./Wav Files\Alice All Around Tarot\💖ดวงความรัก ตุลาคม 2024 2567💖 ลัคนา ราศีธ.wav: completed
########################################
Segmenting from Thai Audio Books For All Blinds
########################################
Exporting ./Segmented/Silence/Thai Audio Books For All Blinds\sample0_chunk0.wav
Exporting ./Segmented/Silence/Thai Audio Books For All Blinds\sample0_chunk1.wav
Exporting ./Segmented/Silence/Thai Audio Books For All Blinds\sample0_chunk2.wav
Exporting ./Segmented/Silence/Thai Audio Books For All Blinds\sample0_chunk3.wav
Exporting ./Segmented/Silence/Thai Audio Books For All Blinds\sample0_chunk4.wav
Exporting ./Segmented/Silence/Thai Audio Books For All Blinds\sample0_chunk5.wav
Exporting ./Segmented/Silence/Thai Audio Bo

KeyboardInterrupt: 

## Audio Transcribing

In [4]:
# set USE_FLASH_ATTENTION=1
# pip uninstall torch
# pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu124
# pip install build
# pip install cmake
# pip install ninja
# pip install wheel
# pip install flash-attn --no-build-isolation 

In [4]:
import whisper
import json

model = whisper.load_model("turbo")

  checkpoint = torch.load(fp, map_location=device)


In [5]:
interval_dict = get_subdirectories_files('/code/Segmented/Interval')
interval_dict

{'Alice All Around Tarot': ['/code/Segmented/Interval/Alice All Around Tarot/sample0_chunk0.wav',
  '/code/Segmented/Interval/Alice All Around Tarot/sample0_chunk1.wav',
  '/code/Segmented/Interval/Alice All Around Tarot/sample0_chunk10.wav',
  '/code/Segmented/Interval/Alice All Around Tarot/sample0_chunk11.wav',
  '/code/Segmented/Interval/Alice All Around Tarot/sample0_chunk12.wav',
  '/code/Segmented/Interval/Alice All Around Tarot/sample0_chunk13.wav',
  '/code/Segmented/Interval/Alice All Around Tarot/sample0_chunk14.wav',
  '/code/Segmented/Interval/Alice All Around Tarot/sample0_chunk15.wav',
  '/code/Segmented/Interval/Alice All Around Tarot/sample0_chunk16.wav',
  '/code/Segmented/Interval/Alice All Around Tarot/sample0_chunk17.wav',
  '/code/Segmented/Interval/Alice All Around Tarot/sample0_chunk18.wav',
  '/code/Segmented/Interval/Alice All Around Tarot/sample0_chunk19.wav',
  '/code/Segmented/Interval/Alice All Around Tarot/sample0_chunk2.wav',
  '/code/Segmented/Interval/

In [7]:
transcript_json = {}
transcript_file_path = '/code/Segmented/Interval/transcript.json'

for subfolder, audio_lst in interval_dict.items():
    print(f"Transcribing from {subfolder}")
    transcript_json[subfolder] = {}
    print("#"*40)

    for i, audio_file in enumerate(audio_lst):
        result = model.transcribe(audio_file, language='th')
        filename = os.path.basename(audio_file)
        transcript_json[subfolder][filename] = result['text']
        print(f"| File: {filename} | Text: {result['text']} |")
    print("#"*40)

Transcribing from Alice All Around Tarot
########################################
| File: sample0_chunk0.wav | Text: สวาสดีนะคะ สวาสดีนะคะ พี่พี่เพื่อนเพื่อนน้องน้องชาวSLระ Sow 근 อ.ถ급 oko it 4 กิมส์ทุกท่านนะคะวันนี้เราจะมาเปิด wellness ในหัว profile iiiข้อเรื่อง second nosaltres Ś้าติacion คนแบบน้าย ลักษณαแบบหนηนะคะที่เป็นคู่แท้ หรือถables broth sou ในรู้โกจรถ 19 กระเอง النقط Naw さ Online String What Not wantedโอเค เรามาเปิดกันเลยนะคะโอเคค่ะโอเค เคทั้งสมาตินะคะห้าแบรกออกมา |
| File: sample0_chunk1.wav | Text: We the star คุณรู้ไหมว่า ไพ้ the star เป็นตัวแทงของชาวและขนาดราสีอะไรนะคะคือราสีกุ้มนะ เห็นผู้หญิงที่ถือโม่น้ำ เราลดน้ำไหมอันนี้เลยค่ะ อันนี้นะเป็นการบงบอกนะคะ ว่าเนี่ย ราสีกุ้มนะคะราสีกุ้มเป็นคนแบบนาย รักษณะแบบแม่นะคะเป็นคนชอบเพื่อฟาลนะคะ ช่างจินตนาการณะ แล้ว |
| File: sample0_chunk10.wav | Text: โอเคทีถือถ้วยคน ๆ นี้ เขาต้องเป็นคนที่สนุกสนาดร่าเริงเขาสังคมเก็ง ชั้งพูด ๆ คุยนะคะถึงจะมีความเป็นผู้ใหญ่ก็เหỡแต่มันยังมีความเป็นเด็กอยู่ในตัวไหมถึงว่า ไม่ได้ขึ้มเกิน ถ้าขึ้มเกินเราไม่พู

In [10]:
with open(transcript_file_path, 'w+') as json_file:
    json.dump(transcript_json, json_file, ensure_ascii=False, indent=4)