<a href="https://colab.research.google.com/github/KennedyMen/Quickscripts/blob/main/Whisperx_Subs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install faster-whisper
!pip install ffmpeg-python
!pip install pysubs2
!pip install tqdm

In [22]:
import os
import faster_whisper
import math
from tqdm import tqdm

# Configuration
# Language code handed to model.transcribe() for every processed file.
language = "fr"
# NOTE(review): both paths live under /content/drive; presumably Google Drive
# has to be mounted before this cell runs - no mount call is visible here.
input_directory = "/content/drive/MyDrive/Colab_Notebooks/Files/Media"  # Change this to your input folder
output_directory = "/content/drive/MyDrive/Colab_Notebooks/Files/Subs"  # Change this to your output folder

# Load Whisper Model
# The "large-v2" model is created once here with device="cuda" and the same
# instance is reused for every file transcribed below.
model = faster_whisper.WhisperModel("large-v2", device="cuda")

def convert_to_hms(seconds: float) -> str:
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        seconds: Offset from the start of the audio, in seconds. Negative
            values are clamped to zero.

    Returns:
        The timestamp string, rounded to the nearest millisecond, with
        zero-padded hours, minutes and seconds and a comma before the
        three-digit millisecond field.
    """
    # Convert to whole milliseconds once, then split with integer arithmetic.
    # Taking floor((seconds % 1) * 1000) instead loses a millisecond whenever
    # the float is a hair below the intended value (e.g. 1.001 -> ",000").
    total_ms = max(0, int(round(seconds * 1000)))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, whole_seconds = divmod(remainder, 60)
    return f"{hours:02}:{minutes:02}:{whole_seconds:02},{milliseconds:03}"

def convert_seg(segment: faster_whisper.transcribe.Segment) -> str:
    """Render one transcription segment as the body of an SRT cue.

    The result is the ``start --> end`` timing line, then the segment text
    with leading whitespace removed, then a blank line. The numeric cue
    index is not part of the returned string.
    """
    begin = convert_to_hms(segment.start)
    finish = convert_to_hms(segment.end)
    caption = segment.text.lstrip()
    return f"{begin} --> {finish}\n{caption}\n\n"

# Ensure output directory exists
os.makedirs(output_directory, exist_ok=True)

# Lower-case extensions of the audio files that should be transcribed
audio_extensions = (".mp3", ".wav")

# Process each audio file in the input directory.
# The listing is sorted so files are handled in a predictable order.
for filename in sorted(os.listdir(input_directory)):
    # Match on the lower-cased name so "CLIP.MP3" or "take.Wav" are not skipped
    if not filename.lower().endswith(audio_extensions):
        continue

    input_path = os.path.join(input_directory, filename)
    output_path = os.path.join(output_directory, f"{os.path.splitext(filename)[0]}.srt")

    print(f"Processing: {filename}")
    segments, info = model.transcribe(input_path, language=language)

    full_txt = []
    timestamps = 0.0  # audio position already reported to the progress bar
    with tqdm(total=info.duration, unit=" audio seconds") as pbar:
        for i, segment in enumerate(segments, start=1):
            # SRT cue = 1-based index line followed by timing line and text
            full_txt.append(f"{i}\n{convert_seg(segment)}")
            # Clamp to the audio duration and only ever move forward, so the
            # counter cannot run past the bar's total or step backwards.
            position = min(segment.end, info.duration)
            if position > timestamps:
                pbar.update(position - timestamps)
                timestamps = position
        if timestamps < info.duration:  # silence at the end of the audio
            pbar.update(info.duration - timestamps)

    # Written only after the whole file has been transcribed
    with open(output_path, mode="w", encoding="UTF-8") as f:
        f.writelines(full_txt)
    print(f"Saved: {output_path}")

print("Processing complete.")

Processing: audio.mp3


59.0 audio seconds [00:11,  5.07 audio seconds/s]                    

Saved: /content/Output/audio.srt
Processing complete.



