https://github.com/SYSTRAN/faster-whisper

https://huggingface.co/deepdml/faster-whisper-large-v3-turbo-ct2


In [None]:
#@title SetUp and Restart Session
!pip install faster-whisper
!pip install gradio
# !rm -rf /content/faster-whisper-large-v3-turbo-ct2

# !git lfs install
# !git clone https://huggingface.co/deepdml/faster-whisper-large-v3-turbo-ct2
# !rm -rf /content/faster-whisper-large-v3-turbo-ct2
# from huggingface_hub import snapshot_download

# repo_id = "deepdml/faster-whisper-large-v3-turbo-ct2"
# local_dir = "faster-whisper-large-v3-turbo-ct2"
# snapshot_download(repo_id=repo_id, local_dir=local_dir, repo_type="model")

import os
import urllib.request
import urllib.error
from tqdm import tqdm
import shutil
def conditional_download(url, download_file_path, redownload=False):
    """Download ``url`` to ``download_file_path`` unless the file already exists.

    The response is opened once and streamed to ``<download_file_path>.part``;
    the partial file is renamed to the final path only after the transfer
    completes, so a failed download never leaves a truncated file at the
    final path.

    Args:
        url: Address of the file to fetch.
        download_file_path: Destination path. Its parent directory is created
            when missing.
        redownload: When True, an existing destination file is deleted and
            fetched again; when False, an existing file is left untouched.

    Returns:
        None. Failures are reported with ``print`` rather than raised.
    """
    file_name = os.path.basename(download_file_path)
    print(f"Downloading {file_name}")

    # dirname is "" for a bare file name, and os.makedirs("") raises.
    base_path = os.path.dirname(download_file_path)
    if base_path:
        os.makedirs(base_path, exist_ok=True)

    if os.path.exists(download_file_path):
        if not redownload:
            print(f"File {download_file_path} already exists. Skipping download.")
            return
        os.remove(download_file_path)

    # Open the URL once; the same response supplies the size and the payload.
    try:
        response = urllib.request.urlopen(url)
    except (urllib.error.URLError, OSError, ValueError) as e:
        print(f"Error: Unable to open the URL - {url}")
        print(f"Reason: {getattr(e, 'reason', e)}")
        return

    partial_path = download_file_path + ".part"
    chunk_size = 1024 * 1024
    completed = False
    try:
        with response:
            total = int(response.headers.get('Content-Length', 0))
            received = 0
            with open(partial_path, "wb") as out_file:
                # total=None makes tqdm show a plain byte counter when the size is unknown.
                with tqdm(total=total or None, desc=f"Downloading {file_name}", unit='B', unit_scale=True, unit_divisor=1024) as progress:
                    while True:
                        chunk = response.read(chunk_size)
                        if not chunk:
                            break
                        out_file.write(chunk)
                        # Advance by the bytes actually written, not a nominal block size.
                        progress.update(len(chunk))
                        received += len(chunk)

        if total and received < total:
            print(f"Error: Failed to download the file from the URL - {url}")
            print(f"Reason: retrieval incomplete: got only {received} out of {total} bytes")
            return

        os.replace(partial_path, download_file_path)
        completed = True
    except Exception as e:
        print(f"Error: Failed to download the file from the URL - {url}")
        print(f"Reason: {getattr(e, 'reason', e)}")
        return
    finally:
        # Also runs on KeyboardInterrupt, so an aborted transfer leaves no debris.
        if not completed and os.path.exists(partial_path):
            os.remove(partial_path)

    print(f"Download successful! Saved at: {download_file_path}")


# Step 3: Download the models
def download_whisper_model(base_path, redownload=False):
    """Fetch the faster-whisper-large-v3-turbo-ct2 model files into ``base_path``.

    Each file is downloaded with ``conditional_download`` into
    ``<base_path>/faster-whisper-large-v3-turbo-ct2/``. Because
    ``conditional_download`` reports failures by printing instead of raising,
    the destination is checked afterwards and the success message is printed
    only when every file is present. The check is best-effort: it detects
    files that are absent, not files that are incomplete.

    Args:
        base_path: Directory under which the model folder is created.
        redownload: Forwarded to ``conditional_download``; True replaces
            files that already exist.

    Returns:
        None.
    """
    print("Starting model downloads...")
    # https://huggingface.co/deepdml/faster-whisper-large-v3-turbo-ct2
    repo_url = "https://huggingface.co/deepdml/faster-whisper-large-v3-turbo-ct2/resolve/main"
    model_dir = f"{base_path}/faster-whisper-large-v3-turbo-ct2"
    file_names = [
        "config.json",
        "model.bin",
        "preprocessor_config.json",
        "tokenizer.json",
        "vocabulary.json",
    ]

    missing = []
    for file_name in file_names:
        path = f"{model_dir}/{file_name}"
        conditional_download(f"{repo_url}/{file_name}", path, redownload=redownload)
        if not os.path.exists(path):
            missing.append(file_name)

    if missing:
        print(f"Warning: these model files are missing after download: {', '.join(missing)}")
        return

    print("All models downloaded successfully.")

# base_path = "."
# Root directory under which the model folder is created.
base_path = "/content"
# redownload=True makes conditional_download delete any existing copy of each file and fetch it again.
download_whisper_model(base_path, redownload=True)

from IPython.display import clear_output
# Wipe the install/download output from this cell.
clear_output()
import time
# Pause for 5 seconds before terminating the process.
time.sleep(5)
import os
# Send signal 9 (SIGKILL on Linux) to this kernel process itself; per the cell
# title, this is what triggers the session restart.
os.kill(os.getpid(), 9)

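The setup cell above ends by killing the runtime process, which makes the session restart automatically. Once it has restarted, continue from the next cell; the setup cell does not need to be run again.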

In [8]:
#@title load faster-whisper
import math
import torch
import gc
import time
import subprocess
from IPython.display import Audio
from faster_whisper import WhisperModel
import os
import mimetypes
import shutil
import re
import uuid

def clean_file_name(file_path):
    """Return ``file_path`` with a sanitised, uniquely suffixed file name.

    The directory part and the extension are kept as they are. In the stem,
    every run of characters other than ASCII letters and digits becomes a
    single underscore, leading/trailing underscores are dropped, and
    ``_<6 hex chars>`` taken from a random UUID is appended.
    """
    folder, leaf = os.path.split(file_path)
    stem, extension = os.path.splitext(leaf)

    # Collapse anything that is not a letter or digit into underscores ...
    slug = re.sub(r'[^a-zA-Z\d]+', '_', stem)
    # ... then squeeze repeated underscores and trim them from both ends.
    slug = re.sub(r'_+', '_', slug).strip('_')

    # Short random tag so repeated uploads of the same name do not collide.
    unique_tag = uuid.uuid4().hex[:6]

    return os.path.join(folder, slug + f"_{unique_tag}" + extension)

def get_audio_file(uploaded_file):
    """Place an audio version of ``uploaded_file`` in ``<base_path>/subtitle_audio``.

    Audio files are copied under a sanitised, unique name. For video files
    the audio track is extracted to an ``.mp3`` with ffmpeg. The media kind is
    guessed from the file name via ``mimetypes``.

    ffmpeg is invoked with an argument list instead of a shell string, so the
    uploaded path is passed through verbatim and no temporary renamed copy of
    the video is needed.

    Args:
        uploaded_file: Path of the uploaded audio or video file.

    Returns:
        Path of the audio file created in the ``subtitle_audio`` folder.

    Raises:
        ValueError: If the file is not recognised as audio or video.
        RuntimeError: If ffmpeg did not produce the expected audio file.
    """
    global base_path
    # Detect the file type (audio/video) from the file name.
    mime_type, _ = mimetypes.guess_type(uploaded_file)
    is_audio = bool(mime_type) and mime_type.startswith('audio')
    is_video = bool(mime_type) and mime_type.startswith('video')
    if not (is_audio or is_video):
        # Fail with a clear message rather than returning "" for the caller to trip over.
        raise ValueError(f"Unsupported file type for {uploaded_file!r} (guessed MIME type: {mime_type})")

    # Create the folder that stores the working audio files.
    audio_folder = f"{base_path}/subtitle_audio"
    os.makedirs(audio_folder, exist_ok=True)

    if is_audio:
        # Audio is used as-is: copy it under a cleaned, unique name.
        audio_file_path = clean_file_name(os.path.join(audio_folder, os.path.basename(uploaded_file)))
        shutil.copy(uploaded_file, audio_file_path)
        return audio_file_path

    # Video: extract the audio track into a cleaned, unique .mp3 name.
    audio_file_name = os.path.splitext(os.path.basename(uploaded_file))[0] + ".mp3"
    audio_file_path = clean_file_name(os.path.join(audio_folder, audio_file_name))

    command = ["ffmpeg", "-y"]
    if torch.cuda.is_available():
        command += ["-hwaccel", "cuda"]
    command += ["-i", uploaded_file, audio_file_path]
    subprocess.run(command)

    if not os.path.exists(audio_file_path):
        raise RuntimeError(f"ffmpeg could not extract audio from {uploaded_file!r}")
    return audio_file_path


def is_gpu_memory_over_limit(limit_gb=14.5):
    """Report whether any GPU listed by ``nvidia-smi`` uses more than ``limit_gb``.

    Runs ``nvidia-smi --query-gpu=memory.used --format=csv,nounits,noheader``
    and reads one value per output line, dividing by 1024 before comparing
    with the limit.

    Args:
        limit_gb: Threshold compared against each GPU's used memory.

    Returns:
        True if at least one GPU is above the limit. False otherwise,
        including when ``nvidia-smi`` cannot be run (for example on a
        CPU-only machine) or prints no numeric values.
    """
    try:
        result = subprocess.run(['nvidia-smi', '--query-gpu=memory.used', '--format=csv,nounits,noheader'],
                                stdout=subprocess.PIPE, text=True)
    except OSError:
        # nvidia-smi is not installed or not executable: there is no GPU to check.
        return False

    # One line per GPU when several are present.
    for line in (result.stdout or "").splitlines():
        try:
            memory_used_mb = int(line.strip())
        except ValueError:
            # Skip non-numeric output such as "[N/A]" or an error message.
            continue
        if memory_used_mb / 1024.0 > limit_gb:
            return True

    return False

def convert_seconds_to_hms(seconds):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds before it is split into
    fields. Flooring the fractional part instead loses a millisecond to
    floating-point error (2.34 would render as ``00:00:02,339``).

    Args:
        seconds: Duration in seconds (int or float).

    Returns:
        The formatted timestamp string.
    """
    total_ms = int(round(float(seconds) * 1000))
    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    whole_seconds, milliseconds = divmod(remainder_ms, 1000)
    return f"{hours:02}:{minutes:02}:{whole_seconds:02},{milliseconds:03}"


def load_whisper_turbo_model():
  """(Re)load the faster-whisper model from ``<base_path>/faster-whisper-large-v3-turbo-ct2``.

  Any model already held in the global ``whisper_turbo_model`` is released
  first. On CUDA the model is loaded with ``float16`` and, if that fails,
  with ``int8_float16``; on CPU it is loaded with ``int8``.

  Returns:
    The new ``WhisperModel``, which is also stored in the global
    ``whisper_turbo_model``.
  """
  global whisper_turbo_model,base_path
  # globals().get tolerates a first call made before the global exists, so no
  # blanket "except: pass" is needed to hide a NameError.
  if globals().get("whisper_turbo_model") is not None:
    whisper_turbo_model = None
  gc.collect()
  if torch.cuda.is_available():
    torch.cuda.empty_cache()
  # Short pause kept from the original; presumably lets memory be released
  # before the new model loads - TODO confirm it is still needed.
  time.sleep(2)

  # model_name="faster-whisper-large-v3-turbo-ct2"
  model_name=f"{base_path}/faster-whisper-large-v3-turbo-ct2"
  device = "cuda" if torch.cuda.is_available() else "cpu"
  if device == "cuda":
      try:
          whisper_turbo_model = WhisperModel(model_name, device="cuda", compute_type="float16")
      except Exception as e:
          # Report why float16 was rejected before falling back.
          print(f"float16 load failed ({e}); retrying with int8_float16")
          whisper_turbo_model = WhisperModel(model_name, device="cuda", compute_type="int8_float16")
  else:
      whisper_turbo_model = WhisperModel(model_name, device="cpu", compute_type="int8")
  return whisper_turbo_model

def subtitle_maker(input_file):
  """Transcribe ``input_file`` and write an SRT file and a plain-text transcript.

  Both files are written to ``<base_path>/Generated_Subtitle`` under a
  sanitised, unique name derived from the input file name. The working audio
  file created by ``get_audio_file`` is deleted afterwards, also when
  transcription fails.

  Args:
    input_file: Path of the audio or video file to transcribe.

  Returns:
    Tuple ``(srt_path, text_path, transcript)``: the two output paths as
    strings and the concatenated, stripped transcript text.
  """
  global base_path,whisper_turbo_model
  # Only query nvidia-smi on a CUDA runtime; reload the model when GPU memory is nearly full.
  if torch.cuda.is_available() and is_gpu_memory_over_limit(limit_gb=14.5):
    whisper_turbo_model=load_whisper_turbo_model()

  base_name = os.path.splitext(os.path.basename(input_file))[0]
  subtitle_folder = f"{base_path}/Generated_Subtitle"
  os.makedirs(subtitle_folder, exist_ok=True)
  srt_file_name = clean_file_name(f"{subtitle_folder}/{base_name}.srt")
  # Swap only the final extension; str.replace would rewrite every ".srt" in the path.
  text_path = os.path.splitext(srt_file_name)[0] + ".txt"

  audio_path=get_audio_file(input_file)
  try:
    segments, info = whisper_turbo_model.transcribe(audio_path, beam_size=5,vad_filter=True,vad_parameters=dict(min_silence_duration_ms=500))
    # Materialising the segments here finishes reading the audio before it is deleted.
    saved_segments = list(segments)
  finally:
    if audio_path and os.path.exists(audio_path):
      os.remove(audio_path)

  # Read fields by attribute so this does not depend on the segment being an
  # indexable tuple with a fixed field order.
  with open(srt_file_name, 'w',encoding="utf-8") as f:
    for count, segment in enumerate(saved_segments, start=1):
        duration = f"{convert_seconds_to_hms(segment.start)} --> {convert_seconds_to_hms(segment.end)}\n"
        text = f"{segment.text.lstrip()}\n\n"
        f.write(f"{count}\n{duration}{text}")

  sts = "".join(str(segment.text) for segment in saved_segments).strip()
  # Same encoding as the SRT file, so non-ASCII transcripts do not depend on the locale default.
  with open(text_path, 'w',encoding="utf-8") as file:
      file.write(sts)
  return str(srt_file_name),str(text_path),sts
# Root directory used by the functions above for the model folder and the
# subtitle_audio / Generated_Subtitle folders.
base_path="/content"
# base_path="."
# Define the global first so load_whisper_turbo_model's "is not None" check has something to read.
whisper_turbo_model=None
whisper_turbo_model=load_whisper_turbo_model()

In [6]:
# Path of the media file to transcribe, exposed as a Colab form field.
Audio_Or_Video_File_Path = '/content/yt.MP3'  # @param {type: "string"}
# subtitle_maker returns (SRT path, text path, transcript string).
srt_file_name,text_file_name,text=subtitle_maker(Audio_Or_Video_File_Path)
print(f"SRT file save at : {srt_file_name}")
print(f"TEXT file save at : {text_file_name}")
print(f"Speech to text save at : 'text' variable ")

# Colab-only module; files.download is called with the generated SRT path.
from google.colab import files
files.download(srt_file_name)
# files.download(text_file_name)

SRT file save at : /content/Generated_Subtitle/yt_ff9754.srt
TEXT file save at : /content/Generated_Subtitle/yt_ff9754.txt
Speech to text save at : 'text' variable 


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [14]:
# with open(srt_file_name,"r",encoding="utf-8") as f:
#     srt_data = f.read()
# print(srt_data)

In [15]:
# with open(text_file_name,"r",encoding="utf-8") as f:
#     text_data = f.read()
# print(text_data)

In [16]:
# text

In [10]:
#@title Using Gradio Interface
import gradio as gr
# demo_examples = [["/content/audio/a.mp3"]]
# One file input; the three outputs line up with subtitle_maker's return
# tuple (SRT path, text path, transcript string).
gradio_inputs=[gr.File(label="Upload Audio or Video File")]
gradio_outputs=[gr.File(label="Download SRT File",show_label=True),gr.File(label="Download Text File",show_label=True),gr.Textbox(label="Speech To Text")]
demo = gr.Interface(fn=subtitle_maker, inputs=gradio_inputs,outputs=gradio_outputs , title="Whisper-Large-V3-Turbo-Ct2 Subtitle Generator")#,examples=demo_examples)
# share=True publishes the app on a public gradio.live URL; debug=True keeps
# this cell running until interrupted (both visible in the launch output).
demo.launch(debug=True,share=True)


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://062848203022ff9412.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://062848203022ff9412.gradio.live


