In [None]:
# @title 🌴 Change the values in this section
# Colab form cell: every `#@param` annotation below renders as a form widget.
# The boolean flags (`plain`, `srt`, `vtt`, `tsv`, `download`) are passed to
# `transcribe_file` by the batch cell at the end of the notebook.

# @markdown Select the source of the audio/video file to be transcribed
# NOTE(review): `input_format` does not appear to be read by any code visible in
# this notebook - confirm before relying on it.
input_format = "gdrive" #@param ["gdrive"]

# @markdown Enter the URL of the YouTube video or the path of the audio file to be transcribed
# NOTE: the single-file `file` parameter is disabled (commented out below); the
# batch cell instead iterates over every matching file found in `dir`.
# file = "/content/drive/MyDrive/WhisperVideo/audio1217199309.m4a" #@param {type:"string"}

#@markdown Click here if you'd like to save the transcription as text file
plain = True #@param {type:"boolean"}

#@markdown Click here if you'd like to save the transcription as an SRT file
srt = False #@param {type:"boolean"}

#@markdown Click here if you'd like to save the transcription as a VTT file
vtt = False #@param {type:"boolean"}

#@markdown Click here if you'd like to save the transcription as a TSV file
tsv = False #@param {type:"boolean"}

#@markdown Click here if you'd like to download the transcribed file(s) locally
download = False #@param {type:"boolean"}

# 🛠 Set Up

The blocks below install all of the necessary Python libraries (including Whisper), configure Whisper, and contain code for various helper functions.



## 🤝 Dependencies

In [None]:
# Dependencies

#!pip install -q pytube
#!pip install -q git+https://github.com/openai/whisper.git 

import os, re
import torch
from pathlib import Path
from pytube import YouTube

import whisper
from whisper.utils import get_writer

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/57.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for openai-whisper (pyproject.toml) ... [?25l[?25hdone


## 👋 Whisper configuration

This Colab uses `medium.en`, [the medium-sized, English-only](https://github.com/openai/whisper#available-models-and-languages) Whisper model.


In [None]:
# Pick the GPU when PyTorch can see one; otherwise fall back to the CPU
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Load the English-only medium checkpoint, then move it onto the chosen device
model = whisper.load_model("medium.en")
model = model.to(DEVICE)

100%|██████████████████████████████████████| 1.42G/1.42G [00:10<00:00, 142MiB/s]


## 💪 YouTube helper functions

Code for helper functions when running Whisper on a YouTube video.

In [None]:
def to_snake_case(name):
    """
    Converts a title into a lower-case, underscore-separated name.

    Spaces and colons are replaced with underscores, and any run of
    consecutive underscores is collapsed into a single one (so that
    "Title: Subtitle" becomes "title_subtitle").

    Parameters
    ----------
    name: str
        The text to convert.

    Returns
    -------
    The converted string.
    """
    underscored = name.lower().replace(" ", "_").replace(":", "_")
    # A single `.replace("__", "_")` pass leaves "__" behind for runs of three
    # or more underscores, so collapse runs of any length with a regex instead.
    return re.sub(r"_{2,}", "_", underscored)

# def download_youtube_audio(url,  file_name = None, out_dir = "."):
#     "Download the audio from a YouTube video"
#     yt = YouTube(url)
#     if file_name is None:
#         file_name = Path(out_dir, to_snake_case(yt.title)).with_suffix(".mp4")
#     yt = (yt.streams
#             .filter(only_audio = True, file_extension = "mp4")
#             .order_by("abr")
#             .desc())
#     return yt.first().download(filename = file_name)

# ✍ Transcribing with Whisper

Ultimately, calling Whisper is as easy as one line!
* `result = model.transcribe(file)`

The majority of this new `transcribe_file` function is actually just for exporting the results of the transcription as a text, SRT, VTT, or TSV file.

In [None]:
# Base folder that `transcribe_file` reads audio files from (the function looks
# this global up by name). The trailing "/" is kept so the value can be safely
# prefixed to a file name.
# NOTE: this name shadows Python's built-in `dir()` for the rest of the notebook.
dir = "/content/drive/MyDrive/Meigs_Feb_2019/Jaclyn_02_27_19/"

In [None]:
def transcribe_file(model, file, plain, srt, vtt, tsv, download):
    """
    Runs Whisper on an audio file stored in the notebook-level `dir` folder.

    All requested outputs are written to `<dir>/Transcriptions/` (created on
    demand). When `plain` is set, the source audio is additionally moved to
    `<dir>/Audios/` once its text file has been written.

    Parameters
    ----------
    model: Whisper
        The Whisper model instance.

    file: str
        The name of the file to be transcribed, relative to the global `dir`.

    plain: bool
        Whether to save the transcription as a text file or not.

    srt: bool
        Whether to save the transcription as an SRT file or not.

    vtt: bool
        Whether to save the transcription as a VTT file or not.

    tsv: bool
        Whether to save the transcription as a TSV file or not.

    download: bool
        Whether to download the transcribed file(s) or not.

    Returns
    -------
    The dictionary returned by `model.transcribe`; its "text" entry is what
    gets written to the plain-text file.
    """
    # Build paths with pathlib so the result is valid whether or not `dir`
    # ends with a path separator, and so `.stem` / `.name` are available below.
    base_dir = Path(dir)
    source_path = base_dir / file
    stem = source_path.stem  # file name without its (any-length) extension
    print(f"Transcribing file: {source_path}\n")

    output_directory = base_dir / "Transcriptions"

    # Run Whisper
    result = model.transcribe(str(source_path), verbose = False, language = "en")

    if plain or srt or vtt or tsv:
        output_directory.mkdir(parents=True, exist_ok=True)

    if plain:
        text_path = output_directory / f"{stem}.txt"

        print("\nCreating text file")

        with open(text_path, "w", encoding="utf-8") as txt:
            txt.write(result["text"])

        # Archive the processed audio so a later run over `dir` skips it.
        audio_directory = base_dir / "Audios"
        audio_directory.mkdir(parents=True, exist_ok=True)
        os.rename(source_path, audio_directory / source_path.name)

    # The three subtitle/table formats only differ by the writer's format name.
    for enabled, extension in ((srt, "srt"), (vtt, "vtt"), (tsv, "tsv")):
        if not enabled:
            continue
        print(f"\nCreating {extension.upper()} file")
        writer = get_writer(extension, str(output_directory))
        # NOTE(review): assumes the installed Whisper writer derives the output
        # name by stripping the extension from the audio path it is given -
        # confirm against the installed version. Passing the full file name
        # keeps dotted names such as "a.b.3gp" from being truncated at an
        # inner dot.
        writer(result, source_path.name)

    if download:
        from google.colab import files

        # Look where the outputs were actually written, not in /content.
        if output_directory.is_dir():
            for candidate in sorted(output_directory.iterdir()):
                if not candidate.name.startswith(stem):
                    continue
                if candidate.suffix in [".txt", ".srt", ".vtt", ".tsv"]:
                    print(f"Downloading {candidate}")
                    files.download(str(candidate))

    return result

# 💬 Whisper it!

This block actually calls `transcribe_file` 😉


In [None]:
# Folder scanned for audio files by the batch cell below and read by
# `transcribe_file`. The trailing "/" is required so that prefixing this value
# to a file name yields "<folder>/<name>" rather than "<folder><name>".
# NOTE: this name shadows Python's built-in `dir()`.
dir = "/home/lowband/dev/quiz_gen/"

Mounted at /content/drive


In [None]:
# Names of the entries in `dir`, sorted so every run processes files in the same order
# (os.listdir returns them in arbitrary order)
folder = sorted(os.listdir(dir))

# Lower-case extensions that are treated as transcribable audio
AUDIO_EXTENSIONS = (".mp3", ".3gp")

# Loop through the audio files and transcribe them
for audio_file in folder:
  # Skip anything that is not a supported audio file; the comparison is
  # case-insensitive so that e.g. "CLIP.MP3" is not silently ignored
  if not audio_file.lower().endswith(AUDIO_EXTENSIONS):
    continue

  # Run Whisper on the specified file (`audio_file` is a bare name inside `dir`)
  result = transcribe_file(model, audio_file, plain, srt, vtt, tsv, download)

print("All Done!")
  

Transcribing file: /content/drive/MyDrive/Meigs_Feb_2019/Jaclyn_02_28_19/Jaclyn-S5982297334085560336717-2019.02.28.11.32.28.9-2019.02.28.11.33.03.0-audio-index_3-obs_5.3gp



100%|██████████| 55848/55848 [01:32<00:00, 601.74frames/s]



Creating text file
Transcribing file: /content/drive/MyDrive/Meigs_Feb_2019/Jaclyn_02_28_19/Jaclyn-S9732230502477918641758-2019.02.28.08.34.21.3-2019.02.28.08.35.24.1-audio-index_2-obs_4.3gp



100%|██████████| 39684/39684 [01:02<00:00, 631.40frames/s]



Creating text file
Transcribing file: /content/drive/MyDrive/Meigs_Feb_2019/Jaclyn_02_28_19/Jaclyn-S9732230502477918641758-2019.02.28.08.42.23.3-2019.02.28.08.43.10.6-audio-index_3-obs_5.3gp



100%|██████████| 32300/32300 [00:45<00:00, 702.69frames/s]



Creating text file
Transcribing file: /content/drive/MyDrive/Meigs_Feb_2019/Jaclyn_02_28_19/Jaclyn-S9732230502477918641758-2019.02.28.08.58.39.1-2019.02.28.08.59.09.6-audio-index_6-obs_8.3gp



100%|██████████| 13072/13072 [00:27<00:00, 479.07frames/s]



Creating text file
Transcribing file: /content/drive/MyDrive/Meigs_Feb_2019/Jaclyn_02_28_19/Jaclyn-S9732230502477918641758-2019.02.28.08.48.40.2-2019.02.28.08.49.12.0-audio-index_4-obs_6.3gp



100%|██████████| 31014/31014 [01:07<00:00, 459.41frames/s]



Creating text file
Transcribing file: /content/drive/MyDrive/Meigs_Feb_2019/Jaclyn_02_28_19/Jaclyn-S9732230502477918641758-2019.02.28.08.54.25.2-2019.02.28.08.55.08.3-audio-index_5-obs_7.3gp



100%|██████████| 20620/20620 [00:35<00:00, 579.24frames/s]



Creating text file
Transcribing file: /content/drive/MyDrive/Meigs_Feb_2019/Jaclyn_02_28_19/Jaclyn-S9732230502477918641758-2019.02.28.09.01.29.1-2019.02.28.09.01.45.7-audio-index_7-obs_9.3gp



100%|██████████| 34590/34590 [00:57<00:00, 598.65frames/s]



Creating text file
Transcribing file: /content/drive/MyDrive/Meigs_Feb_2019/Jaclyn_02_28_19/Jaclyn-S9732230502477918641758-2019.02.28.09.07.49.1-2019.02.28.09.08.08.3-audio-index_8-obs_10.3gp



100%|██████████| 9906/9906 [00:14<00:00, 701.04frames/s]



Creating text file
Transcribing file: /content/drive/MyDrive/Meigs_Feb_2019/Jaclyn_02_28_19/Jaclyn-S9732230502477918641758-2019.02.28.09.10.07.6-2019.02.28.09.10.48.2-audio-index_9-obs_11.3gp



100%|██████████| 15514/15514 [00:22<00:00, 694.88frames/s]


Creating text file
All Done!



