# Installing Whisper

The commands below will install the Python packages needed to use Whisper models and evaluate the transcription results.

In [1]:
! pip install git+https://github.com/openai/whisper.git
! pip install jiwer

# Importing libraries and selecting the device

The following cell imports the required libraries and selects the GPU (`cuda`) when one is available, falling back to the CPU otherwise.

In [2]:
import os
import numpy as np

try:
    import tensorflow  # required in Colab to avoid protobuf compatibility issues
except ImportError:
    pass

import torch
import pandas as pd
import whisper
import torchaudio
from google.colab import files

from tqdm.notebook import tqdm


# Run on the GPU when PyTorch can see a CUDA device; otherwise use the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Check whether the notebook is running on GPU or CPU
Running without a GPU will take significantly longer (roughly 2x the length of the podcast). The device should be 'cuda' for faster results.

In [None]:
# Display the selected device string: "cuda" when a GPU was detected, "cpu" otherwise.
DEVICE

# Select Model Strength
Larger models will be more accurate but will take longer to transcribe

In [None]:
# Name of the Whisper checkpoint that is later passed to whisper.load_model().
# NOTE(review): the trailing `#@param` annotation presumably renders a Colab form
# dropdown with the listed choices — confirm before editing that comment.
model_type = 'base' #@param ["base", "small", "medium", "large"]

# Upload the file to be transcribed
Run the cell below; a button will appear to upload your audio file. Click the upload button and wait until the upload reaches 100%.

In [None]:
# Ask Colab for a file upload. The result is used later as a mapping keyed by
# uploaded file name (the transcription step reads `uploaded.keys()`).
uploaded = files.upload()

# Transcribe the audio file
The cell below will transcribe the audio.

In [None]:
# Fail early with a clear message instead of an IndexError when nothing was uploaded.
if not uploaded:
    raise ValueError("No file was uploaded; run the upload cell and select an audio file first.")

# `uploaded` is keyed by file name; only the first uploaded file is transcribed.
filename = next(iter(uploaded))

# Load the selected checkpoint onto the device chosen earlier (GPU when available).
model = whisper.load_model(model_type, device=DEVICE)

# `result` is consumed later through its 'segments' entry.
result = model.transcribe(filename)

# Formatting the data
Format the timestamps and create a cleaned list of the data to prepare for writing to file

In [None]:
def make_readable(s):
    """Format a duration in seconds as a zero-padded ``HH:MM:SS`` string.

    Args:
        s: Number of seconds (int or float). Fractional seconds are
            truncated, and negative values are clamped to zero.

    Returns:
        str: The duration as ``HH:MM:SS``; hours are not wrapped at 24 and
        grow beyond two digits when needed.
    """
    # Coerce to a whole number first: applying '{:02}' to a float would produce
    # malformed fields such as '1.0:1.0:1.5'.
    total_seconds = max(int(s), 0)
    minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return '{:02}:{:02}:{:02}'.format(hours, minutes, seconds)

# Build one [cue number, start, end, text] row per Whisper segment.
srt_list = []
segments = result['segments']
# SRT cue numbers conventionally start at 1, whereas Whisper's segment ids start
# at 0, so number the cues here instead of reusing segment['id'] (this also
# avoids shadowing the built-in `id`).
for cue_number, segment in enumerate(segments, start=1):
    # Timestamps are truncated to whole seconds before formatting, so
    # sub-second precision is dropped.
    start = make_readable(int(segment['start']))
    end = make_readable(int(segment['end']))
    # Strip surrounding whitespace so cue lines do not begin with a stray space.
    text = segment['text'].strip()
    srt_list.append([cue_number, start, end, text])

# Creating the SRT file
Write the data to a transcript file in SRT format.
The file will appear in the Google Colab file list as transcription.srt.
Download the file and open it in a text editor such as Notepad to see the results.

In [None]:
# Write the cues in SubRip layout: cue number, time range, text, blank line.
# UTF-8 is requested explicitly so non-ASCII transcripts are written the same
# way regardless of the platform's default encoding.
with open('transcription.srt', 'w', encoding='utf-8') as f:
    for cue_number, start, end, text in srt_list:
        # Timestamps only have whole-second resolution, hence the fixed ",000"
        # millisecond field.
        f.write('{}\n{},000 --> {},000\n{}\n\n'.format(cue_number, start, end, text))