In [4]:
!pip install moviepy==1.0.3
!pip install -U openai-whisper

Collecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.5/800.5 kB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tiktoken (from openai-whisper)
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->openai-whisper)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->openai-whisper)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->openai-whisper)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-non

In [2]:
import whisper
from moviepy.editor import AudioFileClip
import os
import torch

# ---------------------------------------------------------------------------
# Split one audio file into fixed-length segments, transcribe every segment
# with Whisper, shift the per-segment timestamps back onto the timeline of the
# full recording, and write the transcribed text to a file (one line per
# Whisper segment).
# ---------------------------------------------------------------------------

# Configuration
SEGMENT_SECONDS = 60        # length of each partition (1-minute segments)
MIN_TAIL_SECONDS = 1.0      # a shorter remainder is folded into the last segment
WHISPER_MODEL_NAME = "large"

# Define paths
base_path = "/content/sample_data"
audio_path = os.path.join(base_path, "Ignite.mp3")
audio_partition_folder = os.path.join(base_path, "audio_files")
subtitle_path = os.path.join(base_path, "subtitles", "subtitles.txt")

# Ensure directories exist
os.makedirs(audio_partition_folder, exist_ok=True)
os.makedirs(os.path.dirname(subtitle_path), exist_ok=True)

# Load audio and get duration.
# The exact (float) duration is kept on purpose: rounding it either drops up
# to half a second at the end of the recording or requests audio that lies
# past the end of the file.
with AudioFileClip(audio_path) as audio_clip:
    total_duration = audio_clip.duration

print("Partitioning the audio clip...")

# Each entry is (path of the segment file, start offset in the full recording).
segments = []
start = 0.0

try:
    # Partition the audio into 1-minute segments
    while start < total_duration:
        end_time = min(start + SEGMENT_SECONDS, total_duration)
        # Avoid writing a near-empty trailing file: absorb a very short
        # remainder into the current segment instead.
        if total_duration - end_time < MIN_TAIL_SECONDS:
            end_time = total_duration

        # Extract and save subclip
        temp_saving_location = os.path.join(
            audio_partition_folder, f'temp_{len(segments)}.mp3'
        )
        # Register the path before writing so that a partially written file
        # is still cleaned up if the export fails.
        segments.append((temp_saving_location, start))

        # The source is opened afresh for every segment (as before), because
        # temp.close() below may release a reader shared with the parent clip.
        with AudioFileClip(audio_path) as audio_clip:
            temp = audio_clip.subclip(start, end_time)
            temp.write_audiofile(filename=temp_saving_location, verbose=False, logger=None)
            temp.close()

        start = end_time  # Move to next segment

    counter = len(segments)
    print("Partitioning completed.")

    # Load Whisper model once
    print("Loading Whisper model...")
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = whisper.load_model(WHISPER_MODEL_NAME, device=device)

    # Transcribing each segment
    print("Transcribing audio segments...")
    final_list_of_text = []

    for path_to_saved_file, segment_offset in segments:
        # Ensure the file exists; report the gap instead of skipping silently.
        if not os.path.exists(path_to_saved_file):
            print(f"Warning: segment file not found, skipping: {path_to_saved_file}")
            continue

        # Transcribe using Whisper
        out = model.transcribe(path_to_saved_file)

        for line in out['segments']:
            # Shift by the segment's known start in the full recording. This
            # stays correct even if a segment was skipped, and does not depend
            # on the duration of the re-encoded temporary file.
            line['start'] += segment_offset
            line['end'] += segment_offset
            # Renumber so ids are consecutive across all segments.
            line['id'] = len(final_list_of_text)
            final_list_of_text.append(line)
finally:
    # Delete temporary files to free disk space, even when partitioning,
    # model loading, or transcription failed part-way through.
    for path_to_saved_file, _ in segments:
        if os.path.exists(path_to_saved_file):
            os.remove(path_to_saved_file)

# Save subtitles (explicit UTF-8 so non-ASCII text is written consistently
# regardless of the platform's default encoding).
with open(subtitle_path, 'w', encoding='utf-8') as fp:
    for line in final_list_of_text:
        fp.write("{}\n".format(line.get("text")))

print("Successfully completed execution.")

Partitioning the audio clip...
Partitioning completed.
Loading Whisper model...
Transcribing audio segments...








Successfully completed execution.
