This demo uses the following video: https://www.youtube.com/watch?v=Fh7acREmkGs

All credit for the video goes to "How It's Made".

# Imports & Installations

In [1]:
!pip install openai-whisper
!pip install openai
!pip install pytube

Collecting openai-whisper
  Downloading openai-whisper-20231117.tar.gz (798 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/798.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m788.5/798.6 kB[0m [31m33.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m798.6/798.6 kB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tiktoken (from openai-whisper)
  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->openai-whisper)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torc

In [2]:
from google.colab import drive

In [4]:
# Mount Google Drive at /content/drive (Colab only).
# NOTE(review): presumably so the video/audio path asked for later can point
# at a file stored in Drive — confirm.
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
import whisper
import openai
from openai import OpenAI
from pytube import YouTube
import os
import time

# YouTube Video Handling

In [6]:
def download_audio_from_youtube(youtube_url, filename='temp_audio.mp3'):
    """Download the first audio-only stream of a YouTube video.

    Args:
        youtube_url: URL of the YouTube video.
        filename: Name given to the downloaded file. Defaults to
            'temp_audio.mp3'.
            NOTE(review): the stream's actual container may not be MP3; the
            extension is only a label here — confirm nothing downstream
            relies on it.

    Returns:
        The path of the downloaded file, as returned by the stream's
        ``download`` call.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(youtube_url)
    audio_stream = yt.streams.filter(only_audio=True).first()
    # .first() yields None when the filter matches nothing; fail with a clear
    # message instead of an AttributeError on ``None.download``.
    if audio_stream is None:
        raise ValueError(f"No audio-only stream found for {youtube_url!r}")
    return audio_stream.download(filename=filename)

# Generating Transcription

In [7]:
def transcribe_audio(audio_file_path, model_name="base", verbose=True):
    """Transcribe an audio file with a Whisper model.

    Args:
        audio_file_path: Path of the audio/video file handed to Whisper.
        model_name: Name of the Whisper model to load. Defaults to "base".
        verbose: Forwarded to ``model.transcribe``. Defaults to True.

    Returns:
        Whatever ``model.transcribe`` returns; elsewhere in this notebook the
        result is indexed with the key 'segments'.
    """
    # The model is loaded on every call; callers transcribing many files may
    # want to load it once themselves.
    model = whisper.load_model(model_name)
    return model.transcribe(audio_file_path, verbose=verbose)

In [8]:
def seconds_to_timestamp(seconds):
    """Format an offset in seconds as a zero-padded timestamp string.

    Offsets below one hour are rendered as "MM:SS". From one hour onward an
    hours field is added ("HH:MM:SS") so the minutes field never exceeds 59.

    Args:
        seconds: Offset in seconds (int or float). Fractions are truncated;
            negative values are clamped to zero.

    Returns:
        The formatted timestamp string.
    """
    # Clamp first: divmod on a negative number would yield a negative minute
    # field paired with a positive second field (e.g. "-1:55").
    total = max(0, int(seconds))
    mins, secs = divmod(total, 60)
    hours, mins = divmod(mins, 60)
    if hours:
        return f"{hours:02}:{mins:02}:{secs:02}"
    return f"{mins:02}:{secs:02}"

In [9]:
def filter_transcription(transcription):
    """Reduce a transcription to a list of timestamped text segments.

    Args:
        transcription: Mapping with a 'segments' entry; each segment must
            provide 'start', 'end' and 'text'.

    Returns:
        A list of dicts with keys "start" and "end" (formatted through
        ``seconds_to_timestamp``) and "text" (copied unchanged), in the same
        order as the input segments.
    """
    return [
        {
            "start": seconds_to_timestamp(seg['start']),
            "end": seconds_to_timestamp(seg['end']),
            "text": seg['text'],
        }
        for seg in transcription['segments']
    ]

# Prompting LLM

In [10]:
# Read the API key from the OPENAI_API_KEY environment variable rather than
# embedding it in the notebook, where it would be saved and shared with the
# file. Set the variable before running this cell.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY")
)

def generate_chapters_with_llm(title, num_chapters, transcription, model="gpt-4o"):
    """Ask a chat model to split a transcribed video into chapters.

    Args:
        title: Video title, inserted into the prompt.
        num_chapters: Number of chapters to request, inserted into the prompt
            as-is (a string works as well as an int).
        transcription: Transcription text, inserted into the prompt.
        model: Chat model name passed to the API. Defaults to "gpt-4o".

    Returns:
        The text of the first returned choice with surrounding whitespace
        removed, or an empty string when the response carries no text.
    """
    prompt = f"Video Title: {title}\n\nDivide the video into {num_chapters} chapters (with the timestamps) and a brief, <10 word summary of each chapter.\n\nVideo Transcription: {transcription}"
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )
    # The message content may be None (e.g. no text was produced); guard so
    # .strip() is never called on None.
    content = response.choices[0].message.content
    return (content or "").strip()

# Main Pipeline

In [20]:
def process_video(url_or_path, youtube=False):
    """Transcribe a local media file or a YouTube video.

    Args:
        url_or_path: Path of a local audio/video file, or a YouTube URL when
            ``youtube`` is True.
        youtube: When True, the audio is first downloaded with
            ``download_audio_from_youtube`` and the downloaded file is
            removed afterwards.

    Returns:
        The ``str()`` of the list produced by ``filter_transcription``.
    """
    audio_path = download_audio_from_youtube(url_or_path) if youtube else url_or_path
    try:
        return str(filter_transcription(transcribe_audio(audio_path)))
    finally:
        # Remove the temporary download even when transcription fails. The
        # existence check keeps a missing file from masking the real error.
        if youtube and os.path.exists(audio_path):
            os.remove(audio_path)

In [21]:
#title = "How It's Made: Chocolate Chip Cookies"
#num_chapters = 3
#youtube_url = https://www.youtube.com/watch?v=Fh7acREmkGs
#audio_path = '' #Redacted

def generate_timestamps():
    """Prompt for video details and return LLM-generated chapters.

    Asks, in order, for the video title, the number of chapters and the path
    of a local video/audio file; transcribes that file with ``process_video``
    and passes everything to ``generate_chapters_with_llm``.

    To work from a YouTube link instead of a local file, call
    ``process_video(url, youtube=True)``.

    Returns:
        The chapter text returned by ``generate_chapters_with_llm``.
    """
    questions = (
        'What is the title of the video?\n',
        'How many timestamps/chapters do you want?\n',
        'What is the video/audio file path?\n',
    )
    # The generator is consumed left to right, so the prompts appear in the
    # order listed above.
    title, num_chapters, video_path = (input(question) for question in questions)

    return generate_chapters_with_llm(title, num_chapters, process_video(video_path))

In [None]:
# Run the interactive pipeline (prompts, transcription, LLM request) and keep
# the returned chapter text for display.
chapters = generate_timestamps()

In [23]:
# print() rather than a bare expression, so line breaks in the returned text
# are rendered instead of shown as escaped "\n" in a string repr.
print(chapters)

**Chapter 1: The Origins and Ingredients (00:00 - 01:01)**
*Chocolate chip cookies history and main ingredients.*

**Chapter 2: Mixing and Shaping the Dough (01:01 - 02:32)**
*Industrial mixing and forming dough rounds.*

**Chapter 3: Freezing, Packing, and Baking (02:32 - 05:02)**
*Freezing, packaging, and final baking process.*
