<a href="https://colab.research.google.com/github/NormLorenz/ai-llm-youtube-transcription-utility/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Convert the YouTube Transcription Utility to Colab
## Run in Google Colab in anticipation of using AI to:
* correct noun casing
* correct sentence endings - either a period, a question mark or an exclamation mark
* correct word spelling

In [None]:
# install and run a code formatter
# !pip install jupyter-black
# !pip uninstall jupyter-black
# %load_ext jupyter_black

In [None]:
# installs

!pip install openai python-dotenv google-generativeai anthropic youtube_transcript_api

In [None]:
# imports

import os
import io
import sys
import json
import requests
from openai import OpenAI
import google.generativeai
import anthropic
from IPython.display import Markdown, display, update_display
import gradio as gr
import subprocess
from google.colab import userdata
from youtube_transcript_api import YouTubeTranscriptApi
import re
import pprint
from typing import List, Dict, Union, Tuple

In [None]:
%%bash
# get the utilities module from github
# NOTE: the %%bash cell magic has to be the very first line of the cell;
# with a comment in front of it IPython does not run the cell as a shell script.
wget https://raw.githubusercontent.com/NormLorenz/ai-llm-youtube-transcription-utility/refs/heads/main/utilities.py
wget https://raw.githubusercontent.com/NormLorenz/ai-llm-youtube-transcription-utility/refs/heads/main/README.md

In [None]:
# add /content to the Python path to allow importing local modules.
# this ensures that Python can find 'utilities.py' as a module.
# the membership check keeps re-runs of this cell from appending duplicate entries.
if "/content" not in sys.path:
    sys.path.append("/content")

# list content of /content for verification (optional).
display(os.listdir("/content"))

# verify utilities.py exists before importing
# (when the file is missing only a message is printed, so `utilities` stays
# undefined and later cells that use it will fail with a NameError)
utilities_path = os.path.join("/content", "utilities.py")
if os.path.exists(utilities_path):
    import utilities

    print("utilities.py imported successfully.")
else:
    print(
        f"Error: utilities.py not found at {utilities_path}. Please ensure 'wget' in cell MefIb11pw0AX ran successfully."
    )

In [None]:
# keys
# Each credential is looked up by name through google.colab's `userdata`
# helper, so no secret is hard-coded in the notebook.
# NOTE(review): presumably these are the secrets configured in the Colab
# sidebar and a missing secret makes userdata.get() fail - confirm.
# NOTE(review): hugging_face_token is not referenced by any other cell shown
# in this notebook - confirm it is still needed.

openai_api_key = userdata.get("OPENAI_API_KEY")
claude_api_key = userdata.get("ANTHROPIC_API_KEY")
google_api_key = userdata.get("GOOGLE_API_KEY")
hugging_face_token = userdata.get("HF_TOKEN")

In [None]:
# initialize
# Create one client per provider from the API keys read in the previous cell.

openai = OpenAI(api_key=openai_api_key)
claude = anthropic.Anthropic(api_key=claude_api_key)
google.generativeai.configure(api_key=google_api_key)

# Fixed model names used by stream_gpt / stream_claude / stream_google.
OPENAI_MODEL = "gpt-4o-mini"
CLAUDE_MODEL = "claude-3-5-haiku-latest"
GOOGLE_MODEL = "gemini-2.5-flash-lite"

# Choices offered in the UI dropdown, written as "PROVIDER (model-name)".
# display_model() splits an entry on the space and strips the parentheses.
MODELS = [
    "OPENAI (gpt-4)",
    "OPENAI (gpt-4o-mini)",
    "ANTHROPIC (claude-3-5-haiku-latest)",
    "GOOGLE (gemini-2.5-flash-lite)",
]

In [None]:
def system_message() -> str:
    """Return the system prompt that frames the model as a German tutor.

    The prompt asks for corrected noun casing, sentence-initial casing,
    sentence-ending punctuation and spelling, and shows the expected output
    format (a list of dictionaries with "start" and "text" keys).
    """
    fragments = (
        "You are a German language tutor. Your task is to review and correct a Python list of dictionaries that contain ",
        "a time stamp and a German sentence. The sentence may have incorrect casing for German nouns and may have incorrect ",
        "or missing period, question mark or exclamation point. Also please correct any casing at the start of the sentence and ",
        "any misspelled words. The expected output format should be a Python list of dictionaries such as: ",
        '[{"start": "00:01", "text": "Ich habe ein Buch gelesen."}, {"start": "00:05", "text": "Das ist ein Haus."}]',
    )
    # every fragment already carries its own trailing space, so join with nothing
    return "".join(fragments)

In [None]:
def user_prompt(items: List[Dict[str, str]]) -> str:
    """Return the user prompt: a fixed request line followed by ``str(items)``."""
    request_line = "Please review and correct the following: \n"
    return "".join((request_line, str(items)))

In [None]:
def stream_gpt(items: List[Dict[str, str]]):
    """Stream an OpenAI chat completion for ``items``.

    The request uses ``OPENAI_MODEL`` with the system message and the user
    prompt built from ``items``. After every received chunk the generator
    yields the whole reply collected so far, with Markdown code-fence markers
    removed.
    """
    conversation = [
        {"role": "system", "content": system_message()},
        {"role": "user", "content": user_prompt(items)},
    ]
    events = openai.chat.completions.create(
        model=OPENAI_MODEL, messages=conversation, stream=True
    )
    so_far = ""
    for event in events:
        piece = event.choices[0].delta.content
        # a chunk may carry no text; the running reply is then left unchanged
        if piece:
            so_far += piece
        yield so_far.replace("```python\n", "").replace("```", "")

In [None]:
# sample_data = [
#     {"start": "00:01", "text": "ich habe ein buch gelesen"},
#     {"start": "00:05", "text": "dass ist ein haus"},
#     {"start": "00:10", "text": "wie geht es dir"},
#     {"start": "00:15", "text": "seien sie vorsicht"},
# ]

# print("Calling stream_gpt with sample data...")
# for chunk in stream_gpt(sample_data):
#     print(chunk)

In [None]:
def stream_claude(items: List[Dict[str, str]]):
    """Stream a Claude reply for ``items``.

    The request uses ``CLAUDE_MODEL`` with a 2000-token limit, the system
    message and the user prompt built from ``items``. After every received
    text fragment the generator yields the whole reply collected so far, with
    Markdown code-fence markers removed.
    """
    pending = claude.messages.stream(
        model=CLAUDE_MODEL,
        max_tokens=2000,
        system=system_message(),
        messages=[{"role": "user", "content": user_prompt(items)}],
    )
    so_far = ""
    with pending as live:
        for piece in live.text_stream:
            so_far = so_far + piece
            yield so_far.replace("```python\n", "").replace("```", "")

In [None]:
# sample_data = [
#     {"start": "00:01", "text": "ich habe ein buch gelesen"},
#     {"start": "00:05", "text": "dass ist ein haus"},
#     {"start": "00:10", "text": "wie geht es dir"},
#     {"start": "00:15", "text": "seien sie vorsicht"},
# ]

# print("Calling stream_claude with sample data...")
# for chunk in stream_claude(sample_data):
#     print(chunk)

In [None]:
def stream_google(items: List[Dict[str, str]]):
    """Stream a Gemini reply for ``items``.

    A ``GenerativeModel`` is created for ``GOOGLE_MODEL`` with the system
    message as its system instruction. For every chunk that carries text the
    generator yields the whole reply collected so far, with Markdown
    code-fence markers removed; chunks without text yield nothing.
    """
    so_far = ""
    gemini = google.generativeai.GenerativeModel(
        model_name=GOOGLE_MODEL, system_instruction=system_message()
    )
    for part in gemini.generate_content(user_prompt(items), stream=True):
        if not part.text:
            continue
        so_far += part.text
        yield so_far.replace("```python\n", "").replace("```", "")

In [None]:
# sample_data = [
#     {"start": "00:01", "text": "ich habe ein buch gelesen"},
#     {"start": "00:05", "text": "dass ist ein haus"},
#     {"start": "00:10", "text": "wie geht es dir"},
#     {"start": "00:15", "text": "seien sie vorsicht"},
# ]

# print("Calling stream_google with sample data...")
# for chunk in stream_google(sample_data):
#     print(chunk)

In [None]:
def optimize(python, model):
    """Stream a reply for ``python`` from the provider selected by ``model``.

    ``model`` must be "GPT", "Claude" or "Gemini"; the matching ``stream_*``
    generator is called with ``python`` and everything it yields is passed
    through. Any other value raises ``ValueError("Unknown model")`` when the
    generator is first advanced.

    NOTE(review): these labels differ from the entries in ``MODELS`` - confirm
    which callers are expected to use this function.
    """
    if model not in ("GPT", "Claude", "Gemini"):
        raise ValueError("Unknown model")
    if model == "GPT":
        chunks = stream_gpt(python)
    elif model == "Claude":
        chunks = stream_claude(python)
    else:
        chunks = stream_google(python)
    yield from chunks

In [None]:
def execute_python(code):
    """Execute Python code and return what it wrote to stdout.

    Args:
        code: Source text (or a code object) handed to ``exec``.

    Returns:
        Everything the executed code printed to ``sys.stdout``, as one string.

    Raises:
        Whatever the executed code raises; ``sys.stdout`` is restored first.
    """
    # function-scope import: contextlib is not part of the notebook's import cell
    import contextlib

    captured = io.StringIO()
    # redirect_stdout puts back whatever sys.stdout was before the call.
    # Resetting to sys.__stdout__ instead would replace the notebook's own
    # output stream and silence later print() calls in the notebook.
    with contextlib.redirect_stdout(captured):
        # SECURITY: exec runs arbitrary code with the notebook's privileges;
        # only pass code from a trusted source.
        exec(code)
    return captured.getvalue()

In [None]:
def non_stream_google(model: str, items: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Generate a non-streaming Google response and parse it into a list.

    Args:
        model: Gemini model name passed to ``GenerativeModel``.
        items: Transcript entries to be corrected; embedded in the user prompt.

    Returns:
        The list parsed from the model's reply. When the reply contains no
        bracketed list, or the list cannot be parsed, a single placeholder
        entry is returned instead.
    """
    # function-scope import: ast is not part of the notebook's import cell
    import ast

    fallback = [{"start": "00:00", "text": "Wasn't able to get a decent response."}]

    gemini = google.generativeai.GenerativeModel(
        model_name=model, system_instruction=system_message()
    )
    response: str = gemini.generate_content(user_prompt(items), stream=False).text

    # Extract the list using regex. DOTALL lets "." cross line breaks, so a
    # pretty-printed (multi-line) list is found as well.
    match = re.search(r"\[.*\]", response, re.DOTALL)
    if not match:
        return fallback
    list_str = match.group(0)

    # Try the text as JSON first, then as a Python literal (the system prompt
    # asks for a Python list), and only then the legacy quote swap. Swapping
    # quotes up front would corrupt apostrophes inside sentences ("geht's").
    attempts = (
        (json.loads, list_str),
        (ast.literal_eval, list_str),
        (json.loads, list_str.replace("'", '"')),
    )
    for parse, candidate in attempts:
        try:
            parsed = parse(candidate)
        except (ValueError, TypeError, SyntaxError):
            continue
        if isinstance(parsed, list):
            return parsed
    return fallback

In [None]:
"""Gradio UI for the YouTube Transcript Utility."""

# Model name and provider of the current dropdown selection; both are
# overwritten by display_model().
selected_model: str = ""
selected_provider: str = ""

# Styling for the HTML pane that shows the corrected transcript table.
css: str = """
  #my_html_box { border: 2px solid blue; padding: 10px; }
"""

# Client-side handler for the Copy button: selects the transcript table and
# copies it to the clipboard. The table inside #my_html_box is preferred; the
# first table on the page is the fallback. If no table exists yet (Copy
# pressed before Fetch) the user is told so, instead of the script failing on
# selectNode(null).
js: str = """
function() {
    const table = document.querySelector('#my_html_box table') || document.querySelector('table');
    if (!table) {
        alert('There is no table to copy yet - fetch a transcript first.');
        return [];
    }
    const range = document.createRange();
    range.selectNode(table);
    window.getSelection().removeAllRanges();
    window.getSelection().addRange(range);
    document.execCommand('copy');
    alert('Table copied to clipboard!');
    window.getSelection().removeAllRanges();
    return [];
    }
"""

# Default value of the URL text box.
sample_url: str = (
    "https://www.youtube.com/watch?v=L6HnBjnkKmM&list=PLCCi8icw2DAO1qZGH7heUyTB8bJP1ICZ5&index=28"
)


def clear_fields() -> Tuple[None, str, str]:
    """Return the reset values for the three UI fields wired to the Clear button."""
    url_value = None
    transcript_text = ""
    table_markup = ""
    return url_value, transcript_text, table_markup


def mock_original_data(url: str) -> List[Dict[str, str]]:
    """Return a fixed, uncorrected sample transcript; ``url`` is not used."""
    rows = (
        ("00:01", "ich habe ein buch gelesen"),
        ("00:05", "dass ist ein haus"),
        ("00:10", "wie geht es dir"),
        ("00:10", "seien sie vorsicht"),
    )
    return [{"start": stamp, "text": sentence} for stamp, sentence in rows]


def mock_corrected_data(url: str) -> List[Dict[str, str]]:
    """Return a fixed, already corrected sample transcript; ``url`` is not used."""
    rows = (
        ("00:01", "Ich habe ein Buch gelesen."),
        ("00:05", "Das ist ein Haus."),
        ("00:10", "Wie geht es dir?"),
        ("00:10", "Seien Sie vorsichtig!"),
    )
    return [{"start": stamp, "text": sentence} for stamp, sentence in rows]


def build_table(items: List[Dict[str, str]]) -> str:
    """Build an HTML table from the list of dictionaries.

    Args:
        items: Entries with a "start" and a "text" key; one table row each.

    Returns:
        The table markup. An empty list gives a table with an empty body.

    Raises:
        KeyError: If an entry lacks "start" or "text".
    """
    # Function-scope import under its own name: the notebook later binds the
    # global name ``html`` to a Gradio component, so the module cannot be
    # reached through that name.
    from html import escape

    # The values come from a video transcript or a model reply, so "<", ">"
    # and "&" are escaped to keep them from being interpreted as markup.
    # quote=False because the values are placed in element content only.
    rows = [
        "    <tr><td>{}</td><td>{}</td></tr>\n".format(
            escape(str(item["start"]), quote=False),
            escape(str(item["text"]), quote=False),
        )
        for item in items
    ]
    return "<table width=100%>\n  <tbody>\n" + "".join(rows) + "  </tbody>\n</table>"


def build_text(items: List[Dict[str, str]]) -> str:
    """Return ``items`` pretty-printed with an indent of 4."""
    printer = pprint.PrettyPrinter(indent=4)
    return printer.pformat(items)


def get_corrected_transcript(model: str, provider: str, items: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Return the corrected transcript for ``items``.

    The work is delegated to ``non_stream_google`` with ``model`` and ``items``.

    NOTE(review): ``provider`` is accepted but not used - every request goes
    to the Google helper, whatever provider was selected. Confirm whether
    that is intended.
    """
    return non_stream_google(model, items)

def fetch_transcript(url: str, mock: bool, model: str) -> Tuple[str, str]:
    """Fetch the transcript and its corrected version.

    Args:
        url: YouTube video URL taken from the text box.
        mock: When true, canned sample data is used instead of fetching the
            transcript and asking a model to correct it.
        model: Dropdown entry in the form "PROVIDER (model-name)".

    Returns:
        ``(original, corrected)``: the original transcript as pretty-printed
        text and the corrected transcript as an HTML table. For an empty or
        blank ``url`` the first value is "" and the second is a warning
        paragraph.
    """
    if not url or not url.strip():
        return (
            "",
            "<p style='color: orange;'>The YouTube Video URL field can't be empty!</p>",
        )

    if mock:
        transcript_original: List[Dict[str, str]] = mock_original_data(url)
        transcript_corrected: List[Dict[str, str]] = mock_corrected_data(url)
    else:
        if model:
            # Take provider and model name from the value submitted with this
            # request instead of the module-level selection, which is only
            # updated by a separate event.
            provider, _, remainder = model.partition(" ")
            model_name = remainder.replace("(", "").replace(")", "")
        else:
            # no dropdown value supplied: fall back to the stored selection
            provider, model_name = selected_provider, selected_model
        transcript_original = utilities.get_transcript(url)
        transcript_corrected = get_corrected_transcript(
            model_name, provider, transcript_original
        )

    list_original: str = build_text(transcript_original)
    html_corrected: str = build_table(transcript_corrected)
    return list_original, html_corrected


def display_model(model: str) -> None:
    """Store the provider and model name of a "PROVIDER (model-name)" entry.

    The part before the first space goes to the global ``selected_provider``;
    the following part, with its parentheses removed, goes to the global
    ``selected_model``.
    """
    global selected_provider, selected_model
    tokens = model.split(" ")
    selected_provider = tokens[0]
    selected_model = tokens[1].translate(str.maketrans("", "", "()"))


# Build the Gradio page. The widget variables (url, model, mock, fetch, copy,
# clear, text, html) are created at module level, so `html` and `text` refer
# to Gradio components after this cell has run.
with gr.Blocks(css=css) as ui:

    gr.Markdown("## YouTube Transcript Utility")
    gr.Markdown("### Creates a HTML table that can be copied and pasted into a Windows OneNote application")

    # inputs: URL box pre-filled with sample_url, model dropdown defaulting to
    # the last MODELS entry, and a mock-data toggle that starts switched on
    with gr.Row():
        url = gr.Textbox(label="YouTube Video URL:", value=sample_url)
    with gr.Row():
        model = gr.Dropdown(MODELS, label="AI Model Name:", value=MODELS[3])
    with gr.Row():
        mock = gr.Checkbox(label="Use Mock Data", value=True)
    # action buttons
    with gr.Row():
        fetch = gr.Button("Fetch", variant="primary")
        copy = gr.Button("Copy")
        clear = gr.Button("Clear")
    # outputs: original transcript as text, corrected transcript as HTML;
    # the elem_id matches the #my_html_box rule in `css`
    with gr.Row():
        text = gr.TextArea(label="Original Transcript")
        html = gr.HTML(elem_id="my_html_box")

    # Fetch fills both output panes from fetch_transcript's two return values.
    fetch.click(fetch_transcript, inputs=[url, mock, model], outputs=[text, html])
    # Copy has no Python callback (fn=None); it only runs the snippet in `js`.
    copy.click(fn=None, inputs=[], outputs=[], js=js)
    # Clear writes clear_fields' three return values into url, text and html.
    clear.click(clear_fields, inputs=[], outputs=[url, text, html])
    # keep selected_provider / selected_model in step with the dropdown
    model.change(display_model, inputs=[model], outputs=[])

    # trigger manually on launch
    # (so the stored selection matches the dropdown's default value)
    ui.load(display_model, [model], [])


# start the app
ui.launch(inbrowser=True, debug=True)