In [1]:
pip install openai langchain faiss-cpu youtube-transcript-api tiktoken


Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting youtube-transcript-api
  Downloading youtube_transcript_api-1.0.3-py3-none-any.whl.metadata (23 kB)
Collecting tiktoken
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m23.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading youtube_transcript_api-1.0.3-py3-none-any.whl (2.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m41.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m33.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu, you

In [6]:
# 📦 Imports
from youtube_transcript_api import YouTubeTranscriptApi
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
import os

# 🔐 OpenAI API Key Setup
os.environ["OPENAI_API_KEY"] = "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"  # Replace this with your key

# 📽️ 1. Get Transcript from YouTube
def get_transcript(video_url):
    video_id = video_url.split("v=")[-1]
    try:
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
        full_transcript = " ".join([t['text'] for t in transcript_list])
        return full_transcript
    except Exception as e:
        return f"Transcript error: {str(e)}"

# 🧠 2. Chunk & Embed Transcript
def chunk_and_embed(text):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = text_splitter.split_text(text)

    embeddings = OpenAIEmbeddings()
    vectordb = FAISS.from_texts(chunks, embedding=embeddings)
    return vectordb

# 🤖 3. Create LangChain RAG Summarizer
def get_summary_chain(vectordb):
    retriever = vectordb.as_retriever(search_kwargs={"k": 4})
    llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")
    chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    return chain

# 📋 4. Summarize YouTube Video
def summarize_youtube_video(video_url):
    print("[+] Fetching transcript...")
    transcript = get_transcript(video_url)
    if transcript.startswith("Transcript error"):
        return transcript

    print("[+] Splitting & embedding transcript...")
    vectordb = chunk_and_embed(transcript)

    print("[+] Running summarization chain...")
    chain = get_summary_chain(vectordb)

    print("[+] Generating summary...\n")
    summary = chain.run("Give me a detailed summary of this video.")
    return summary

# 🚀 Run the Summarizer
if __name__ == "__main__":
    video_url = input("Enter YouTube Video URL: ")
    result = summarize_youtube_video(video_url)
    print("\n📌 Summary:\n")
    print(result)


Enter YouTube Video URL: https://www.youtube.com/shorts/99_Vp95c3sE
[+] Fetching transcript...

📌 Summary:

Transcript error: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=https://www.youtube.com/shorts/99_Vp95c3sE! This is most likely caused by:

You provided an invalid video id. Make sure you are using the video id and NOT the url!

Do NOT run: `YouTubeTranscriptApi.get_transcript("https://www.youtube.com/watch?v=1234")`
Instead run: `YouTubeTranscriptApi.get_transcript("1234")`

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem!


In [3]:
pip install -U langchain-community


Collecting langchain-community
  Downloading langchain_community-0.3.21-py3-none-any.whl.metadata (2.4 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB

In [4]:
pip install -U langchain langchain-community


