In [None]:
from yt_dlp import YoutubeDL
import json

In [2]:
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_classic.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from dotenv import load_dotenv

load_dotenv()

True

## Indexing


In [3]:
# Fetch the English caption track for the video and keep the HTTP response in
# `response`; later cells read `response.text`.
import requests  # only this cell needs it; imported at the top of the cell

video_url = "https://www.youtube.com/watch?v=LPZh9BOjkQs"
SUBTITLE_LANG = "en"
# The payload is parsed with json.loads() in a later cell, so prefer the JSON
# caption track instead of relying on whatever happens to be first in the list.
SUBTITLE_EXT = "json3"
REQUEST_TIMEOUT_S = 30  # never hang the kernel on a stalled connection

ydl_opts = {
    "skip_download": True,        # don't download video
    "writesubtitles": True,       # write subtitles
    "writeautomaticsub": True,    # also include auto-generated subtitles
    "subtitlesformat": "vtt",     # format: .vtt or .srt
    "subtitleslangs": [SUBTITLE_LANG],     # select language
    "quiet": True,
}

# Only metadata is needed; the caption track itself is downloaded below.
with YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info(video_url, download=False)

# Look the language up in each source separately: manual subtitles may exist
# without an English track while the automatic captions do have one.
manual_tracks = (info.get("subtitles") or {}).get(SUBTITLE_LANG)
auto_tracks = (info.get("automatic_captions") or {}).get(SUBTITLE_LANG)
tracks = manual_tracks or auto_tracks

if not tracks:
    # Fail here with a clear message instead of a NameError on `response` later.
    raise RuntimeError("❌ No subtitles found.")

# Pick the JSON track explicitly; fall back to the first track (the previous
# behaviour) if the preferred format is not offered.
selected_track = next(
    (track for track in tracks if track.get("ext") == SUBTITLE_EXT),
    tracks[0],
)
subtitle_url = selected_track["url"]

response = requests.get(subtitle_url, timeout=REQUEST_TIMEOUT_S)
if not response.ok:
    raise RuntimeError(
        f"⚠️ Failed to download subtitle content. (HTTP {response.status_code})"
    )

print("✅ Transcript fetched successfully!\n")


         player = https://www.youtube.com/s/player/7dc3db36/player_ias.vflset/en_US/base.js
         n = 7pby3wnH3Y1MxdaIq4 ; player = https://www.youtube.com/s/player/7dc3db36/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


✅ Transcript fetched successfully!



In [4]:
# Preview the first 500 characters of the downloaded subtitle payload to check its format.
print(response.text[:500])

{
  "wireMagic": "pb3",
  "pens": [ {
  
  } ],
  "wsWinStyles": [ {
  
  } ],
  "wpWinPositions": [ {
  
  } ],
  "events": [ {
    "tStartMs": 1140,
    "dDurationMs": 2836,
    "segs": [ {
      "utf8": "Imagine you happen across a short movie script that"
    } ]
  }, {
    "tStartMs": 3976,
    "dDurationMs": 3164,
    "segs": [ {
      "utf8": "describes a scene between a person and their AI assistant."
    } ]
  }, {
    "tStartMs": 7480,
    "dDurationMs": 5580,
    "segs": [ {
      "ut


In [5]:
# Parse the subtitle payload (a JSON object with an "events" list) into Python data.
# NOTE(review): `transcript` is rebound to a one-element list of text in a later cell,
# so this cell must be re-run before re-running the segment-extraction cell.
transcript = json.loads(response.text)

In [6]:
# Collect every caption fragment, in order, from the parsed subtitle events.
# Events without a "segs" list and segments without a "utf8" field are skipped.
text_list = []
for caption_event in transcript.get("events", []):
    for segment in caption_event.get("segs", ()):
        if "utf8" in segment:
            text_list.append(segment["utf8"])

In [7]:
# Join all caption fragments with newlines into one string, wrapped in a list
# (the splitter cell passes this list to create_documents).
# NOTE(review): this rebinds `transcript` from the parsed JSON to a list of text.
transcript = ["\n".join(text_list)]

In [9]:
# Inspect the joined transcript (a one-element list holding the full text).
transcript

["Imagine you happen across a short movie script that\ndescribes a scene between a person and their AI assistant.\nThe script has what the person asks the AI, but the AI's response has been torn off.\nSuppose you also have this powerful magical machine that can take\nany text and provide a sensible prediction of what word comes next.\nYou could then finish the script by feeding in what you have to the machine,\nseeing what it would predict to start the AI's answer,\nand then repeating this over and over with a growing script completing the dialogue.\nWhen you interact with a chatbot, this is exactly what's happening.\nA large language model is a sophisticated mathematical function\nthat predicts what word comes next for any piece of text.\nInstead of predicting one word with certainty, though,\nwhat it does is assign a probability to all possible next words.\nTo build a chatbot, you lay out some text that describes an interaction between a user\nand a hypothetical AI assistant, add on 

## Text Splitting


In [10]:
# Break the transcript into chunks of up to 1000 characters; consecutive
# chunks share up to 200 characters of overlap.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# `transcript` is a list of raw strings; each one is split into Document chunks.
chunks = splitter.create_documents(transcript)

In [11]:
# Number of chunks produced by the splitter.
len(chunks)

10

## Embedding


In [13]:
# Embedding model used to vectorise the transcript chunks.
embedding = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")

# Build the FAISS index; each chunk's id is its position rendered as a string.
vector_store = FAISS.from_documents(
    chunks,
    embedding,
    ids=list(map(str, range(len(chunks)))),
)

# Persist the index to a local folder.
vector_store.save_local("youtube_transcript_vector_store")

In [14]:
# Show the mapping from FAISS index position to docstore id; it should mirror
# the string ids passed to FAISS.from_documents.
vector_store.index_to_docstore_id

{0: '0',
 1: '1',
 2: '2',
 3: '3',
 4: '4',
 5: '5',
 6: '6',
 7: '7',
 8: '8',
 9: '9'}

## Retrieval


In [15]:
# Similarity-search retriever over the vector store, returning the 5 closest chunks per query.
retriver = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=5),
)

## Augmentation


In [16]:
# Chat model that generates the final answer.
# NOTE(review): credentials are presumably picked up from the environment populated by load_dotenv() — confirm.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")

In [17]:
# Prompt that restricts the model to the retrieved transcript context.
# It takes two variables: `context` (the joined retrieved chunks) and `question`.
# NOTE(review): the template body mixes a tab and spaces for indentation; that
# whitespace is sent to the model verbatim, so it is left untouched here.
prompt = PromptTemplate(
	template="""
	You are a helpful assistant.
        Answer ONLY from the provided transcript context.
        If the context is insufficient, just say you don't know.

        {context}
        Question: {question}
 """,
	input_variables=["context", "question"]
)

In [18]:
def format_docs(retrieved_docs):
    """Merge retrieved documents into a single context string.

    Args:
        retrieved_docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values joined with one blank line between them;
        an empty string when there are no documents.
    """
    separator = "\n\n"
    contents = [document.page_content for document in retrieved_docs]
    return separator.join(contents)

In [19]:
# Output parser placed last in the chain so the result is plain text rather than a message object.
parser = StrOutputParser()

In [20]:
# Build the prompt inputs from a single question string:
#   context  -> retrieve matching chunks and join them into one text block
#   question -> the question itself, passed through unchanged
context_branch = retriver | RunnableLambda(format_docs)
parallel_chain = RunnableParallel(
    context=context_branch,
    question=RunnablePassthrough(),
)

In [21]:
# Full pipeline: build {context, question} -> fill the prompt -> call the LLM -> parse to a string.
main_chain = parallel_chain | prompt | llm | parser

## Generation


In [26]:
# Ask the chain for a summary; the question string is used both for retrieval and in the prompt.
result = main_chain.invoke('Can you summarize the video')
print(result)

The video discusses how large language models (LLMs) and transformers work. It explains that LLMs are sophisticated mathematical functions that predict the next word for any piece of text by assigning probabilities to all possible next words. This process is used to build chatbots, where the model repeatedly predicts the next word to complete a dialogue.

Internally, transformers associate each word with a list of numbers, as the training process works with continuous values. A key feature of transformers is the "attention" operation, which allows these numbers to communicate and refine their encoded meanings based on context, all in parallel. They also include feed-forward neural networks.

The transcript notes that while researchers design the framework, the specific behavior of LLMs is an emergent phenomenon from tuning billions of parameters during training, making it challenging to determine why exact predictions are made. LLMs are trained on enormous amounts of text, with an exam

In [25]:
# Ask a targeted question about a term from the transcript.
# NOTE(review): "waht" is a typo in the query; the recorded answer was produced with it.
res = main_chain.invoke('waht is RLHF ?')
print(res)

RLHF stands for reinforcement learning with human feedback. It is a type of training that chatbots undergo to address the difference between auto-completing random text and being a good AI assistant. In this process, workers flag unhelpful or problematic predictions, and their corrections further change the model's parameters, making them more likely to give predictions that users prefer.


In [28]:
# Ask a second targeted question; `res` is overwritten with the new answer.
# NOTE(review): "waht" is a typo in the query; the recorded answer was produced with it.
res = main_chain.invoke('waht is Attention ?')
print(res)

Attention is a special operation that gives lists of numbers, which encode the meaning of words, a chance to talk to one another and refine the meanings they encode based on the surrounding context, all done in parallel.
