In [1]:
! pip install -q youtube_transcript_api langchain-community faiss-cpu langchain_google_genai


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
from youtube_transcript_api import YouTubeTranscriptApi
from langchain_text_splitters import RecursiveCharacterTextSplitter

  import pynvml  # type: ignore[import]


**1. Document Ingestion**

In [3]:
# https://www.youtube.com/watch?v=y3cw_9ELpQw
video_id = "y3cw_9ELpQw"
try:
  yt = YouTubeTranscriptApi()
  transcript = yt.fetch(video_id,languages=['en'])
  combined_text = " ".join(chunk.text for chunk in transcript)
  # combined all of the chunked transcripts into one string
  # print(combined_text)
except TranscriptsDisabled:
  print("No transcript is available for this video!")

**2. Text Splitting**

In [4]:
from langchain_core.documents import Document

def split_documents(docs,chunk_size=1000,chunk_overlap=200):
  text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap
    )

  doc = Document(page_content=docs)#creating a document object cause that is what splitter accepts
  text_chunks = text_splitter.split_documents([doc])
  return text_chunks

# print(len(combined_text))
split_chunks = split_documents(combined_text)
print(split_chunks[1])

page_content='podcast the supported please check out our sponsors in the description and now dear friends here's Andrew strominger you are part of the Harvard black hole initiative which has theoretical physicists experimentalists and even philosophers so let me ask the big question what is a black hole from a theoretical from an experimental uh maybe even from a philosophical perspective so a black hole is defined theoretically as a region of space-time from which light can never Escape therefore it's black now that's just the starting point many weird things uh follow from that basic definition but that is that is the basic definition what is light they can't escape from a black hole well light is uh you know the stuff that comes out of the Sun that stuff that goes into your eyes light is one of the the stuff that disappears when the lights go off this is stuff that appears when the lights come on um of course that could give you a Beth a medical definition but or physical mathematic

**3. Storing the chunks in a vector store**

In [5]:
from langchain_community.embeddings import HuggingFaceEmbeddings

def download_embeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return embeddings

embeddings = download_embeddings()
print("Embeddings model downloaded successfully.")

  embeddings = HuggingFaceEmbeddings(model_name=model_name)


Embeddings model downloaded successfully.


In [6]:
from langchain_community.vectorstores import FAISS
vector_store = FAISS.from_documents(split_chunks,embeddings)
#convert the given chunks to respective vectors ; the vector ids are different every time!

# print(vector_store.index_to_docstore_id)
# chunks are respe

In [7]:
vector_store.get_by_ids(['e5be311d-0356-4186-a8ed-574653cc8126'])

[]

**RETRIEVER**

In [8]:
retriever = vector_store.as_retriever(search_type="similarity",search_kwargs={"k":3})
#using same vector store as a retriever which searches for semantic similarity and outputs 3 relevant blocks ;
retriever.invoke("what is blackhole ? ")

[Document(id='daab8d97-56c6-4c5e-afa2-975de2c21330', metadata={}, page_content="podcast the supported please check out our sponsors in the description and now dear friends here's Andrew strominger you are part of the Harvard black hole initiative which has theoretical physicists experimentalists and even philosophers so let me ask the big question what is a black hole from a theoretical from an experimental uh maybe even from a philosophical perspective so a black hole is defined theoretically as a region of space-time from which light can never Escape therefore it's black now that's just the starting point many weird things uh follow from that basic definition but that is that is the basic definition what is light they can't escape from a black hole well light is uh you know the stuff that comes out of the Sun that stuff that goes into your eyes light is one of the the stuff that disappears when the lights go off this is stuff that appears when the lights come on um of course that cou

**Setting up LLM**

In [9]:
from langchain_google_genai import GoogleGenerativeAI
from langchain_core.prompts import PromptTemplate

from dotenv import load_dotenv
import os
load_dotenv()

api_key = os.getenv("GEMINI_API_KEY")
os.environ["GOOGLE_API_KEY"] = api_key


llm = GoogleGenerativeAI(model="gemini-2.5-flash")

# print(result)

prompt_template = PromptTemplate(
    template="""
    You are a helpful assistant.
    Answer ONLY from the provided transcript context of the video.
    If the context is insufficient, just say that you donot know the answer.
    Context: {context}
    Question: {question}
    """,
    input_variables=["context", "question"]
)
#efficient prompt for llm questioning!

In [10]:
retrieved_docs = retriever.invoke("is blackhole discussed here ?")
context_text = "\n\n".join(content.page_content for content in retrieved_docs)
print(context_text)
question = "is blackhole discussed in the video ? "
#so basically if the context retrieved is not flawless, the answer of the LLM wouldn't be good ; so it all depends upon the context provided by the retriever!

theory of general relativity and corrected it but we couldn't do any better with black holes than um before my work with command we couldn't do any better than Einstein and tortill had done now um one of the puzzles um you know if you look at the Hawking's headstone and also Boltzmann's headstone and you put them together you get a formula for their really Central equations in 20th century physics I don't think there are many equations that made it to headstones and and they're really Central equations and you put them together and you get a formula for the number of gigabytes in a black hole now a short shelves description the black hole is literally a hole in space and there's no place to store the gigabytes and it's not too hard to and this really was wheeler and beckenstein and wheeler beckenstein and Hawking to come to the conclusion that if there isn't a sense in which a black hole can store some large number of gigabytes that quantum mechanics and gravity can't be consistent we


In [11]:
prompt = prompt_template.invoke({"context":context_text,"question":question})

GENERATION

In [12]:
answer = llm.invoke(prompt)
print(answer)

Yes, black holes are discussed in the video. The definition of a black hole is provided, and its relationship to string theory and general relativity is mentioned.
