In [1]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from dotenv import load_dotenv
from langchain_community.document_loaders import YoutubeLoader

  from .autonotebook import tqdm as notebook_tqdm





In [2]:
load_dotenv()

True

## Step 1 : INDEXING

#### Loading documents

In [3]:
loader = YoutubeLoader.from_youtube_url(
    "https://www.youtube.com/watch?v=Gfr50f6ZBvo",
    add_video_info=False  #  skip pytube metadata
)
docs = loader.load()

print("✅ Transcript loaded successfully!")
print(docs[0].page_content[:500])


✅ Transcript loaded successfully!
the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful 


#### Splitting

In [4]:
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.split_documents(docs)

In [5]:
len(chunks)

168

In [6]:
chunks[100]

Document(metadata={'source': 'Gfr50f6ZBvo'}, page_content="and and kind of come up with descriptions of the electron clouds where they're gonna go how they're gonna interact when you put two elements together uh and what we try to do is learn a simulation uh uh learner functional that will describe more chemistry types of chemistry so um until now you know you can run expensive simulations but then you can only simulate very small uh molecules very simple molecules we would like to simulate large materials um and so uh today there's no way of doing that and we're building up towards uh building functionals that approximate schrodinger's equation and then allow you to describe uh what the electrons are doing and all materials sort of science and material properties are governed by the electrons and and how they interact so have a good summarization of the simulation through the functional um but one that is still close to what the actual simulation would come out with so what um how dif

#### Creating Embedding and storing them at Vector Stores

In [7]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = FAISS.from_documents(chunks, embeddings)

In [8]:
vector_store.index_to_docstore_id

{0: 'acc6b2d0-6600-42e7-a04c-5665d23e586d',
 1: '94ab31cc-41c1-49a9-8b57-4e7c2b6481e1',
 2: '078b2de3-8901-4589-a08e-bcc69a243100',
 3: '6e068547-aa15-4272-a3c6-d2479026f5fb',
 4: 'e72daa64-2fb8-43dd-8e69-5bdb0eab4fa0',
 5: '4c0164a6-bd2b-4b4c-af87-08a8db99c322',
 6: '0590632c-a796-4d83-83b9-10d7bf9eda97',
 7: '437d59d7-e0e3-4591-8dc9-aedb574aaa46',
 8: '50893918-d57f-45d4-a63f-846100525a24',
 9: 'ab05d55b-6eda-42ba-b109-1411b08cf1ad',
 10: '146ba36a-f112-4762-81dc-de960a29fe33',
 11: 'aca897f4-e4a4-485e-ae01-f324436426e7',
 12: '3ead734d-363c-4a0d-8782-18dea86f0d06',
 13: 'a8640a09-3a87-4283-aa94-6ea40e072891',
 14: 'dda7668a-1f3f-483a-b62a-55dabc48ca17',
 15: '345350ec-c983-4891-a9de-ee7889c54fcc',
 16: '9eed6760-5032-4373-98f7-a531e3ba4c82',
 17: '2545ac2b-029d-4c2b-a305-94a56cc935a8',
 18: '6294bc1d-ec73-4fd3-85c1-f3a76ea4eb9e',
 19: '7398133d-0ab5-4cc1-aa04-0270aa855ce5',
 20: '32887774-1874-4615-ae15-3d3d6e065c86',
 21: 'e65560e4-f39f-4e49-9ffe-2ac8420d72a5',
 22: '8d0c7851-8477-

In [9]:
vector_store.get_by_ids(['bc823f17-02d7-4e37-8ce1-2de508e7d180'])

[]

## Step 2 : RETRIEVAL

In [10]:
retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={"k":4})

In [11]:
retriever.invoke('What is deepmind')

[Document(id='1feeee39-578a-4053-b5a2-604d6769b095', metadata={'source': 'Gfr50f6ZBvo'}, page_content="and how it works this is tough to uh ask you this question because you probably will say it's everything but let's let's try let's try to think to this because you're in a very interesting position where deepmind is the place of some of the most uh brilliant ideas in the history of ai but it's also a place of brilliant engineering so how much of solving intelligence this big goal for deepmind how much of it is science how much is engineering so how much is the algorithms how much is the data how much is the hardware compute infrastructure how much is it the software computer infrastructure yeah um what else is there how much is the human infrastructure and like just the humans interact in certain kinds of ways in all the space of all those ideas how much does maybe like philosophy how much what's the key if um uh if if you were to sort of look back like if we go forward 200 years look

In [12]:
llm = ChatGoogleGenerativeAI(model='gemini-2.5-flash')

In [13]:
prompt = PromptTemplate(
    template="""
    You are a helpful assistent.
    Answer ONLY the provided transcript context.
    If the context is insufficient, just say you don't know.
    
    {context}
    Question: {question}""",
    input_variables=['context', 'question']
)

In [14]:
question = "is the topic of nuclear fusion discussed in this video? if yes then what was discussed"
retrieved_docs = retriever.invoke(question)

In [15]:
retrieved_docs

[Document(id='9367127a-acb9-4bb4-b9cd-e7afa525ac4d', metadata={'source': 'Gfr50f6ZBvo'}, page_content="in this case in fusion we we collaborated with epfl in switzerland the swiss technical institute who are amazing they have a test reactor that they were willing to let us use which you know i double checked with the team we were going to use carefully and safely i was impressed they managed to persuade them to let us use it and um and it's a it's an amazing test reactor they have there and they try all sorts of pretty crazy experiments on it and um the the the what we tend to look at is if we go into a new domain like fusion what are all the bottleneck problems uh like thinking from first principles you know what are all the bottleneck problems that are still stopping fusion working today and then we look at we you know we get a fusion expert to tell us and then we look at those bottlenecks and we look at the ones which ones are amenable to our ai methods today yes right and and and t

In [16]:
context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)
context_text

"in this case in fusion we we collaborated with epfl in switzerland the swiss technical institute who are amazing they have a test reactor that they were willing to let us use which you know i double checked with the team we were going to use carefully and safely i was impressed they managed to persuade them to let us use it and um and it's a it's an amazing test reactor they have there and they try all sorts of pretty crazy experiments on it and um the the the what we tend to look at is if we go into a new domain like fusion what are all the bottleneck problems uh like thinking from first principles you know what are all the bottleneck problems that are still stopping fusion working today and then we look at we you know we get a fusion expert to tell us and then we look at those bottlenecks and we look at the ones which ones are amenable to our ai methods today yes right and and and then and would be interesting from a research perspective from our point of view from an ai point of\n\

In [17]:
final_prompt = prompt.invoke({"context":context_text, "question":question})

In [18]:
final_prompt

StringPromptValue(text="\n    You are a helpful assistent.\n    Answer ONLY the provided transcript context.\n    If the context is insufficient, just say you don't know.\n\n    in this case in fusion we we collaborated with epfl in switzerland the swiss technical institute who are amazing they have a test reactor that they were willing to let us use which you know i double checked with the team we were going to use carefully and safely i was impressed they managed to persuade them to let us use it and um and it's a it's an amazing test reactor they have there and they try all sorts of pretty crazy experiments on it and um the the the what we tend to look at is if we go into a new domain like fusion what are all the bottleneck problems uh like thinking from first principles you know what are all the bottleneck problems that are still stopping fusion working today and then we look at we you know we get a fusion expert to tell us and then we look at those bottlenecks and we look at the o

## Step: 4 GENERATION

In [19]:
result = llm.invoke(final_prompt)
print(result.content)

Yes, the topic of nuclear fusion is discussed in this video.

What was discussed:
*   Nuclear fusion is identified as an area where AI can provide significant help, especially in the domain of energy and climate.
*   The speakers collaborate with domain experts to identify bottleneck problems preventing fusion from working today, and then apply AI methods to address them.
*   They specifically mention a collaboration with EPFL in Switzerland, using their test reactor.
*   They discussed a project where AI, using deep reinforcement learning, was applied to the magnetic control of tokamak plasmas.
*   This work involved holding and controlling plasma in specific shapes for a record amount of time, effectively solving one of the challenges in fusion by providing a controller that can contain and hold plasma in desired structures (like "droplets") that are better for energy production.
*   They are now looking for the next problems to tackle in the fusion area, collaborating with fusion st

## Step: 5 Bulding Chains

In [20]:
from langchain_core.runnables import RunnableLambda, RunnableSequence, RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [21]:
def format_docs(retrieved_docs):
    context_text = '\n\n'.join(doc.page_content for doc in retrieved_docs)
    return context_text

In [22]:
parallel_chain = RunnableParallel({
    'context': retriever | RunnableLambda(format_docs),     # this chain is giving whole string of for results
    'question': RunnablePassthrough()
})

In [23]:
parallel_chain.invoke("Who is Damis?")

{'context': "the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful humans in the history of artificial intelligence and science and engineering in general this was truly an honor and a pleasure for me to finally sit down with him for this conversation and i'm sure we will talk many times again in the future this is the lex friedman podcast to support it please check out our sponsors in the description and now dear friends here's demis hassabis let's start with a bit of a personal question am i an ai program you wrote to interview people until i get

In [24]:
parser = StrOutputParser()

In [25]:
main_chain = parallel_chain | prompt | llm | parser 

In [26]:
main_chain.invoke('summarize the entire video')

'The speakers discuss the challenge of explaining complex concepts, particularly in physics, and the idea that a "deeper, maybe simpler explanation" beyond the standard model could address long-standing mysteries like consciousness and gravity. They touch upon the notion that the ability to explain things clearly and simply is a hallmark of intelligence, referencing Richard Feynman. The conversation also explores human enhancement through symbiosis with computing devices and the role of language as a powerful, though not exclusive, modality for systems to explain their capabilities and generalize across tasks.'