In [None]:

!pip install -q youtube-transcript-api \
               langchain-community \
               langchain-huggingface \
               faiss-cpu \
               sentence-transformers \
               transformers \
               python-dotenv


In [None]:

from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint



In [None]:

video_id = "Gfr50f6ZBvo"  # only the ID, not the full URL

try:
    # Request the English transcript specifically (languages=["en"]).
    # NOTE(review): get_transcript is the classmethod-style entry point of older
    # youtube-transcript-api releases - confirm it exists in the installed version.
    transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])
except TranscriptsDisabled:
    print("No captions available for this video.")
    # Stop here: every later cell needs `transcript` / `transcript_list`.
    # Swallowing the error would either fail later with a NameError or, on a
    # reused kernel, silently continue with a transcript from an earlier run.
    raise

# Flatten the caption entries into a single plain-text string.
transcript = " ".join(chunk["text"] for chunk in transcript_list)

# Preview only the first 1000 characters; `transcript` keeps the full text.
trimmed_transcript = transcript[:1000]
print(trimmed_transcript)

the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful humans in the history of artificial intelligence and science and engineering in general this was truly an honor and a pleasure for me to finally sit down with him for this conversation and i'm sure we will talk many times again in the future this is the lex friedman podcast to support it please check out our sponsors in the description and now dear friends here's demis hassabis let's start with a bit of a personal question am i an ai program you wrote to interview people until i get good enough 

In [None]:
# Keep only the first 50 raw transcript entries and display them to inspect
# the structure returned by the API (text / start / duration per entry).
trimmed_transcript_list = transcript_list[:50]
trimmed_transcript_list

[{'text': 'the following is a conversation with',
  'start': 0.08,
  'duration': 3.44},
 {'text': 'demus hasabis', 'start': 1.76, 'duration': 4.96},
 {'text': 'ceo and co-founder of deepmind', 'start': 3.52, 'duration': 5.119},
 {'text': 'a company that has published and builds',
  'start': 6.72,
  'duration': 4.48},
 {'text': 'some of the most incredible artificial',
  'start': 8.639,
  'duration': 4.561},
 {'text': 'intelligence systems in the history of',
  'start': 11.2,
  'duration': 4.8},
 {'text': 'computing including alfred zero that',
  'start': 13.2,
  'duration': 3.68},
 {'text': 'learned', 'start': 16.0, 'duration': 2.96},
 {'text': 'all by itself to play the game of gold',
  'start': 16.88,
  'duration': 4.559},
 {'text': 'better than any human in the world and',
  'start': 18.96,
  'duration': 5.6},
 {'text': 'alpha fold two that solved protein',
  'start': 21.439,
  'duration': 4.241},
 {'text': 'folding', 'start': 24.56, 'duration': 4.16},
 {'text': 'both tasks consider

In [None]:
# Cut the full transcript into chunks of at most 1000 characters, with 150
# characters of overlap between consecutive chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=150,
)

# create_documents takes a list of texts; the whole transcript is the only one.
chunks = splitter.create_documents([transcript])

In [None]:

# Number of chunks produced by the splitter.
len(chunks)

158

In [None]:

# Inspect the first chunk to sanity-check the split.
chunks[0]

Document(metadata={}, page_content="the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful humans in the history of artificial intelligence and science and engineering in general this was truly an honor and a pleasure for me to finally sit down with him for this conversation and i'm sure we will talk many times again in the future this is the lex friedman podcast to support it please check out our sponsors in the description and now dear friends here's demis hassabis let's start with a bit of a personal question am i an ai program you wrote to inter

In [None]:

# Sentence-embedding model used to vectorise the transcript chunks.
# NOTE(review): HuggingFaceEmbeddings is imported from langchain_community in
# this notebook; langchain_huggingface (already installed above) appears to
# ship the maintained version - confirm before switching the import.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [None]:

# Embed every chunk and build a FAISS vector store from the results.
vector_store = FAISS.from_documents(chunks, embeddings)

In [None]:

# Mapping from integer index position to the docstore ID of each stored chunk.
vector_store.index_to_docstore_id

{0: '450c9052-16a0-4471-a00f-9d2f8545445b',
 1: '4922e377-9803-4701-a917-e3222e7b4b88',
 2: '22c51755-3c60-4143-8eb4-0d01cb303cd1',
 3: 'ecab06de-cd26-474f-a574-46a7272fc3a7',
 4: 'f2ea5625-4358-4962-9c2f-e8ef059da23d',
 5: '39aaffef-86d4-4610-9017-bf0d078d1c8e',
 6: 'a4140808-06ee-47cc-9398-c9f3b04ccf42',
 7: 'e2c0558c-9655-4548-8e63-2191a7540eec',
 8: '068fa7f6-a600-4947-b41c-c0dbadeb2828',
 9: 'd603a5fa-9805-4204-9d29-bef6db4d7520',
 10: 'c1d88907-9803-4567-8658-04b3fd6aa212',
 11: '36168fc7-8bf3-4ff7-a18a-0084e777238b',
 12: 'a0366790-d43b-45ae-80fa-c02acd4c9d01',
 13: '0ea2125b-53e5-4f23-98d0-c78c08f76b1a',
 14: '724d3950-9b4c-428b-9f65-a9ff031efa44',
 15: '61e7c3c9-0466-4d55-8070-d12aacfb06f2',
 16: '6fd3e656-3772-44a2-bc96-1bd654e81386',
 17: '0b709582-e43b-41ab-805a-8b8107b75fb7',
 18: '719b7b35-840b-47a5-bbb6-f858f77b9e05',
 19: 'cfe1bd17-208d-4597-80bf-8d5b4ee33988',
 20: '46d8171a-2628-4c0e-b43c-2a40eff8a9f8',
 21: '08b83bfe-431a-4cdc-96d6-1b26a28bc018',
 22: '5a8b1a87-f127-

In [None]:

# Look a document up by ID. The ID is read from the live index rather than
# hard-coded: a literal ID copied from another run is not present in this
# store and the lookup just returns an empty list.
vector_store.get_by_ids([vector_store.index_to_docstore_id[0]])

[]

In [None]:

# Expose the vector store as a retriever returning the 4 most similar chunks.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

In [None]:

# Smoke-test the retriever with a sample query.
retriever.invoke('What is deepmind')

[Document(id='46d8171a-2628-4c0e-b43c-2a40eff8a9f8', metadata={}, page_content="of our vision at the start of deepmind was that we would use games very heavily uh as our main testing ground certainly to begin with um because it's super efficient to use games and also you know it's very easy to have metrics to see how well your systems are improving and what direction your ideas are going in and whether you're making incremental improvements and because those games are often rooted in something that humans did for a long time beforehand there's already a strong set of rules like it's already a damn good benchmark yes it's really good for so many reasons because you've got you've got you've got clear measures of how good humans can be at these things and in some cases like go we've been playing it for thousands of years um and and uh often they have scores or at least win conditions so it's very easy for reward learning systems to get a reward it's very easy to specify what that reward i

In [None]:

from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
import os
import warnings

# The hosted inference endpoint answers 401 Unauthorized without valid
# credentials, so read the access token from the environment (never hard-code
# it in the notebook) and flag a missing token before the first request.
hf_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or os.environ.get("HF_TOKEN")
if not hf_token:
    warnings.warn(
        "No Hugging Face token found in HUGGINGFACEHUB_API_TOKEN or HF_TOKEN; "
        "requests to the inference endpoint may fail with 401 Unauthorized."
    )

llm = HuggingFaceEndpoint(
    repo_id="deepseek-ai/DeepSeek-R1-0528",  # or any from the table
    task="text-generation",
    # None (no token in the environment) leaves the library's default handling.
    huggingfacehub_api_token=hf_token,
)

# Wrap for chat-style usage
model = ChatHuggingFace(llm=llm)

In [None]:

# Prompt with two placeholders: {context} receives the retrieved transcript
# text and {question} receives the user's question.
prompt = PromptTemplate(
    template="""
You are an expert assistant that answers questions based on a YouTube transcript.

Here is the transcript chunk:\n\n{context}

Answer the following question as clearly and specifically as possible:
{question}
""",
    input_variables=["context", "question"],
)

In [None]:

# Sample question, and the chunks the retriever considers most relevant to it.
question = "is the topic of nuclear fusion discussed in this video? if yes then what was discussed"

retrieved_docs = retriever.invoke(question)

In [None]:

# Display the retrieved documents for the sample question.
retrieved_docs

[Document(id='7c69678b-b979-45d7-8ba1-99a7aee7fff0', metadata={}, page_content="is we would like to learn that instead and they also had a simulator of these plasma so there were lots of criteria that matched what we we like to to to use so can ai eventually solve nuclear fusion well so we with this problem and we published it in a nature paper last year uh we held the fusion that we held the plasma in specific shapes so actually it's almost like carving the plasma into different shapes and control and hold it there for the record amount of time so um so that's one of the problems of of fusion sort of um solved so i have a controller that's able to no matter the shape uh contain it continue yeah contain it and hold it in structure and there's different shapes that are better for for the energy productions called droplets and and and so on so um so that was huge and now we're looking we're talking to lots of fusion startups to see what's the next problem we can tackle uh in the fusion a

In [None]:

# Merge the text of all retrieved chunks into one string, with a blank line
# between chunks, then display it.
context_text = "\n\n".join([found.page_content for found in retrieved_docs])
context_text

"is we would like to learn that instead and they also had a simulator of these plasma so there were lots of criteria that matched what we we like to to to use so can ai eventually solve nuclear fusion well so we with this problem and we published it in a nature paper last year uh we held the fusion that we held the plasma in specific shapes so actually it's almost like carving the plasma into different shapes and control and hold it there for the record amount of time so um so that's one of the problems of of fusion sort of um solved so i have a controller that's able to no matter the shape uh contain it continue yeah contain it and hold it in structure and there's different shapes that are better for for the energy productions called droplets and and and so on so um so that was huge and now we're looking we're talking to lots of fusion startups to see what's the next problem we can tackle uh in the fusion area so another fascinating place in a paper title pushing the frontiers of\n\nh

In [None]:

# Fill the prompt template with the retrieved context and the question.
final_prompt = prompt.invoke(
    {
        "context": context_text,
        "question": question,
    }
)

In [None]:

# Display the fully rendered prompt that will be sent to the model.
final_prompt

StringPromptValue(text="\nYou are an expert assistant that answers questions based on a YouTube transcript.\n\nHere is the transcript chunk:\n\nis we would like to learn that instead and they also had a simulator of these plasma so there were lots of criteria that matched what we we like to to to use so can ai eventually solve nuclear fusion well so we with this problem and we published it in a nature paper last year uh we held the fusion that we held the plasma in specific shapes so actually it's almost like carving the plasma into different shapes and control and hold it there for the record amount of time so um so that's one of the problems of of fusion sort of um solved so i have a controller that's able to no matter the shape uh contain it continue yeah contain it and hold it in structure and there's different shapes that are better for for the energy productions called droplets and and and so on so um so that was huge and now we're looking we're talking to lots of fusion startups

In [None]:
# Send the rendered prompt to the chat model and print the text of its reply.
answer = model.invoke(final_prompt)
print(answer.content)

HfHubHTTPError: 401 Client Error: Unauthorized for url: https://router.huggingface.co/featherless-ai/v1/chat/completions (Request ID: Root=1-685a7cd0-0ca9774761070b924e64ea88;4bebc17f-e064-4b9a-85df-6c590002680e)

Invalid credentials in Authorization header

In [None]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [None]:
def format_docs(retrieved_docs):
    """Join the text of the retrieved documents into one context string.

    Args:
        retrieved_docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in order, separated by a blank line.
    """
    page_texts = [doc.page_content for doc in retrieved_docs]
    return "\n\n".join(page_texts)

In [None]:

# Fan the incoming question out into the two inputs the prompt needs:
#   context  -> retrieved chunks joined into one string by format_docs
#   question -> the question itself, passed through unchanged
parallel_chain = RunnableParallel(
    context=retriever | RunnableLambda(format_docs),
    question=RunnablePassthrough(),
)

In [None]:

# Check the intermediate output of the parallel step for a sample question.
parallel_chain.invoke('who is Demis')

In [None]:

# Output parser placed at the end of the chain, after the model.
parser = StrOutputParser()

In [None]:

# Full pipeline: build the prompt inputs, render the prompt, call the model,
# then parse the model output.
main_chain = (
    parallel_chain
    | prompt
    | model
    | parser
)

In [None]:

# Run the whole chain end to end on a sample question.
main_chain.invoke('what is the Conversation Going on in the video')
