In [1]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain_ import OpenAIEmbeddings, ChatOpenAI
import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [2]:
load_dotenv()

True

In [3]:
api_key = os.getenv("GOOGLE_API_KEY")

In [4]:
video_id = "I2ZK3ngNvvI"

In [5]:
try:
    api = YouTubeTranscriptApi()
    transcripts = api.list(video_id)

    # Pick English transcript
    transcript_obj = transcripts.find_transcript(['en'])

    # Fetch the actual list of snippets
    transcript_list = transcript_obj.fetch()

    # Flatten into plain text (using .text, not ['text'])
    transcript = " ".join(chunk.text for chunk in transcript_list)
    print(transcript)

except TranscriptsDisabled:
    print("Transcripts are disabled for this video.")
except NoTranscriptFound:
    print("No transcript found for this video.")

you're one of the greatest teachers of machine learning AI ever from cs231n to today what advice would you give to beginners interested in getting into machine learning beginners are often focused on like what to do and I think the focus should be more like how much you do so I I'm kind of like believer on a high level in this 10 000 hours kind of concept where you just kind of have to just pick the things where you can spend time and you you care about and you're interested in you literally have to put in 10 000 hours of work um it doesn't even like matter as much like where you put it and you'll iterate and you'll improve and you'll waste some time I don't know if there's a better way you need to put in 10 000 hours but I think it's actually really nice because I feel like there's some sense of determinism about being an expert at a thing if you spend ten thousand hours you can literally pick an arbitrary thing and I think if you spend ten thousand hours of deliberate effort and work

In [6]:
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.create_documents([transcript])

In [7]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

In [8]:
vector_store = FAISS.from_documents(chunks, embeddings)

In [9]:
vector_store.index_to_docstore_id

{0: '239a4e3b-8b92-4658-b99b-c36310da3803',
 1: 'afc448ea-e665-493d-874f-c7e33bac45b1',
 2: '858082c8-2064-49d1-8fe9-a0b10f0206a9',
 3: '4336fed9-e9a7-4819-947d-10a2ba56608a',
 4: '23eee3e8-6f91-4ca6-a6db-4b901135f918',
 5: '19692faa-1c92-4802-a08e-77c3194efef8',
 6: 'fc491918-e171-4e3f-a04c-cadd962620be',
 7: '62302ed5-00a5-4157-b202-cb2cc1794d3b'}

In [10]:
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 4})

In [11]:
question          = "what is the conversation going on?"
retrieved_docs    = retriever.invoke(question)

In [12]:
context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)
context_text

"like also the result will obviously look at this and then the result doesn't look like it and I'm like okay I thought I understood yeah but that's why it's really cool to literally code you run it in a notebook and it gives you a result and you're like oh wow yes and like actual numbers actual input X you know actual code yeah it's not mathematical symbols Etc the source of Truth is the code it's not slides it's just like let's build it it's beautiful you're a rare human in that sense\n\nis many times people compare themselves to others in the area I think this is very harmful only compare yourself to you from some time ago like say a year ago are you better than you year ago this is the only way to think um and I think this then you can see your progress and it's very motivating that's so interesting that focus on the quantity of ours because I think a lot of people uh in the beginner stage but actually throughout get paralyzed uh by uh the choice like which one do I pick this path o

In [14]:
prompt = PromptTemplate(
    template="""
      You are a helpful assistant.
      Answer ONLY from the provided transcript context.
      If the context is insufficient, just say you don't know.

      {context}
      Question: {question}
    """,
    input_variables = ['context', 'question']
)

In [15]:
final_prompt = prompt.invoke({"context": context_text, "question": question})
final_prompt

StringPromptValue(text="\n      You are a helpful assistant.\n      Answer ONLY from the provided transcript context.\n      If the context is insufficient, just say you don't know.\n\n      like also the result will obviously look at this and then the result doesn't look like it and I'm like okay I thought I understood yeah but that's why it's really cool to literally code you run it in a notebook and it gives you a result and you're like oh wow yes and like actual numbers actual input X you know actual code yeah it's not mathematical symbols Etc the source of Truth is the code it's not slides it's just like let's build it it's beautiful you're a rare human in that sense\n\nis many times people compare themselves to others in the area I think this is very harmful only compare yourself to you from some time ago like say a year ago are you better than you year ago this is the only way to think um and I think this then you can see your progress and it's very motivating that's so interest

In [16]:
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

In [17]:
answer = llm.invoke(final_prompt)
print(answer.content)

The conversation covers several topics:

1.  **Learning and Understanding:** The importance of coding and running actual code to understand concepts, rather than relying solely on mathematical symbols or slides.
2.  **Personal Growth and Comparison:** The idea of comparing oneself only to their past self (e.g., a year ago) to track progress and stay motivated, rather than comparing to others.
3.  **Learning from Mistakes:** The concept of accumulating "scar tissue" from doing things wrong, which leads to learning, growth, and stronger intuition for future situations.
4.  **The Speaker's View on Teaching:** The speaker clarifies that they don't love the act of teaching itself, but rather they love making people happy and being helpful. They acknowledge that they are good at teaching and people appreciate it, but also highlight how difficult, annoying, and frustrating it can be to create good educational materials.
5.  **Focus on Action:** The importance of focusing on what one has actua

In [36]:
def format_docs(retrieved_docs):
  context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)
  return context_text

In [37]:
parallel_chain = RunnableParallel({
    'context': retriever | RunnableLambda(format_docs),
    'question': RunnablePassthrough()
})

In [45]:
class CleanStrOutputParser(StrOutputParser):
    def parse(self, text: str) -> str:
        # Flatten whitespace and strip markdown characters
        return text.replace("\\n", "\n").replace("*", "").strip()

In [46]:
parser = CleanStrOutputParser()

In [47]:
main_chain = parallel_chain | prompt | llm | parser

In [48]:
main_chain.invoke('Can you summarize the video?')

'This video segment includes:\n\n   A closing statement for a podcast, thanking sponsors and featuring a quote from Edsger Dijkstra: "Computer science is no more about computers than astronomy is about telescopes."\n   A discussion about the need for more fundamental and simpler explanations in physics, suggesting that the current standard model is insufficient.\n   Advice on self-improvement, emphasizing the importance of identifying and honing unique skills and strengths, and combining them with passions to make a difference.\n   A brief segment of "quick questions" about Demis\'s daily habits, such as wake-up time, coffee consumption, and computer use.\n   A reflection on creativity and intelligence, highlighting the ability of great scientists to uniquely combine known information and the power of deeply understanding a large body of knowledge (like many Wikipedia pages) for constructing thought experiments.'