In [None]:
# Import dependencies and libraries
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled  # Fixed spacing and case
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_core.runnables import RunnableParallel, RunnablePassthrough,RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from dotenv import load_dotenv

load_dotenv()


  from .autonotebook import tqdm as notebook_tqdm


True

In [None]:

# Preparing Data

video_id = "ZftI2fEz0Fw" # only the ID, not URL
# Preparing Data

def get_youtube_transcript(video_id):
    try:
        # Fetch transcript with English("en")/ hindi ("hi") language preference
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])
        
        # Flatten transcript list into a single string
        return " ".join(chunk["text"] for chunk in transcript_list)
    
    except TranscriptsDisabled:
        print(f"Error: No captions available for video ID: {video_id}")
        return None
    except Exception as e:
        print(f"Error: Failed to fetch transcript for video ID: {video_id}")
        print(f"Details: {str(e)}")
        print("\nPlease verify:")
        print("1. The video ID is correct")
        print("2. The video has English captions")
        print("3. The video is publicly accessible")
        return None

# Test with your video ID
video_id = "Gfr50f6ZBvo"  # only the ID, not full URL
transcript = get_youtube_transcript(video_id)

if transcript:
    print("Successfully fetched transcript!")
    print(f"Transcript length: {len(transcript)} characters")
    # Uncomment to see transcript content
    # print("\nTranscript preview:")
    # print(transcript[:200] + "...")
else:
    print("Failed to fetch transcript. Please try with a different video ID.")

Successfully fetched transcript!
Transcript length: 133836 characters


In [3]:
# Making Chunks of large data
splitter = RecursiveCharacterTextSplitter(chunk_size=1000 , chunk_overlap=200)
chunks = splitter.create_documents([transcript])

In [4]:
# Converting into their respective embeddings
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vector_store = FAISS.from_documents(chunks , embeddings)
vector_store.docstore._dict.keys()

dict_keys(['c83702a3-986e-4c35-a57b-31b232b3a154', 'f036096d-4600-499e-bd36-1e03c0f6bdf4', '8a8e9e86-eeb2-49bc-942c-15e58c936f60', 'da2f42f8-1b82-45f8-bc06-313b703adc97', '1d9c4c87-7e58-47b3-9afb-cdaa1acd6e82', '53f28043-41f1-450a-8b80-7d625d8b8936', '9a616731-2dbe-48bd-944f-e9a462ec0208', '4cd9f341-4810-4b6a-a3a9-a7daf523d7ca', '9449655d-7447-4309-8b5b-26fd3ab91754', 'ae6de94d-ba87-45e8-8f47-a388a683b411', '93987ac7-5874-4c3c-a4ea-11c79e2debc1', '252e1a3c-a0a7-44f2-8459-8f4d4dfcbb59', '40aeccc5-406e-4858-b843-d32e7420fc16', '0c893901-73d0-4732-8ace-4e5e5b31f3cd', '4d4ed586-b367-4d1b-86c8-97389077ef70', '9c7ec5da-6678-462d-96a4-560d4ab288d9', '34026d7c-ff38-4370-823d-afe43483a993', '00fdcaca-6515-4c19-b691-b356b1268267', 'b2628f92-fdcb-49db-a8e5-61f9f7401341', '7c603810-4891-4f4d-bf13-d0c44d1a6fdf', '838c1a1d-70f9-4f1d-8769-35e66cabb70c', 'ad63b1cc-3c1e-441a-85ab-3a16a59b9c29', '99df9604-13d7-4ef4-b5aa-32cfff478cbe', 'd33b6e5e-4313-42db-98ea-0e78c6a30c7e', 'c85b8027-52f1-4df0-957f-b16a

In [5]:
# Retriever 
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 4})

In [9]:
def formatDocs(retrieved_docs):
    context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)
    return context_text

In [6]:
# Augmentation Process:

model = ChatGoogleGenerativeAI(model="gemini-2.0-flash" , temperature=0.4)


In [7]:
prompt = PromptTemplate(
    template="""
      You are a helpful assistant.
      Answer ONLY from the provided transcript context.
      If the context is insufficient, just say you don't know.

      {context}
      Question: {question}
    """,
    input_variables = ['context', 'question']
)

In [10]:
parallelChain = RunnableParallel({
    "context": retriever | RunnableLambda(formatDocs),
    "question": RunnablePassthrough()
})

In [11]:
parser = StrOutputParser()


In [12]:
mainChain = parallelChain | prompt | model | parser

In [14]:
# Ask your question
result = mainChain.invoke("Can you summarize the video")
print(result)

I can't summarize the entire video, but based on the context, here are some topics discussed:

*   **Self-discovery:** Finding your strengths and passions to make a difference.
*   **Demis Hassabis' Daily Life:** Questions about his daily routine, habits, and work.
*   **Fundamental Physics:** Exploring more fundamental explanations of physics beyond the standard model.
*   **Simulating Molecular Interactions:** Using simulations and functional learning to describe electron interactions in materials, with the goal of simulating larger and more complex materials.
*   **Learning Functionals:** The difficulty of mapping initial conditions and simulation parameters to learn the functional.
*   **Data Generation:** Generating data from molecular dynamics simulations on compute clusters.
