In [None]:
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnableSequence, RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain_community.vectorstores import FAISS
from dotenv import load_dotenv

In [None]:
# Fetch the English captions for one video and flatten them into a single
# string that the rest of the notebook works from.
yt_api = YouTubeTranscriptApi()
video_id = 'd2kxUVwWWwU'
try:
    video_transcript = yt_api.fetch(video_id=video_id, languages=['en'])
except TranscriptsDisabled:
    print("No captions available for this video.")
    # Every later cell reads `transcript`; stop here with the real cause
    # instead of letting the notebook fail later with a NameError.
    raise
else:
    # Each fetched snippet exposes its caption text as `.text`.
    transcript = " ".join(snippet.text for snippet in video_transcript)
    print(transcript)

In [None]:
# Inspect the raw object returned by `yt_api.fetch`, before it was joined into `transcript`.
video_transcript

In [None]:
# Break the transcript into overlapping chunks and report how many were produced.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = splitter.create_documents(texts=[transcript])
len(chunks)

In [None]:
# Peek at the first document produced by the splitter.
chunks[0]

In [None]:
# Embed every chunk with a sentence-transformers model and index the
# resulting vectors in a FAISS store.
embedding = HuggingFaceEmbeddings(
    model='sentence-transformers/all-MiniLM-L6-v2',
)
vector_store = FAISS.from_documents(documents=chunks, embedding=embedding)

In [None]:
# Show the store's mapping from FAISS index position to docstore ID.
vector_store.index_to_docstore_id

In [None]:
# Look up a stored chunk by its docstore ID. The IDs are generated afresh
# each time the store is built, so read one from the live mapping rather
# than pasting a UUID from an earlier run (which would match nothing after
# a kernel restart).
first_doc_id = vector_store.index_to_docstore_id[0]
vector_store.get_by_ids([first_doc_id])

In [None]:
# Similarity-search retriever over the vector store; `k` is forwarded as a
# search keyword argument.
retriever = vector_store.as_retriever(
    search_type='similarity',
    search_kwargs={'k': 4},
)

In [None]:
# Smoke-test the retriever with a sample query and display what it returns.
retriever.invoke("what is weight")

In [None]:
# Load variables from a local .env file (if one exists) so that a Hugging Face
# API token stored there is present in the environment before the endpoint
# client is created. `load_dotenv` was imported but never called.
load_dotenv()

llm = HuggingFaceEndpoint(
    repo_id='deepseek-ai/DeepSeek-V3.2-Exp',
    task='text-generation',
    # `temperature` is a first-class generation parameter of the endpoint.
    # `server_kwargs` is meant for inference-client/server options, so a
    # temperature placed there does not control sampling.
    temperature=0.2,
)

In [None]:
# Wrap the endpoint LLM in the chat-model interface used by the rest of the notebook.
model = ChatHuggingFace(llm=llm)
# Display the configured chat model.
model

In [None]:
# Prompt that restricts the model to the retrieved transcript context and
# tells it to admit when that context is not enough.
prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template="""
    You are a helpful assistant.
    Answer ONLY from the provided transcript context.
    If the context is insufficient, just say you don't know.
    
    {context}
    Question: {question}
    """,
)

In [None]:
# Manual walk-through, step 1: retrieve the chunks for a sample question.
question = 'What is bias'
retrieved_docs = retriever.invoke(question)

In [None]:
# Step 2: merge the retrieved chunks into one context string, with a blank
# line between chunks.
context_text = "\n\n".join(
    [doc.page_content for doc in retrieved_docs]
)

In [None]:
# Display the merged context that will be inserted into the prompt.
context_text

In [None]:
# Step 3: fill the prompt template's `context` and `question` slots.
final_prompt = prompt.invoke({'context':context_text,'question':question})

In [None]:
# Display the fully rendered prompt value.
final_prompt

In [None]:
# Step 4: send the rendered prompt to the chat model and print the text of its reply.
answer = model.invoke(final_prompt)
print(answer.content)

In [None]:
def format_docs(retrieved_docs):
    """Merge retrieved documents into a single context string.

    Args:
        retrieved_docs: Iterable of objects exposing a `page_content` attribute.

    Returns:
        The `page_content` values joined in input order, separated by a
        blank line. An empty iterable yields an empty string.
    """
    pieces = [doc.page_content for doc in retrieved_docs]
    return "\n\n".join(pieces)

In [None]:
# Fan the incoming question out into the two variables the prompt expects:
# `context` (retrieved chunks, formatted) and `question` (passed through as-is).
parallel_chain = RunnableParallel(
    context=retriever | RunnableLambda(format_docs),
    question=RunnablePassthrough(),
)

In [None]:
# Parser used as the last stage of the chain, after the chat model.
parser = StrOutputParser()

In [None]:
# Compose retrieval/formatting -> prompt -> chat model -> string parser into one runnable.
final_chain = parallel_chain | prompt | model | parser
# Run the whole pipeline on a question string and display the result.
result = final_chain.invoke('what is sigmoid function')
result