In [1]:
from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings, ChatHuggingFace
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import TranscriptsDisabled
from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from dotenv import load_dotenv


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the Hugging Face API token used by the
# endpoint created below — confirm which variables the .env file must define.
load_dotenv()

True

In [13]:
# Text-generation endpoint for the `openai/gpt-oss-20b` repository.
# No sampling parameters (e.g. temperature) are passed, so the endpoint
# defaults apply.
llm = HuggingFaceEndpoint(
    task="text-generation",
    repo_id="openai/gpt-oss-20b",
)

# Wrap the raw endpoint so it can be used as the chat model in the chain.
model = ChatHuggingFace(llm=llm)

In [4]:
# Video ID only (not the full YouTube URL).
video_id = "Cbqtxys2qPM"

try:
    # English captions are requested explicitly.
    # Original author's note (translated): if no language is given, the API
    # returns the best available language — verify against the library's
    # documented default for `languages`.
    transcript_list = YouTubeTranscriptApi().fetch(video_id, languages=['en'])
except TranscriptsDisabled as exc:
    # Fail loudly: merely printing here would leave `transcript` undefined
    # (NameError in the next cell on a fresh kernel) or, worse, silently keep a
    # stale transcript from a previous run of this cell.
    raise RuntimeError("No captions available for this video.") from exc

# Flatten the timed caption snippets into one plain-text string.
transcript = " ".join(snippet.text for snippet in transcript_list.snippets)

# An empty transcript would produce zero chunks and an unusable vector store,
# so stop here with a clear message instead.
if not transcript.strip():
    raise ValueError(f"Transcript for video {video_id!r} is empty.")

In [5]:
# Split the transcript into overlapping chunks: 350 characters each, with
# 90 characters shared between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=350,
    chunk_overlap=90,
)

# The splitter takes a list of texts; the whole transcript is passed as one text.
chunks = splitter.create_documents([transcript])

In [6]:
# Sentence-transformer model used to embed every transcript chunk.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Build the FAISS index from the chunk documents and their embeddings.
vector_store = FAISS.from_documents(documents=chunks, embedding=embeddings)

In [7]:
# Similarity-search retriever that returns the 3 best-matching chunks per query.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

In [8]:
# Grounded question-answering prompt. `{context}` receives the retrieved
# transcript text and `{question}` the user's query; the instructions tell the
# model to answer only from that context and to admit when it is insufficient.
prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template="""
    You are a helpful assistant.
      Answer ONLY from the provided transcript context.
      If the context is insufficient, just say you don't know.

      {context}
      Question: {question}
      """,
)

In [10]:
# The user question to answer from the video transcript.
question = "is trump wanting greenland"

In [11]:
def format(retrieved_docs):
    """Concatenate the text of the retrieved documents into one context string.

    Args:
        retrieved_docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values joined by a blank line (``"\\n\\n"``);
        an empty string when there are no documents.

    Note:
        This name shadows the builtin ``format`` for the rest of the notebook;
        it is kept because the chain definition refers to it by this name.
    """
    separator = "\n\n"
    page_texts = [doc.page_content for doc in retrieved_docs]
    return separator.join(page_texts)

In [12]:
# Fan the incoming question out into the two inputs the prompt expects:
#   context  -> retrieve matching chunks, then join them into one string
#   question -> forwarded unchanged
parallel_chain = RunnableParallel(
    context=retriever | RunnableLambda(format),
    question=RunnablePassthrough(),
)

In [15]:
# Output parser placed at the end of the chain so the final result is a plain string.
parser = StrOutputParser()

In [16]:
# End-to-end RAG pipeline, applied left to right.
chain = (
    parallel_chain  # question -> {"context": ..., "question": ...}
    | prompt        # fill the prompt template
    | model         # call the chat model
    | parser        # reduce the model output to a string
)

In [18]:
# Run the pipeline on the notebook's `question` variable instead of repeating
# the same literal here, so the question is defined in exactly one place.
result = chain.invoke(question)

In [19]:
# Show the model's answer as plain text.
print(result)

The transcript indicates that Trump is seeking control of Greenland. It discusses whether he will use military force to capture it, and notes that diplomatic options have been rejected by Greenland’s prime minister and Denmark.
