In [None]:
!pip install langchain-openai youtube-transcript-api  groq  langchain_groq langchain-community tiktoken faiss-cpu langchain



In [None]:
from google.colab import userdata
# Read the value stored under the name 'GroqApi' through Colab's `userdata` helper
# instead of hard-coding the key; it is passed to ChatGroq as `api_key` further down.
API_KEY = userdata.get('GroqApi')

In [None]:
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from youtube_transcript_api import YouTubeTranscriptApi , TranscriptsDisabled

In [None]:
video_id = 'aO1-6X_f74M'

# Reset both names before fetching so a failed fetch can never leave a transcript
# from an earlier run (possibly of a different video) behind in the kernel.
transcript = None
s = ''
try:
  transcript = YouTubeTranscriptApi().fetch(video_id,languages=['en'])
  # Concatenate the snippet texts, each followed by a single space.
  s = ''.join(snippet.text + ' ' for snippet in transcript)
except TranscriptsDisabled:
  print('Transcript is disabled')
  # Every later cell needs the transcript text in `s`; stop here with the real
  # cause instead of letting a later cell fail on missing or empty text.
  raise

# print(s)
# print(transcript)

In [None]:
# Cut the transcript text into small overlapping chunks for embedding.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=30,
)
chunks = splitter.split_text(s)

# Show how many chunks were produced, then the chunks themselves.
print(len(chunks))
chunks

14


['how to talk about hometown I live in a city called London this is not London England it is a city in',
 "England it is a city in Canada some of Canada's cities have the same names as English cities London",
 'as English cities London Canada has many restaurants and cafes for entertainment you can go to the',
 'you can go to the movie theater or art gallery there is a famous university in London called',
 'university in London called Western University it is not close to the ocean or lake so there are no',
 'ocean or lake so there are no beaches it is a multicultural City so there are churches temples and',
 "are churches temples and mosques I've been living in London for 10 years my favorite part of this",
 'my favorite part of this city is the nature there are many parks and forests where you can go for',
 'forests where you can go for walks runs or have picnics With Friends the worst thing about London',
 'the worst thing about London is that it is far away from other big cities in

In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# Embedding model handed to FAISS below.
# NOTE(review): newer langchain-community releases appear to deprecate this class in
# favour of the separate `langchain-huggingface` package — confirm before upgrading.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Build the FAISS vector store from the chunk strings and the embeddings object.
vector_store = FAISS.from_texts(chunks, embeddings)

In [None]:
# Inspect the mapping from index position to the id each stored chunk received.
vector_store.index_to_docstore_id

{0: '8e9fa741-2f4f-4b84-86be-4d38fccdd23a',
 1: 'd9962150-68de-4bb7-92a0-0fcdf189e21c',
 2: '5a38ae19-c1a8-401a-a257-7efd094b1f60',
 3: 'c61371fa-f233-412f-a0d6-bd2fd1983192',
 4: '5a520529-31e6-42e6-9aeb-4a3b0723db7d',
 5: '4a487670-a7fa-4291-91d7-c264087ac636',
 6: 'e8b59808-2ba6-4877-882f-7ffb9c50003e',
 7: 'ae938929-f859-44f5-960d-8d5b5420d75c',
 8: 'f892d5b8-29c4-46c7-8971-ce851964e3a5',
 9: '57fc5b2f-2141-4220-903a-2289a876f67a',
 10: 'aaed84cf-1b55-42be-8de2-5dd58b771993',
 11: '9a2b727c-2360-41b3-b948-64801f3bc647',
 12: '3c05b35f-4b96-4979-9d19-71fb6b11cb95',
 13: '27a85a93-8958-47f4-b73a-2477d82793c1'}

In [None]:
# Document ids are regenerated whenever the store is rebuilt, so an id pasted from an
# earlier run matches nothing and yields []. Take an id from the current index instead.
vector_store.get_by_ids([vector_store.index_to_docstore_id[0]])

[]

In [None]:
# Similarity retriever over the vector store, returning the 4 best matches per query.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=4),
)

In [None]:
# Try the retriever on a sample query; the cell output lists the returned Documents.
retriever.invoke("Topic of Video")

[Document(id='c61371fa-f233-412f-a0d6-bd2fd1983192', metadata={}, page_content='you can go to the movie theater or art gallery there is a famous university in London called'),
 Document(id='8e9fa741-2f4f-4b84-86be-4d38fccdd23a', metadata={}, page_content='how to talk about hometown I live in a city called London this is not London England it is a city in'),
 Document(id='27a85a93-8958-47f4-b73a-2477d82793c1', metadata={}, page_content='I enjoy living in London [Music]'),
 Document(id='4a487670-a7fa-4291-91d7-c264087ac636', metadata={}, page_content='ocean or lake so there are no beaches it is a multicultural City so there are churches temples and')]

In [None]:
# Groq chat model, authenticated with the API_KEY loaded earlier in the notebook.
model = ChatGroq(
    model='llama-3.3-70b-versatile',
    api_key=API_KEY,
)

In [None]:
# Prompt that tells the model to answer only from the supplied transcript context and
# to say it doesn't know otherwise. `{context}` and `{question}` are the two
# placeholders filled in when the template is invoked.
prompt = PromptTemplate(
    template="""
      You are a helpful assistant.
      Answer ONLY from the provided transcript context.
      If the context is insufficient, just say you don't know.
      {context}
      Question: {question}
    """,
    input_variables = ['context', 'question']
)


In [None]:
question = 'Topic of this video?'
# Ask the retriever for the chunks it considers closest to this question.
docs = retriever.invoke(question)

In [None]:
# Merge the text of the retrieved documents into one string, separated by blank lines.
context_text = "\n\n".join([retrieved.page_content for retrieved in docs])

In [None]:
# Fill the template with the retrieved context and the question.
final_prompt = prompt.invoke({"context": context_text, "question": question})
# Bare name as the last expression so the rendered prompt is displayed.
final_prompt

StringPromptValue(text="\n      You are a helpful assistant.\n      Answer ONLY from the provided transcript context.\n      If the context is insufficient, just say you don't know.\n      how to talk about hometown I live in a city called London this is not London England it is a city in\n\nthe worst thing about London is that it is far away from other big cities in Canada you need to\n\nforests where you can go for walks runs or have picnics With Friends the worst thing about London\n\nI enjoy living in London [Music]\n      Question: Topic of this video?\n    ")

In [None]:
# Send the rendered prompt to the chat model.
result = model.invoke(final_prompt)
# Display only the text of the reply.
result.content

"The topic of this video is the speaker's hometown, London (in Canada), and their thoughts about living there, including both positive and negative aspects."

## Same pipeline, composed as a chain (`prompt | model | parser`)

In [None]:
from langchain_core.output_parsers import StrOutputParser
parser = StrOutputParser()

def RAG(user_question = 'Forms of entertainement in Hometown'):
  """Answer a question about the video from its transcript.

  Retrieves the transcript chunks most similar to `user_question`, places
  them in the prompt as context, and returns the model's reply as plain text.

  Relies on the notebook-level `retriever`, `prompt`, `model` and `parser`
  objects defined in earlier cells.

  Args:
    user_question: The question to answer.

  Returns:
    The model's answer as a string (the output of `parser`).
  """
  # Retrieve context for *this* question. Reusing the notebook-level
  # `context_text` would answer every question from the chunks that were
  # fetched for the earlier 'Topic of this video?' query.
  question_docs = retriever.invoke(user_question)
  question_context = "\n\n".join(doc.page_content for doc in question_docs)

  chain = prompt | model | parser
  result = chain.invoke({
      "context": question_context,
      "question": user_question
  })
  return result

RAG()

'In London, some forms of entertainment or leisure activities include going for walks, runs, or having picnics with friends in the forests.'