In [25]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFacePipeline,ChatHuggingFace
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate

## Step 1a - Indexing (Document Ingestion

In [26]:
video_id = "bgzP3yn1kNE" # only the ID, not full URL
try:
    # If you don’t care which language, this returns the “best” one
    transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])

    # Flatten it to plain text
    transcript = " ".join(chunk["text"] for chunk in transcript_list)
    print(transcript)

except TranscriptsDisabled:
    print("No captions available for this video.")

it is truly wonderful to see you where are we we're in central London we've come to Mayfair now we are going to a very very special Indian restaurant I went to verish Swami now it's got one Michelin star and loads of you turned around and said Gary there's a better one I was thinking wow there can't be a better one is there gave me a recommendation what is it Jim Cara now it specializes in Northern Indian food and it hasn't got one it's got two to michelan Stars so it's got to be seriously up there you asked me to go here quite a while ago it's Tak me ages to get a booking 3 months waiting list so if you're ever interested in coming here you're going to have to be a bit patient can't wait for it as always got to be careful here let's give it a go kind it has to be very patient the road is suddenly become padilly circus not Mayfair I won't pass go and I won't collect 200 come on look at the menu outside it's certainly not for the Fain hearted I'm going to give you one simple example her

In [27]:
transcript_list

[{'text': 'it is truly wonderful to see you where',
  'start': 0.56,
  'duration': 3.799},
 {'text': "are we we're in central London we've",
  'start': 2.44,
  'duration': 5.16},
 {'text': 'come to Mayfair now we are going to a',
  'start': 4.359,
  'duration': 6.841},
 {'text': 'very very special Indian restaurant I',
  'start': 7.6,
  'duration': 5.32},
 {'text': "went to verish Swami now it's got one",
  'start': 11.2,
  'duration': 3.92},
 {'text': 'Michelin star and loads of you turned',
  'start': 12.92,
  'duration': 4.48},
 {'text': "around and said Gary there's a better",
  'start': 15.12,
  'duration': 4.079},
 {'text': "one I was thinking wow there can't be a",
  'start': 17.4,
  'duration': 4.119},
 {'text': 'better one is there gave me a', 'start': 19.199, 'duration': 6.24},
 {'text': 'recommendation what is it Jim Cara now',
  'start': 21.519,
  'duration': 6.441},
 {'text': 'it specializes in Northern Indian food',
  'start': 25.439,
  'duration': 5.041},
 {'text': "and 

## Step 1b - Indexing (Text Splitting)

In [28]:
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.create_documents([transcript])

In [29]:
len(chunks)

19

In [30]:
chunks[18]

Document(metadata={}, page_content="but that's an experience an experience that if you get the opportunity definitely definitely give it a go certainly at the high end I'm not silly it's really at the high end what marks I'm going to give it loads of you recommended fantastic recommendation it's quality that's the thing that's the standout quality in every aspect every aspect I'm going to give it a 10 out of 10 I've got to give it a 10 out of 10 because each dish that I had there was amongst the best if not the best I've ever had in any Indian restaurant ever and I love the whole experience if you recommended Jim carner I want to thank you personally thank you very very much if you've been here please tell me what you think if you can like And subscribe to the channel you know I always appreciate that and until next time very lucky person and I recognize that bye-bye that was a brilliant recommendation w")

## Step 1c & 1d - Indexing (Embedding Generation and Storing in Vector Store)

In [31]:
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
vector_store = FAISS.from_documents(chunks, embeddings)

In [32]:
vector_store.index_to_docstore_id

{0: '07fa68dd-99c0-4f89-8942-36a85dfec4c4',
 1: '9ae5aa62-150b-4013-8992-c66e67afe6ac',
 2: '44d49128-bf66-4472-adba-bacc1628ec97',
 3: 'fc99891e-12b1-4179-9925-019f1024ed2d',
 4: '2badafd6-bd5f-4288-8f3d-ba5268c6d557',
 5: 'fb499053-54ac-48fc-9a65-6320618fea51',
 6: '4c372e0a-a556-4314-bb19-8d1c03084a4e',
 7: '5d63c436-2b91-449f-a765-66db23599cc5',
 8: '8d3caeb2-e2eb-412d-96f0-8bcd8689ca38',
 9: '696a22a4-d393-4424-82b2-20f6c2b39f5f',
 10: 'bae240c2-7326-4601-8306-8e8986e637ae',
 11: 'cf3c35e3-8327-4dd6-bda9-c0afe91c0ba1',
 12: 'cf3fa3a9-ec44-434a-baba-725c896e018b',
 13: '292da20e-a7ac-4c48-a069-66e61c4291e3',
 14: '1c5c9e7a-3d09-4a75-bc98-02c9cc182f23',
 15: '1fbc013c-bfcb-4e90-b86e-aa7c80d8aa36',
 16: 'ebae6af9-a237-48fb-8c4a-5def6b899410',
 17: 'f113aba7-8790-4c95-8810-813197ca3475',
 18: '1a0b0e9c-60ee-43f4-93da-c54157631990'}

In [33]:
vector_store.get_by_ids(['a1c27a4d-37ab-4047-99f8-5f4090c2b93d'])

[]

## Step 2 - Retrieval

In [34]:
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 4})

In [35]:
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002B21AC8F4A0>, search_kwargs={'k': 4})

In [36]:
retriever.invoke('What is the best dish here')

[Document(id='1a0b0e9c-60ee-43f4-93da-c54157631990', metadata={}, page_content="but that's an experience an experience that if you get the opportunity definitely definitely give it a go certainly at the high end I'm not silly it's really at the high end what marks I'm going to give it loads of you recommended fantastic recommendation it's quality that's the thing that's the standout quality in every aspect every aspect I'm going to give it a 10 out of 10 I've got to give it a 10 out of 10 because each dish that I had there was amongst the best if not the best I've ever had in any Indian restaurant ever and I love the whole experience if you recommended Jim carner I want to thank you personally thank you very very much if you've been here please tell me what you think if you can like And subscribe to the channel you know I always appreciate that and until next time very lucky person and I recognize that bye-bye that was a brilliant recommendation w"),
 Document(id='1fbc013c-bfcb-4e90-b8

## Step 3 - Augmentation

In [37]:
llm = HuggingFacePipeline.from_model_id(
    model_id='google/gemma-2-2b-it',
    task='text-generation',
    pipeline_kwargs=dict(
        temperature=0.5,
        max_new_tokens=100
    ) 
)

model = ChatHuggingFace(llm=llm)

Loading checkpoint shards: 100%|██████████| 2/2 [00:17<00:00,  8.66s/it]
Device set to use cpu


In [38]:
prompt = PromptTemplate(
    template="""
      You are a helpful assistant.
      Answer ONLY from the provided transcript context.
      If the context is insufficient, just say you don't know.

      {context}
      Question: {question}
    """,
    input_variables = ['context', 'question']
)

In [39]:
question          = "is the topic of Indian food discussed in this video? if yes then what was discussed"
retrieved_docs    = retriever.invoke(question)

In [40]:
retrieved_docs

[Document(id='ebae6af9-a237-48fb-8c4a-5def6b899410', metadata={}, page_content="Jim carner one of the most expensive and one of the best Indian restaurants in the world I can confirm both I love the inside I love the service I like the ambience everything on there was top draw the food wow wow wow wow seriously that food was truly incredible every mouthful every every aspect of it you put it in it had the flavor oh it it was brilliant cameraman he loved it as well so both of us thought that was great onto the bill starter 12250 uh that butter masala £32 the prawn Curry £ 33b Bread Basket 12250 uh the rice £7 and the dessert uh £10 service charge in there uh 15% 12 £ 3£ 123 that's mindblowing In fairness I did order a lot there there enough for two people but that's still an awful lot of money but then again I am looking at quality would I come here all the time you know would I come here all the time no no there's no chance but would is it somewhere you would go for that really special

In [41]:
context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)
context_text

"Jim carner one of the most expensive and one of the best Indian restaurants in the world I can confirm both I love the inside I love the service I like the ambience everything on there was top draw the food wow wow wow wow seriously that food was truly incredible every mouthful every every aspect of it you put it in it had the flavor oh it it was brilliant cameraman he loved it as well so both of us thought that was great onto the bill starter 12250 uh that butter masala £32 the prawn Curry £ 33b Bread Basket 12250 uh the rice £7 and the dessert uh £10 service charge in there uh 15% 12 £ 3£ 123 that's mindblowing In fairness I did order a lot there there enough for two people but that's still an awful lot of money but then again I am looking at quality would I come here all the time you know would I come here all the time no no there's no chance but would is it somewhere you would go for that really special occasion yeah enough food for two people you got to remember that the other\n\

In [42]:
final_prompt = prompt.invoke({"context": context_text, "question": question})

In [43]:
final_prompt

StringPromptValue(text="\n      You are a helpful assistant.\n      Answer ONLY from the provided transcript context.\n      If the context is insufficient, just say you don't know.\n\n      Jim carner one of the most expensive and one of the best Indian restaurants in the world I can confirm both I love the inside I love the service I like the ambience everything on there was top draw the food wow wow wow wow seriously that food was truly incredible every mouthful every every aspect of it you put it in it had the flavor oh it it was brilliant cameraman he loved it as well so both of us thought that was great onto the bill starter 12250 uh that butter masala £32 the prawn Curry £ 33b Bread Basket 12250 uh the rice £7 and the dessert uh £10 service charge in there uh 15% 12 £ 3£ 123 that's mindblowing In fairness I did order a lot there there enough for two people but that's still an awful lot of money but then again I am looking at quality would I come here all the time you know would 

## Step 4 - Generation

In [44]:
answer = model.invoke(final_prompt)
print(answer.content)



<bos><start_of_turn>user
You are a helpful assistant.
      Answer ONLY from the provided transcript context.
      If the context is insufficient, just say you don't know.

      Jim carner one of the most expensive and one of the best Indian restaurants in the world I can confirm both I love the inside I love the service I like the ambience everything on there was top draw the food wow wow wow wow seriously that food was truly incredible every mouthful every every aspect of it you put it in it had the flavor oh it it was brilliant cameraman he loved it as well so both of us thought that was great onto the bill starter 12250 uh that butter masala £32 the prawn Curry £ 33b Bread Basket 12250 uh the rice £7 and the dessert uh £10 service charge in there uh 15% 12 £ 3£ 123 that's mindblowing In fairness I did order a lot there there enough for two people but that's still an awful lot of money but then again I am looking at quality would I come here all the time you know would I come here

## Building a Chain

In [45]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [46]:
def format_docs(retrieved_docs):
  context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)
  return context_text

In [47]:
parallel_chain = RunnableParallel({
    'context': retriever | RunnableLambda(format_docs),
    'question': RunnablePassthrough()
})

In [48]:
parallel_chain.invoke('what is Jim Carner?')

{'context': "but that's an experience an experience that if you get the opportunity definitely definitely give it a go certainly at the high end I'm not silly it's really at the high end what marks I'm going to give it loads of you recommended fantastic recommendation it's quality that's the thing that's the standout quality in every aspect every aspect I'm going to give it a 10 out of 10 I've got to give it a 10 out of 10 because each dish that I had there was amongst the best if not the best I've ever had in any Indian restaurant ever and I love the whole experience if you recommended Jim carner I want to thank you personally thank you very very much if you've been here please tell me what you think if you can like And subscribe to the channel you know I always appreciate that and until next time very lucky person and I recognize that bye-bye that was a brilliant recommendation w\n\nJim carner one of the most expensive and one of the best Indian restaurants in the world I can confirm

In [49]:
parser = StrOutputParser()

In [50]:
main_chain = parallel_chain | prompt | llm | parser

In [51]:
main_chain.invoke('Can you summarize the video')



'\n      You are a helpful assistant.\n      Answer ONLY from the provided transcript context.\n      If the context is insufficient, just say you don\'t know.\n\n      this is their signature so this is the one everyone goes mad about I\'m going to say the obious is there it\'s buttery it\'s creamy but it has a little little bit of heat a little bit of kick but I\'ve um wow I don\'t know how they make that in that kitchen with that sauce no I can\'t judge it on one so let\'s pick it up let\'s have a bit of chicken on the plate a bit of chicken GES let\'s put that there lots of this sauce this can\'t got to waste on there I want to try that with some of the rice get that there let\'s try try it with just a little bit of rice so give myself in a bit of a kafuffle here it\'s cut into the chicken to say that chicken is tender pulls away his understatement it\'s just come on let\'s get with it in there in there that\'s chicken thigh and what I like the chicken thigh that has got so much fl