In [41]:
from dotenv import load_dotenv

# Read variables from a local .env file into the process environment.
# The ChatGroq client created later is given no explicit API key, so it
# presumably picks its credentials up from here — confirm against your .env.
load_dotenv()

True

In [42]:
from youtube_transcript_api import YouTubeTranscriptApi,TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_groq import ChatGroq
from langchain_community.vectorstores import FAISS , pinecone
from langchain_core.prompts import PromptTemplate
from langchain_openai import OpenAIEmbeddings , ChatOpenAI
from langchain_community.embeddings import HuggingFaceEmbeddings

## Step 1: Indexing (Document Ingestion)

In [43]:
video_id = 'LPZh9BOjkQs'

# Reset first, so a failed fetch can never leave a transcript from an earlier
# run (possibly of a different video) alive in the kernel for the cells below.
transcript = ""

try:
    # NOTE(review): `get_transcript` is the legacy static entry point; newer
    # youtube-transcript-api releases appear to favour an instance-level
    # `fetch()` — confirm against the installed version.
    transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])
except TranscriptsDisabled as e:
    # Captions are switched off for this video: report it and keep
    # `transcript` empty instead of unbound/stale.
    print(e)
else:
    # Each entry is indexed by "text"; concatenate all snippets into one string.
    transcript = " ".join(chunk["text"] for chunk in transcript_list)
    print(transcript)

Imagine you happen across a short movie script that describes a scene between a person and their AI assistant. The script has what the person asks the AI, but the AI's response has been torn off. Suppose you also have this powerful magical machine that can take any text and provide a sensible prediction of what word comes next. You could then finish the script by feeding in what you have to the machine, seeing what it would predict to start the AI's answer, and then repeating this over and over with a growing script completing the dialogue. When you interact with a chatbot, this is exactly what's happening. A large language model is a sophisticated mathematical function that predicts what word comes next for any piece of text. Instead of predicting one word with certainty, though, what it does is assign a probability to all possible next words. To build a chatbot, you lay out some text that describes an interaction between a user and a hypothetical AI assistant, add on whatever the use

## Step 1.1: Text Splitting

In [44]:
# Cut the transcript into overlapping pieces so that each one can be embedded
# and retrieved on its own.
splitting = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = splitting.create_documents([transcript])

# Number of chunks produced.
len(chunks)

10

### Step 1.2: Indexing (Embedding Generation and Storing in a Vector Store)

In [45]:
# Embedding model used to vectorise the transcript chunks.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [46]:
# Embed every chunk and load the vectors into a FAISS index.
vector_store = FAISS.from_documents(
    documents=chunks,
    embedding=embedding_model,
)

In [47]:
# Inspect the mapping from FAISS index position to docstore document ID
# (one entry per stored chunk).
vector_store.index_to_docstore_id

{0: '390f6497-af22-4639-8ac8-125c24c169d8',
 1: '343f6225-65a7-427f-b532-c295bfe2d4b7',
 2: 'f8f87704-b182-4375-86d1-9786e839226e',
 3: 'a4d15cc5-0f59-42da-85d7-95fef276c5ba',
 4: '477520bf-98fb-4154-a3e4-1b7bfd169f36',
 5: 'bc044f02-d689-4931-aab4-c91d75506b27',
 6: '4d3719f0-bf56-406c-955a-035ff06f1a5d',
 7: 'e87df059-726c-45c7-8fc8-d6a882eab800',
 8: '3fac5bde-a639-42f1-8c27-74fd5cbf421b',
 9: '186f4c34-fea8-4b3e-b056-26a870cadfd3'}

## Step 2: Retrieval

In [63]:
# Similarity-search retriever over the FAISS store, configured with k=4.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

### Step 3: Augmentation


In [64]:
# Chat model that will produce the final answer.
llm = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0.5,
)


In [65]:
# Prompt that restricts the model to the retrieved transcript context and
# tells it to admit when that context is not enough.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
    you are a helpful assistant.
    answer only from the provided transcript context.
    if the context is insufficient , just say you don`t know.
    
    {context}
    question:{question}
    """,
)

In [66]:
# Example query, and the chunks the retriever returns for it.
question = "is the topic of nuclear fusion discussed in this video? if yes then what was discussed"
retrieved_docs = retriever.invoke(question)

### Step 4: Generation

In [54]:
# Join the retrieved chunks into a single context string. This cell used to
# rely on a `context_text` left over in the kernel, which fails with NameError
# after Restart & Run All.
context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)

# Fill the template with the context and the question.
final_prompt = prompt.invoke({'context': context_text, 'question': question})

In [55]:
# Send the filled-in prompt to the model.
result = llm.invoke(final_prompt)

# Show only the text of the reply.
result.content

'The topic discussed in the video is how large language models work, specifically how they are used to build chatbots and the process of generating responses to user input.'

## Building a Chain

In [56]:
from langchain_core.runnables import RunnableParallel , RunnablePassthrough , RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [71]:
def format_docs(retriever):
    """Merge the text of several documents into one context string.

    Despite its name, ``retriever`` is iterated directly and each item's
    ``page_content`` attribute is read, so it is the collection of retrieved
    documents rather than a retriever object.

    Returns:
        The ``page_content`` of every item, joined by a blank line.
    """
    pieces = [doc.page_content for doc in retriever]
    return '\n\n'.join(pieces)

In [72]:
# Feed the same input to two branches: one retrieves documents and formats
# them into a context string, the other is a RunnablePassthrough for the question.
parallel_chian = RunnableParallel(
    context=retriever | RunnableLambda(format_docs),
    question=RunnablePassthrough(),
)

In [80]:
# Run the retrieval branch on its own to inspect the dict ('context' and
# 'question' keys) that will be handed to the prompt.
parallel_chian.invoke("what is large language model ?")


{'context': "a standard human to read the amount of text that was used to train GPT-3, for example, if they read non-stop 24-7, it would take over 2600 years. Larger models since then train on much, much more. You can think of training a little bit like tuning the dials on a big machine. The way that a language model behaves is entirely determined by these many different continuous values, usually called parameters or weights. Changing those parameters will change the probabilities that the model gives for the next word on a given input. What puts the large in large language model is how they can have hundreds of billions of these parameters. No human ever deliberately sets those parameters. Instead, they begin at random, meaning the model just outputs gibberish, but they're repeatedly refined based on many example pieces of text. One of these training examples could be just a handful of words, or it could be thousands, but in either case, the way this works is to pass in all but the l

In [81]:
# Output parser appended as the last stage of the chain.
parser = StrOutputParser()

In [83]:
# Full pipeline: build the inputs, fill the prompt, call the model, parse the output.
main_chain = (
    parallel_chian
    | prompt
    | llm
    | parser
)

In [84]:
# Run the whole chain end to end on a sample question; the recorded output
# below is a plain string.
main_chain.invoke("what is large language model ?")


'A large language model is a sophisticated mathematical function that predicts what word comes next for any piece of text. It uses hundreds of billions of parameters, which are adjusted through a training process involving many example pieces of text.'