In [None]:
import os
from dotenv import load_dotenv

# Read variables from a local .env file (if any) into the process environment.
load_dotenv()

# Variables this notebook needs. Assigning `os.getenv(name)` straight into
# `os.environ` raises an opaque "str expected, not NoneType" TypeError when a
# variable is missing, so check up front and report every missing name at once.
REQUIRED_ENV_VARS = ("HUGGING_FACE_API_KEY", "LANGCHAIN_API_KEY", "LANGCHAIN_PROJECT")
missing_env_vars = [name for name in REQUIRED_ENV_VARS if os.getenv(name) is None]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Define them in your shell or in a .env file before running this notebook."
    )

# Mirror the Hugging Face key under the second variable name this notebook sets.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.environ["HUGGING_FACE_API_KEY"]
# Turn on LangChain tracing; the API key and project are read from the
# variables validated above.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

In [None]:
from langchain_community.document_loaders import WebBaseLoader

# Web page used as the knowledge source for the rest of the notebook.
loader = WebBaseLoader(
    "https://www.britannica.com/biography/A-P-J-Abdul-Kalam"
)
# Bare expression so the notebook displays the loader object.
loader

In [None]:
# Run the loader and keep the documents it returns.
docs = loader.load()
# Bare expression so the notebook displays the loaded documents.
docs

Pipeline overview:
Load data ->
Parse into Documents ->
Split text into chunks ->
Convert chunks to vectors (embeddings) ->
Store in a vector DB ->
Retrieve relevant chunks and answer with the LLM

In [None]:
# Split the loaded documents into overlapping chunks, because the model's
# context size is limited.
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
documents = text_splitter.split_documents(docs)
# Bare expression so the notebook displays the resulting chunks.
documents

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

# Name of the embedding model used to vectorise the chunks.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

In [None]:
from langchain_community.vectorstores import FAISS

# Embed every chunk and build a FAISS vector store from the results.
vector_db = FAISS.from_documents(
    documents=documents,
    embedding=embeddings,
)
# Bare expression so the notebook displays the vector store object.
vector_db

In [None]:
# Sanity-check the vector store: search for a sentence and show the text of
# the first hit.
query = "On July 27, 2015, Kalam collapsed while delivering a lecture"
result = vector_db.similarity_search(query=query)
first_hit = result[0]
first_hit.page_content

In [None]:
# `langchain_community.llms.HuggingFaceHub` is deprecated; its documented
# replacement is `HuggingFaceEndpoint` from `langchain_huggingface`, the same
# package the embeddings cell already imports from.
from langchain_huggingface import HuggingFaceEndpoint

# Generation settings are passed as explicit arguments: HuggingFaceEndpoint
# rejects known parameters such as `temperature` when they are tucked into
# `model_kwargs`.
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.1",
    temperature=0.7,
    max_new_tokens=512,
)

In [None]:
# Document chain: formats the supplied documents into the prompt and calls the LLM.
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import ChatPromptTemplate

# The template needs both placeholders:
#   {context} - filled with the documents handed to the chain
#   {input}   - the user's question; without it the question passed to
#               `invoke` never reaches the model.
prompt = ChatPromptTemplate.from_template(
    """Answer the following question based only on the context provided.

    <context>
    {context}
    </context>

    Question: {input}
    """
)
document_chain = create_stuff_documents_chain(llm, prompt)
# Bare expression so the notebook displays the assembled chain.
document_chain


In [None]:
from langchain_core.documents import Document

# Try the document chain on its own, using one hand-written document as the
# context instead of retrieved ones.
sample_context = [
    Document(
        page_content="On July 27, 2015, Kalam collapsed while delivering a lecture at the Indian Institute of Managemen"
    ),
]
document_chain.invoke({"input": query, "context": sample_context})

In [None]:
# Retriever
from langchain.chains import create_retrieval_chain

# Expose the vector store as a retriever and combine it with the document chain.
retriever = vector_db.as_retriever()
retrieval_chain = create_retrieval_chain(retriever, document_chain)
# Bare expression so the notebook displays the combined chain.
retrieval_chain


In [None]:
# Run the retrieval chain on the query and show the "answer" entry of its output.
chain_inputs = {"input": query}
response = retrieval_chain.invoke(chain_inputs)
response["answer"]


In [None]:
# Show the "context" entry of the chain output (presumably the documents the
# retriever supplied for this query - confirm against the chain's output schema).
response['context']