Creating memory for the LLM:

In [8]:
# Import the relevant libraries

from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS #facebookAIsimilaritysearch

In [9]:
# Collect every PDF that sits directly inside the book folder and parse it
# with PyPDFLoader.
pdf = "book/"
loader = DirectoryLoader(
    path=pdf,
    glob="*.pdf",
    loader_cls=PyPDFLoader,
)
data = loader.load()  # list of loaded documents

In [10]:
len(data)  # number of documents loaded, i.e. pages of the book

816

In [11]:
# Cut the loaded pages into small, slightly overlapping pieces so each one
# can be embedded and retrieved on its own.
CHUNK_SIZE = 500     # target size of a chunk
CHUNK_OVERLAP = 50   # amount shared between neighbouring chunks

textsplitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
data_chunks = textsplitter.split_documents(data)

In [12]:
# Number of chunks produced by the text splitter
len(data_chunks)

9846

In [13]:
# Embed every chunk, build a FAISS index from the embeddings and persist it
# to disk so it can be reloaded later with FAISS.load_local.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
FAISS_INDEX_DIR = "storedata/db_faiss"

embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
database = FAISS.from_documents(
    documents=data_chunks,
    embedding=embedding_model,
)
database.save_local(FAISS_INDEX_DIR)

Memory is created.

Connecting memory with LLM:

In [15]:
# Import the relevant libraries
import os
from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_huggingface import HuggingFaceEmbeddings

In [28]:
# Load a local Hugging Face causal language model and its tokenizer.
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Use the maintained import locations: `langchain.llms.HuggingFacePipeline`
# and the root-level `langchain.PromptTemplate` are deprecated paths.
from langchain_huggingface import HuggingFacePipeline
from langchain_core.prompts import PromptTemplate

# Alternative (larger) model kept for reference:
#model_name = "meta-llama/Llama-3.1-8B-Instruct"
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)




In [68]:
import torch

# Pick the first CUDA GPU when one is available, otherwise fall back to CPU
# (-1 is the transformers pipeline convention for CPU).
device = 0 if torch.cuda.is_available() else -1

# Text-generation pipeline around the already-loaded model and tokenizer.
# `device` is now actually passed in; previously it was computed but never
# used, so generation always ran on the model's current device (CPU).
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=device,
    do_sample=True,       # enable sampling
    temperature=0.7,      # controls randomness of sampling
    top_p=0.9,            # nucleus sampling threshold
    max_new_tokens=512,   # cap on generated tokens
)

# Wrap the pipeline so it can be used as a LangChain LLM.
llm_to_use = HuggingFacePipeline(pipeline=pipe)

# Prompt template for the QA chain. The "stuff" chain fills {context} with the
# retrieved chunks and {question} with the user's query; the trailing
# "answer:" line marks where the model's reply is expected to start.
prompt = """
Use the information in the context to answer the question. Only give direct response and answer only if you know, else say "I dont know"
Give a 2 sentence answer only
context: {context}
question: {question}
answer:
"""
prompt_to_feed = PromptTemplate(
    template=prompt,
    input_variables=["context", "question"],
)


Device set to use cpu


In [69]:
# Reload the persisted FAISS index using the same embedding model that built it.
# NOTE: allow_dangerous_deserialization=True permits loading pickled data from
# disk; only use it on index files you created yourself.
db = FAISS.load_local(
    "storedata/db_faiss",
    embedding_model,
    allow_dangerous_deserialization=True,
)

# Retrieve the 5 most similar chunks for each query.
retriever = db.as_retriever(search_kwargs={"k": 5})

# "stuff" chain: all retrieved chunks are placed into the prompt's {context}.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm_to_use,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt_to_feed},
)
qa_chain.input_keys

['query']

Execution

In [70]:
response = qa_chain.invoke({"query": "What is root canal treatment?"})

# The result may or may not include the full prompt, depending on how the LLM
# wrapper returns generated text. Split on the template's "answer:" marker;
# partition() returns (before, separator, after), with an empty separator
# when the marker is absent.
ans = response["result"].partition("answer:")

# If the marker was found keep what follows it; otherwise the result is
# already just the answer, so keep the whole text instead of printing an
# empty string.
answer_text = ans[-1] if ans[1] else ans[0]
print(answer_text.strip())



Root canal treatment is a dental procedure that is performed to save a tooth that has become infected with dental caries or has become diseased due to extensive dental caries. The procedure involves removing the infected pulp (the living tissue within the tooth) and the surrounding bacteria and debris, and shaping the root canal into a hollow space that can be filled with a filling material. The procedure is often monitored and is performed under local anesthesia.
