### Imports

In [1]:
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader
#from langchain.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.llms import GPT4All
from langchain.text_splitter import RecursiveCharacterTextSplitter
#from langchain.vectorstores import Chroma
from langchain_chroma import Chroma
from langchain.prompts import PromptTemplate
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
import os

### Load Knowledge Base

In [2]:
# Load the source PDF that serves as the knowledge base for retrieval.
# NOTE(review): load_and_split() returns split documents, so the "pages"
# wording in the messages below may really be a chunk count — confirm.
loader3 = PyPDFLoader(
    "BodyPartRecognition.pdf"  # path to your PDF file
)
papper = loader3.load_and_split()
knowledgeBase = papper  # same list object; the name used by the later cells

print(f"number of pages of the document: {len(papper)}")
print(f"number of pages of the knowledgeBase: {len(knowledgeBase)}")

number of pages of the document: 16
number of pages of the knowledgeBase: 16


### Chunking

In [3]:
# Split the loaded documents into overlapping chunks of at most 1024
# characters (64 characters of overlap) before they are embedded.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1024,
    chunk_overlap=64,
)
texts_1024 = text_splitter.split_documents(knowledgeBase)

# Report how many chunks were produced.
# To inspect the first chunk: print(texts_1024[0].page_content)
print(len(texts_1024))

56


### Embeddings

In [5]:
# Choose the embedding model used to vectorise the chunks and the queries.
# Alternatives that were tried:
#   "google-bert/bert-base-uncased"
#   "sentence-transformers/all-roberta-large-v1"
# NOTE(review): the selected model's name suggests it is tuned for dot-product
# scoring — confirm that the vector store's distance metric is compatible.
embedding_model_name = "sentence-transformers/msmarco-bert-base-dot-v5"
model_ = HuggingFaceEmbeddings(model_name=embedding_model_name)

### Embeddings in DB

In [6]:
# Open the persisted Chroma store if it exists, otherwise build it from the
# chunks and persist it.
#
# The directory is defined once (db_name / db_path) and reused everywhere, so
# changing db_name really changes which store is opened and created.
# NOTE: the store is keyed only by directory; if the embedding model or the
# chunking parameters are changed, delete the directory (or pick a new
# db_name) so that stale vectors are not reused.
db_name = "chroma_db_wizard"
db_path = db_name  # relative to the notebook's working directory

# An existing but empty directory holds no index; treat it like a missing one
# so that the store is (re)built instead of silently opening an empty DB.
if os.path.isdir(db_path) and os.listdir(db_path):
    db = Chroma(persist_directory=db_path, embedding_function=model_)
else:
    db = Chroma.from_documents(texts_1024, model_, persist_directory=db_path)

In [7]:
# Location of the local GGUF model file.
# The path can be overridden without editing the notebook by setting the
# GPT4ALL_MODEL_PATH environment variable; the original location is kept as
# the default so existing setups keep working.
model_path = os.environ.get(
    "GPT4ALL_MODEL_PATH",
    "C:/Users/Ridvan/AppData/Local/nomic.ai/GPT4All/Llama-3.2-3B-Instruct-Q4_0.gguf",
)

# Fail early with a clear message instead of an opaque loader error.
if not os.path.isfile(model_path):
    raise FileNotFoundError(
        f"GPT4All model file not found: {model_path!r}. "
        "Set the GPT4ALL_MODEL_PATH environment variable to the .gguf file."
    )

# Callbacks: stream generated tokens to stdout while the model is answering.
callbacks = [StreamingStdOutCallbackHandler()]

llm = GPT4All(
    model=model_path,
    callbacks=callbacks,
    verbose=True,
    n_threads=16,
    temp=0.5,
)

### Prompt Template

In [8]:
# Prompt handed to the LLM for every query. {context} receives the retrieved
# chunks and {question} the user's question.
# The template is written piece by piece so that the leading newline and the
# trailing spaces after "USER:" and "END" are visible.
# NOTE(review): the question is wrapped in "BEGINING ... END", which the
# instruction text does not mention — confirm that this is intended.
prompt_template = (
    "\n"
    "You can access the context between BEGININPUT and ENDINPUT tags for the following task. "
    "Answer the following question in English and only based on the given context. "
    "If these do not contain an answer, say that no answer is possible based on the information given!\n"
    "USER: \n"
    "BEGININPUT{context}ENDINPUT\n"
    "BEGINING {question} END \n"
    "ASSISTANT:\n"
)

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [9]:
# Assemble the retrieval-augmented QA chain: the 3 most similar chunks are
# fetched from the vector store and "stuffed" into the custom prompt.
chain_type_kwargs = dict(prompt=PROMPT)

qa_RAG_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=db.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 3},
    ),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,  # keep the retrieved chunks in the response
    verbose=False,
)

In [10]:
# Ask a question and display only the generated answer text.
# Another question that was tried: "what is experiments results?"
question = "What is the subject of the article?"
qa_RAG_chain.invoke(question)["result"]

The subject of the article appears to be a research work related to computer vision and human-computer interaction, specifically focusing on body part labeling using depth cameras. 

However, I must point out that there seems to be no clear answer within the provided context. The text does not explicitly state what the main topic or subject is.

If you'd like, I can try to provide a possible interpretation based on the content, but please note that it might not be entirely accurate without more information. 

Would you like me to attempt an interpretation?

"The subject of the article appears to be a research work related to computer vision and human-computer interaction, specifically focusing on body part labeling using depth cameras. \n\nHowever, I must point out that there seems to be no clear answer within the provided context. The text does not explicitly state what the main topic or subject is.\n\nIf you'd like, I can try to provide a possible interpretation based on the content, but please note that it might not be entirely accurate without more information. \n\nWould you like me to attempt an interpretation?"