### Imports

In [1]:
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import GPT4All
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
import os

### Load Knowledge Base

In [2]:
# Load the PDF that serves as the knowledge base.
# `load()` returns one Document per PDF page, so the counts printed below are
# genuine page counts. The earlier `load_and_split()` call additionally ran
# LangChain's default text splitter over the pages (and is documented as
# effectively deprecated); chunking is performed explicitly in the Chunking
# section instead.
loader3 = PyPDFLoader("BodyPartRecognition.pdf")  # your pdf file path
papper = loader3.load()
print(f"number of pages of the document: {len(papper)}")
# The knowledge base currently consists of this single document.
knowledgeBase = papper
print(f"number of pages of the knowledgeBase: {len(knowledgeBase)}")

number of pages of the document: 16
number of pages of the knowledgeBase: 16


### Chunking

In [3]:
# Break the knowledge-base documents into overlapping chunks for embedding.
CHUNK_SIZE = 1024     # upper bound on the length of a single chunk
CHUNK_OVERLAP = 64    # length shared between neighbouring chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
texts_1024 = text_splitter.split_documents(knowledgeBase)
chunk_count = len(texts_1024)
print(chunk_count)

56


### Embeddings

In [4]:
# Choose the embedding model used to vectorise the chunks.
# NOTE(review): the captured output reports that this id is not a
# sentence-transformers model, so one is created with mean pooling.
EMBEDDING_MODEL_NAME = "google-bert/bert-base-uncased"
model_ = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

  model_ = HuggingFaceEmbeddings(model_name="google-bert/bert-base-uncased")
  from .autonotebook import tqdm as notebook_tqdm
No sentence-transformers model found with name google-bert/bert-base-uncased. Creating a new one with mean pooling.


### Embeddings in DB

In [5]:
# Open the persisted Chroma index if one is already on disk; otherwise embed
# the chunks and persist a fresh index.
db_name = "chroma_db_wizard"
db_path = db_name  # relative to the notebook's working directory

# Require a non-empty directory: an empty or half-created folder (for example
# after an interrupted first run) would otherwise be opened as an empty store
# and every query would retrieve nothing.
if os.path.isdir(db_path) and os.listdir(db_path):
    db = Chroma(persist_directory=db_path, embedding_function=model_)
else:
    db = Chroma.from_documents(texts_1024, model_, persist_directory=db_path)

  db=Chroma(persist_directory="chroma_db_wizard", embedding_function=model_)


In [6]:
# Path to the local GGUF model file.
# Set the GPT4ALL_MODEL_PATH environment variable to run the notebook on another
# machine; when it is unset, the original author's install location is used.
model_path = os.environ.get(
    "GPT4ALL_MODEL_PATH",
    "C:/Users/Ridvan/AppData/Local/nomic.ai/GPT4All/Llama-3.2-3B-Instruct-Q4_0.gguf",
)

# Callbacks: stream generated tokens to stdout while the model is answering.
callbacks = [StreamingStdOutCallbackHandler()]

llm = GPT4All(
    model=model_path,
    callbacks=callbacks,
    verbose=True,
    n_threads=16,
    temp=0.5,
)

### Prompt Template

In [7]:
# Prompt used for answering: {context} and {question} are the two placeholders
# declared as input variables below. PromptTemplate is already imported in the
# Imports cell, so it is not re-imported here.
# NOTE(review): the template text (including the "BEGINING ... END" markers) is
# kept verbatim, because any change to it changes what the model is asked.
prompt_template='''
You can access the context between BEGININPUT and ENDINPUT tags for the following task. Answer the following question in English only, based on the given context. If these do not contain an answer, say that no answer is possible based on the information given!
USER: 
BEGININPUT{context}ENDINPUT
BEGINING {question} END 
ASSISTANT:
'''
PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

In [8]:
# Assemble the retrieval QA chain: a similarity retriever over the Chroma store
# (k=3) feeds a "stuff" chain that uses the custom prompt.
chain_type_kwargs = dict(prompt=PROMPT)
top3_retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)
qa_RAG_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=top3_retriever,
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,  # keep the retrieved chunks in the response
    verbose=False,
)

In [9]:
# Ask a question against the knowledge base. The answer text is streamed to
# stdout by the LLM callback and is also displayed as the cell's value.
question = "what is experiments results?"
response = qa_RAG_chain.invoke(question)
response["result"]

The experiment's results are as follows:

1. **Classification Accuracy**: Our approach achieved an average classification accuracy of 95% on the synthetic test set and 92% on the real test set.
2. **Joint Prediction Accuracy**: The joint prediction accuracy was higher for the synthetic test set, with an average accuracy of 98%, compared to 94% for the real test set.
3. **Rotation Effect**: We found that the rotation effect had a significant impact on both classification and joint prediction accuracy, with errors increasing by up to 20% when rotating beyond ±120°.
4. **Full 360° Scenario Evaluation**: Evaluating our approach in the full 360° scenario resulted in lower performance compared to the limited rotation scenario.

These results indicate that our semi-local body part classifier can generalize well across a wide range of poses and scenarios, with some limitations observed when considering extreme variability or rotations beyond ±120°.

"The experiment's results are as follows:\n\n1. **Classification Accuracy**: Our approach achieved an average classification accuracy of 95% on the synthetic test set and 92% on the real test set.\n2. **Joint Prediction Accuracy**: The joint prediction accuracy was higher for the synthetic test set, with an average accuracy of 98%, compared to 94% for the real test set.\n3. **Rotation Effect**: We found that the rotation effect had a significant impact on both classification and joint prediction accuracy, with errors increasing by up to 20% when rotating beyond ±120°.\n4. **Full 360° Scenario Evaluation**: Evaluating our approach in the full 360° scenario resulted in lower performance compared to the limited rotation scenario.\n\nThese results indicate that our semi-local body part classifier can generalize well across a wide range of poses and scenarios, with some limitations observed when considering extreme variability or rotations beyond ±120°."