In [None]:
# install required packages
!pip install transformers
!pip install langchain[docarray]
!pip install docarray
!pip install pypdf
!pip install langchain_huggingface
!pip install bitsandbytes
!pip install langchain-community
!pip install accelerate
!pip install sentence-transformers


In [None]:
# import necessary libraries
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_huggingface.llms import HuggingFacePipeline
from langchain_core.output_parsers import StrOutputParser
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from transformers import BitsAndBytesConfig
import torch



In [None]:
# load model and tokenizer
# Single source of truth for the checkpoint so the tokenizer and the weights
# can never be loaded from two different repositories by accident.
MODEL_ID = "unsloth/gemma-2-9b-it"
# Upper bound on the number of tokens generated per answer.
MAX_NEW_TOKENS = 64

# 4-bit NF4 quantization with double quantization; compute dtype is float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_quant_type="nf4"
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    quantization_config=bnb_config,
    torch_dtype=torch.float16
)

# create text generation pipeline
text_gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=MAX_NEW_TOKENS
)

In [None]:
# Wrap the transformers text-generation pipeline in LangChain's HuggingFacePipeline
# so it can be called through `llm.invoke(...)`.
llm = HuggingFacePipeline(pipeline=text_gen)

def apply_chat_template_and_response(prompt):
    """Wrap ``prompt`` in the tokenizer's chat template, run the LLM, and return the reply.

    Args:
        prompt: Plain-text content of the single user message.

    Returns:
        The string returned by ``llm.invoke``. If that string starts with the
        templated prompt (i.e. the prompt was echoed back), the echo is removed
        from the front; otherwise the string is returned unchanged.
    """
    messages = [{'role': 'user', 'content': prompt}]

    # tokenize=False returns the rendered template as a string instead of token ids.
    # NOTE(review): `enable_thinking` is an extra template variable; presumably it has
    # no effect unless the model's chat template reads it - confirm for this model.
    # NOTE(review): the rendered template may already contain the BOS token and the
    # pipeline may add another one when it re-tokenizes this string - verify.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False
    )

    generated = llm.invoke(text)

    # Strip only a leading echo of the prompt. A blanket str.replace() would also
    # delete any later occurrence of the prompt text inside the answer itself.
    if generated.startswith(text):
        return generated[len(text):]
    return generated

In [None]:
# Output parser applied to the model's text before it is printed.
# NOTE(review): StrOutputParser is presumably a pass-through for plain strings - confirm it is needed.
parser = StrOutputParser()

In [None]:
# Prompt template for the RAG step. It instructs the model to answer in Persian using
# only the supplied context and to reply with a fixed Persian apology when the context
# does not contain the answer. `{context}` and `{question}` are filled in via `prompt.format(...)`.
template = """
You are a helpful and knowledgeable AI assistant. Use only the information retrieved from the documents to answer the user's question in Persian (Farsi). If the answer is not found in the retrieved context, respond with: "متاسفانه اطلاعاتی در این مورد ندارم." Do not use your own knowledge beyond the provided context. Be accurate, clear, and polite. Never mention the documents or the retrieval process in your response. Just respond naturally in Persian.
Context: {context}

Question: {question}

Answer:

"""

prompt = PromptTemplate.from_template(template)


In [None]:
# load and split documents
loader = PyPDFLoader("Sample.pdf")
# Use load() rather than load_and_split(): the latter already chunks the documents with a
# default text splitter, and the explicit splitter below would then re-split those
# chunks instead of the original documents.
pages = loader.load()

# Chunks of up to 1024 characters with 256 characters of overlap between neighbours.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=256)
text_documents = text_splitter.split_documents(pages)

In [None]:
# Persian evaluation questions; each one is sent through the RAG loop in turn.
questions = [
    "سن متقاضی در زمان ثبت نام چقدر است؟",
    "میانگین وزنی کل نمرات دوران تحصیل چقدر است؟",
    "مرتبه علمی دبیر کارگروه تخصصی چیست؟",
]

In [None]:
# Embedding model used to vectorize the document chunks and the queries.
# Other embedding models worth trying:
#   'heydariAI/persian-embeddings'
#   'Msobhi/Persian_Sentence_Embedding_v3'
#   'HooshvareLab/bert-base-parsbert-uncased'
#   'intfloat/multilingual-e5-base'
embedding_name = 'sbunlp/fabert'
embeddings = HuggingFaceEmbeddings(model_name=embedding_name)

In [None]:
# Index the chunks in an in-memory vector store, then expose it as a retriever.
vectorstore = DocArrayInMemorySearch.from_documents(
    documents=text_documents,
    embedding=embeddings,
)
retriever = vectorstore.as_retriever()

In [None]:
# perform retrieval-augmented generation for each question
for question in questions:
    # The retriever returns a list of Document objects for the query.
    retrieved_context = retriever.invoke(question)

    # Put only the chunk text into the prompt. Formatting the list itself would embed
    # the Documents' repr (metadata, escaped newlines) in the context.
    context_text = "\n\n".join(doc.page_content for doc in retrieved_context)

    formatted_prompt = prompt.format(context=context_text, question=question)
    response_from_model = apply_chat_template_and_response(formatted_prompt)
    parsed_response = parser.parse(response_from_model)

    print(f"Question: {question}")
    print(f"Answer: {parsed_response}")
    print()