In [1]:
!pip install transformers
!pip install langchain[docarray]
!pip install docarray
!pip install pypdf
!pip install langchain_huggingface
!pip install bitsandbytes
!pip install langchain-community

Collecting pypdf
  Downloading pypdf-5.7.0-py3-none-any.whl.metadata (7.2 kB)
Downloading pypdf-5.7.0-py3-none-any.whl (305 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m305.5/305.5 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-5.7.0
Collecting langchain_huggingface
  Downloading langchain_huggingface-0.3.0-py3-none-any.whl.metadata (996 bytes)
Downloading langchain_huggingface-0.3.0-py3-none-any.whl (27 kB)
Installing collected packages: langchain_huggingface
Successfully installed langchain_huggingface-0.3.0
Collecting bitsandbytes
  Downloading bitsandbytes-0.46.0-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cud

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from transformers import BitsAndBytesConfig
import torch

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_quant_type="nf4"
)

tokenizer = AutoTokenizer.from_pretrained("unsloth/gemma-2-9b-it", use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    "unsloth/gemma-2-9b-it",
    device_map="auto",
    quantization_config=bnb_config,
    torch_dtype=torch.float16
)

text_gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=64
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0


In [2]:
from langchain_huggingface import HuggingFaceEmbeddings


embedding_name = 'sbunlp/fabert'
embeddings = HuggingFaceEmbeddings(
    model_name=embedding_name,
    # model_kwargs={"trust_remote_code": True},
)

Some weights of BertModel were not initialized from the model checkpoint at sbunlp/fabert and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
from langchain_huggingface.llms import HuggingFacePipeline

llm = HuggingFacePipeline(pipeline=text_gen)

def apply_chat_template_and_response(prompt):
    messages = [
    {'role': 'user', 'content': prompt}
    ]

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False
    )

    return llm.invoke(text).replace(text, '')

In [8]:
print(apply_chat_template_and_response("اسمت چیه؟"))

من Gemma هستم، یک مدل زبان بزرگ که توسط Google DeepMind آموزش دیده ام.



In [9]:
from langchain_core.output_parsers import StrOutputParser

parser = StrOutputParser()
response_from_model = apply_chat_template_and_response("Give me an inspirational quote in persian")
parsed_response = parser.parse(response_from_model)
print(parsed_response)

"هر کجا که عشق باشد، امید است."

This translates to: **"Wherever there is love, there is hope."**


Let me know if you'd like another quote!



In [10]:
from langchain.prompts import PromptTemplate

template = """
You are a helpful and knowledgeable AI assistant. Use only the information retrieved from the documents to answer the user's question in Persian (Farsi). If the answer is not found in the retrieved context, respond with: "متاسفانه اطلاعاتی در این مورد ندارم." Do not use your own knowledge beyond the provided context. Be accurate, clear, and polite. Never mention the documents or the retrieval process in your response. Just respond naturally in Persian.
Context: {context}

Question: {question}

Answer:

"""

prompt = PromptTemplate.from_template(template)
prompt.format(context="Here is some context", question="Here is a question")

'\nYou are a helpful and knowledgeable AI assistant. Use only the information retrieved from the documents to answer the user\'s question in Persian (Farsi). If the answer is not found in the retrieved context, respond with: "متاسفانه اطلاعاتی در این مورد ندارم." Do not use your own knowledge beyond the provided context. Be accurate, clear, and polite. Never mention the documents or the retrieval process in your response. Just respond naturally in Persian.\nContext: Here is some context\n\nQuestion: Here is a question\n\nAnswer:\n\n'

In [11]:
context = "من علیرضا هستم و تهران زندگی میکنم."
formatted_prompt = prompt.format(context = context, question="اسم من چیه؟")
response_from_model = apply_chat_template_and_response(formatted_prompt)
parsed_response = parser.parse(response_from_model)
print(parsed_response.replace(formatted_prompt, ""))

اسم شما علیرضا است. 



In [12]:
formatted_prompt = prompt.format(context=context, question="من چند سالمه؟")
response_from_model = apply_chat_template_and_response(formatted_prompt)
parsed_response = parser.parse(response_from_model)
print(parsed_response.replace(formatted_prompt, ""))

متاسفانه اطلاعاتی در این مورد ندارم. 



In [13]:
!wget -O sample_doc.pdf "https://github.com/Alireza-Akhavan/datasets_and_models/raw/main/sample_doc.pdf"

--2025-07-01 16:32:00--  https://github.com/Alireza-Akhavan/datasets_and_models/raw/main/sample_doc.pdf
Resolving github.com (github.com)... 140.82.113.4
Connecting to github.com (github.com)|140.82.113.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/Alireza-Akhavan/datasets_and_models/main/sample_doc.pdf [following]
--2025-07-01 16:32:00--  https://raw.githubusercontent.com/Alireza-Akhavan/datasets_and_models/main/sample_doc.pdf
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 242084 (236K) [application/octet-stream]
Saving to: ‘sample_doc.pdf’


2025-07-01 16:32:00 (9.44 MB/s) - ‘sample_doc.pdf’ saved [242084/242084]



In [15]:
from langchain_community.document_loaders import PyPDFLoader


loader = PyPDFLoader("sample_doc.pdf")
pages = loader.load_and_split()
#pages = loader.load()
pages



[Document(metadata={'producer': 'Skia/PDF m137 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '', 'title': '__شیوه نامه ایجاد و ترمیم سابقه تحصیلی - اصلاح شده نهایی 25 فروردین_', 'source': 'sample_doc.pdf', 'total_pages': 6, 'page': 0, 'page_label': '1'}, page_content='ﺑﺎﺳﻣﮫ ﺗﻌﺎﻟﯽ    اﺻﻼح ﻧﺎﻣﮫ ﺷﯾوه اﺟراﯾﯽ  ﻧﺣوه  اﯾﺟﺎد  و  ﺗرﻣﯾم  ﺳﺎﺑﻘﮫ  ﺗﺣﺻﯾﻠﯽ  ھﺎی آزﻣون  ﻧﮭﺎﯾﯽ  دوره  دوم  ﻣﺗوﺳطﮫ  ﻣﺻوب  \nﭘﻧﺟﺎه وھﻔﺗﻣﯾن)\n57\n(\n \nﺟﻠﺳﮫ\n \nﮐﻣﯾﺳﯾون\n \nﻣﻌﯾن\n \nﺷورای ﻋﺎﻟﯽ\n \nآﻣوزش\n \nو\n \nﭘرورش\n \nﺗﺎرﯾﺦ\n \n27/9/1402\n  ﺑﮫ اﺳﺗﻧﺎد  ﻣﺻوﺑﮫ  ﺟﻠﺳﮫ  1053  ﺗﺎرﯾﺦ  04/12/1403  ﻣوﺿوع  ﻋﻧﺎوﯾن  دروس  آزﻣون  ﻧﮭﺎﯾﯽ  دوره  دوم  \nﻣﺗوﺳطﮫ\n \nو\n \nﭼﮕوﻧﮕﯽ\n \nﺗرﻣﯾم\n \nﺳﺎﺑﻘﮫ\n \nﺗﺣﺻﯾﻠﯽ\n \nﺳواﺑق\n \nﺗﺣﺻﯾﻠﯽ،\n \nﺷﯾوه ﻧﺎﻣﮫ\n \nاﺟراﯾﯽ\n \nﻧﺣوه\n \nاﯾﺟﺎد\n \nﺳﺎﺑﻘﮫ\n \nﺗﺣﺻﯾﻠﯽ\n \nو\n \nﺗرﻣﯾم\n \nﺳﺎﺑﻘﮫ\n \nﺗﺣﺻﯾﻠﯽ\n \nآزﻣون ھﺎی\n \nﻧﮭﺎﯾﯽ\n \nدوره\n \nدوم\n \nﻣﺗوﺳطﮫ)اﺻﻼﺣﯾﮫ\n \nﺷﯾوه ﻧﺎﻣﮫ\n \nﻣﺻوب\n \nﭘﻧﺟﺎه\n \nو\n \nھﻔﺗﻣﯾن\n \nﺟﻠﺳﮫ\n \nﮐﻣﯾﺳﯾون\n \nﻣﻌﯾن\n \nﺷورای\n \nﻋﺎﻟﯽ\n \nآﻣوزش\n \nو\n \nﭘرورش\n \nﺗﺎرﯾﺦ\n \n27/

In [16]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=256)
text_documents = text_splitter.split_documents(pages)[:5]

pages

[Document(metadata={'producer': 'Skia/PDF m137 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '', 'title': '__شیوه نامه ایجاد و ترمیم سابقه تحصیلی - اصلاح شده نهایی 25 فروردین_', 'source': 'sample_doc.pdf', 'total_pages': 6, 'page': 0, 'page_label': '1'}, page_content='ﺑﺎﺳﻣﮫ ﺗﻌﺎﻟﯽ    اﺻﻼح ﻧﺎﻣﮫ ﺷﯾوه اﺟراﯾﯽ  ﻧﺣوه  اﯾﺟﺎد  و  ﺗرﻣﯾم  ﺳﺎﺑﻘﮫ  ﺗﺣﺻﯾﻠﯽ  ھﺎی آزﻣون  ﻧﮭﺎﯾﯽ  دوره  دوم  ﻣﺗوﺳطﮫ  ﻣﺻوب  \nﭘﻧﺟﺎه وھﻔﺗﻣﯾن)\n57\n(\n \nﺟﻠﺳﮫ\n \nﮐﻣﯾﺳﯾون\n \nﻣﻌﯾن\n \nﺷورای ﻋﺎﻟﯽ\n \nآﻣوزش\n \nو\n \nﭘرورش\n \nﺗﺎرﯾﺦ\n \n27/9/1402\n  ﺑﮫ اﺳﺗﻧﺎد  ﻣﺻوﺑﮫ  ﺟﻠﺳﮫ  1053  ﺗﺎرﯾﺦ  04/12/1403  ﻣوﺿوع  ﻋﻧﺎوﯾن  دروس  آزﻣون  ﻧﮭﺎﯾﯽ  دوره  دوم  \nﻣﺗوﺳطﮫ\n \nو\n \nﭼﮕوﻧﮕﯽ\n \nﺗرﻣﯾم\n \nﺳﺎﺑﻘﮫ\n \nﺗﺣﺻﯾﻠﯽ\n \nﺳواﺑق\n \nﺗﺣﺻﯾﻠﯽ،\n \nﺷﯾوه ﻧﺎﻣﮫ\n \nاﺟراﯾﯽ\n \nﻧﺣوه\n \nاﯾﺟﺎد\n \nﺳﺎﺑﻘﮫ\n \nﺗﺣﺻﯾﻠﯽ\n \nو\n \nﺗرﻣﯾم\n \nﺳﺎﺑﻘﮫ\n \nﺗﺣﺻﯾﻠﯽ\n \nآزﻣون ھﺎی\n \nﻧﮭﺎﯾﯽ\n \nدوره\n \nدوم\n \nﻣﺗوﺳطﮫ)اﺻﻼﺣﯾﮫ\n \nﺷﯾوه ﻧﺎﻣﮫ\n \nﻣﺻوب\n \nﭘﻧﺟﺎه\n \nو\n \nھﻔﺗﻣﯾن\n \nﺟﻠﺳﮫ\n \nﮐﻣﯾﺳﯾون\n \nﻣﻌﯾن\n \nﺷورای\n \nﻋﺎﻟﯽ\n \nآﻣوزش\n \nو\n \nﭘرورش\n \nﺗﺎرﯾﺦ\n \n27/

In [20]:
from langchain_community.vectorstores import DocArrayInMemorySearch

vectorstore = DocArrayInMemorySearch.from_documents(text_documents, embedding=embeddings)



In [21]:
query = "نمونه سوال"
retriever = vectorstore.as_retriever()
retriever.invoke(query)


[Document(metadata={'producer': 'Skia/PDF m137 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '', 'title': '__شیوه نامه ایجاد و ترمیم سابقه تحصیلی - اصلاح شده نهایی 25 فروردین_', 'source': 'sample_doc.pdf', 'total_pages': 6, 'page': 0, 'page_label': '1'}, page_content='ﺑﺎﺳﻣﮫ ﺗﻌﺎﻟﯽ    اﺻﻼح ﻧﺎﻣﮫ ﺷﯾوه اﺟراﯾﯽ  ﻧﺣوه  اﯾﺟﺎد  و  ﺗرﻣﯾم  ﺳﺎﺑﻘﮫ  ﺗﺣﺻﯾﻠﯽ  ھﺎی آزﻣون  ﻧﮭﺎﯾﯽ  دوره  دوم  ﻣﺗوﺳطﮫ  ﻣﺻوب  \nﭘﻧﺟﺎه وھﻔﺗﻣﯾن)\n57\n(\n \nﺟﻠﺳﮫ\n \nﮐﻣﯾﺳﯾون\n \nﻣﻌﯾن\n \nﺷورای ﻋﺎﻟﯽ\n \nآﻣوزش\n \nو\n \nﭘرورش\n \nﺗﺎرﯾﺦ\n \n27/9/1402\n  ﺑﮫ اﺳﺗﻧﺎد  ﻣﺻوﺑﮫ  ﺟﻠﺳﮫ  1053  ﺗﺎرﯾﺦ  04/12/1403  ﻣوﺿوع  ﻋﻧﺎوﯾن  دروس  آزﻣون  ﻧﮭﺎﯾﯽ  دوره  دوم  \nﻣﺗوﺳطﮫ\n \nو\n \nﭼﮕوﻧﮕﯽ\n \nﺗرﻣﯾم\n \nﺳﺎﺑﻘﮫ\n \nﺗﺣﺻﯾﻠﯽ\n \nﺳواﺑق\n \nﺗﺣﺻﯾﻠﯽ،\n \nﺷﯾوه ﻧﺎﻣﮫ\n \nاﺟراﯾﯽ\n \nﻧﺣوه\n \nاﯾﺟﺎد\n \nﺳﺎﺑﻘﮫ\n \nﺗﺣﺻﯾﻠﯽ\n \nو\n \nﺗرﻣﯾم\n \nﺳﺎﺑﻘﮫ\n \nﺗﺣﺻﯾﻠﯽ\n \nآزﻣون ھﺎی\n \nﻧﮭﺎﯾﯽ\n \nدوره\n \nدوم\n \nﻣﺗوﺳطﮫ)اﺻﻼﺣﯾﮫ\n \nﺷﯾوه ﻧﺎﻣﮫ\n \nﻣﺻوب\n \nﭘﻧﺟﺎه\n \nو\n \nھﻔﺗﻣﯾن\n \nﺟﻠﺳﮫ\n \nﮐﻣﯾﺳﯾون\n \nﻣﻌﯾن\n \nﺷورای\n \nﻋﺎﻟﯽ\n \nآﻣوزش\n \nو\n \nﭘرورش\n \nﺗﺎرﯾﺦ\n \n27/

In [22]:
retrieved_context = retriever.invoke(query)

In [25]:
questions = [
    "کی از متقاضیان ثبت نام انجام میگیرد؟"
]

for question in questions:
    retrieved_context = retriever.invoke(question)
    formatted_prompt = prompt.format(context=retrieved_context, question=question)
    response_from_model = apply_chat_template_and_response(formatted_prompt)
    parsed_response = parser.parse(response_from_model)

    print(f"Question: {question}")
    print(f"Answer: {parsed_response}")
    print()

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Question: کی از متقاضیان ثبت نام انجام میگیرد؟
Answer: متقاضیان ثبت نام در  آزماون ها ی نهایی  در **مرحله  نوّبّت  خردادماه** انجام میگیرد. 




In [26]:
questions = [
    "نمره کل سابقه چگونه محاسبه میشود؟"
]

for question in questions:
    retrieved_context = retriever.invoke(question)
    formatted_prompt = prompt.format(context=retrieved_context, question=question)
    response_from_model = apply_chat_template_and_response(formatted_prompt)
    parsed_response = parser.parse(response_from_model)

    print(f"Question: {question}")
    print(f"Answer: {parsed_response}")
    print()

Question: نمره کل سابقه چگونه محاسبه میشود؟
Answer: نمره کل سابقه تحصیلی براساس میانگین نمره وزنی نمره های  دروس عمومی و تخصصی محاسبه می شود. 






In [27]:
questions = [
    "نمره کل بر اساس چه بندی و چه مصوبه ای حساب میشه؟"
]

for question in questions:
    retrieved_context = retriever.invoke(question)
    formatted_prompt = prompt.format(context=retrieved_context, question=question)
    response_from_model = apply_chat_template_and_response(formatted_prompt)
    parsed_response = parser.parse(response_from_model)

    print(f"Question: {question}")
    print(f"Answer: {parsed_response}")
    print()

Question: نمره کل بر اساس چه بندی و چه مصوبه ای حساب میشه؟
Answer: نمره کل سابقه تحصیلی بر اساس بند 2 - 2 مصوبه 843 مورخ 15/04/1400 شورای عالی انقلابی فرهنگی است. 




In [31]:
while True:
    print("Say 'exit' or 'quit' to exit the loop")
    question = input('User question: ')
    print(f"Question: {question}")
    if question.lower() in ["exit", "quit"]:
        print("Exiting the conversation. Goodbye!")
        break
    formatted_prompt = prompt.format(context=retrieved_context, question=question)
    response_from_model = apply_chat_template_and_response(formatted_prompt)
    parsed_response = parser.parse(response_from_model)
    print(f"Answer: {parsed_response}")
    print()

Say 'exit' or 'quit' to exit the loop
User question: سلام
Question: سلام
Answer: سلام 


Say 'exit' or 'quit' to exit the loop
User question: نمره کل بر اساس چه بندی و چه مصوبه ای حساب میشه؟
Question: نمره کل بر اساس چه بندی و چه مصوبه ای حساب میشه؟
Answer: نمره کل سوابق تحصیلی بر اساس بند 2 - 2 مصوبه جلسه 843 مورخ 15/04/1400 شورای عالی انقلاب فرهنگی حساب می شود. 


Say 'exit' or 'quit' to exit the loop
User question: قرمه سبزی چه طوری پخته میشه؟
Question: قرمه سبزی چه طوری پخته میشه؟
Answer: متاسفانه اطلاعاتی در مورد این مورد ندارم. 


Say 'exit' or 'quit' to exit the loop
User question: exit
Question: exit
Exiting the conversation. Goodbye!
