In [37]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import faiss

In [38]:
# 1️⃣ PDF extraction and preprocessing
# This cell only loads the text stored in processed_text.txt (read as UTF-8).

with open("processed_text.txt", mode="r", encoding="utf-8") as text_file:
    text = text_file.read()

In [39]:
# 2️⃣ Split the text into small pieces
# Consecutive, non-overlapping windows of `chunk_size` characters; the last
# window may be shorter than `chunk_size`.
chunk_size = 1000
chunk_starts = range(0, len(text), chunk_size)
chunks = [text[start:start + chunk_size] for start in chunk_starts]

In [40]:
# 3️⃣ Build the embeddings and the FAISS vector store
from sentence_transformers import SentenceTransformer

embedding_model_name = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

embedder = SentenceTransformer(embedding_model_name, device=device)

# Encode every chunk, L2-normalise each row (needed for cosine similarity),
# then move the result to host memory as a NumPy array, which FAISS requires.
chunk_embeddings = (
    torch.nn.functional.normalize(
        embedder.encode(chunks, convert_to_tensor=True, device=embedder.device),
        p=2,
        dim=1,
    )
    .cpu()
    .numpy()
)

# Flat inner-product index: on the normalised rows above, inner product
# equals cosine similarity.
dim = chunk_embeddings.shape[1]
index = faiss.IndexFlatIP(dim)
index.add(chunk_embeddings)


In [41]:
# Load the causal language model used to generate answers.
model_name = "lightblue/DeepSeek-R1-Distill-Qwen-1.5B-Multilingual"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Half precision halves memory use on a GPU. On the CPU fallback it is slow
# and some PyTorch builds lack float16 kernels for certain ops, so use
# float32 there instead of unconditionally requesting float16.
model_dtype = torch.float16 if device.type == "cuda" else torch.float32

# NOTE(review): trust_remote_code=True allows Python code shipped in the model
# repository to run locally — confirm this checkpoint actually needs it and
# consider pinning a `revision`.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    dtype=model_dtype
).to(device)


In [42]:
def answer_question(question, top_k=5, max_new_tokens=200):
    """Answer `question` using the most similar text chunks as context.

    Relies on the notebook-level objects `embedder`, `index`, `chunks`,
    `tokenizer`, `model` and `device` created in earlier cells.

    Args:
        question: The question text. It is embedded for retrieval and
            inserted verbatim into the prompt.
        top_k: Maximum number of chunks to retrieve. It is clamped to the
            number of vectors stored in `index`.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The generated continuation (text produced after the prompt), with
        special tokens removed and surrounding whitespace stripped. Sampling
        is enabled, so repeated calls may return different answers.
    """
    # 1️⃣ Embed the question the same way the chunks were embedded
    #    (L2-normalised, NumPy array for FAISS).
    question_embedding = embedder.encode([question], convert_to_tensor=True, device=embedder.device)
    question_embedding = torch.nn.functional.normalize(question_embedding, p=2, dim=1)
    question_embedding = question_embedding.cpu().numpy()

    # 2️⃣ Find the relevant chunks.
    # FAISS pads the result with -1 when fewer than k neighbours exist; used
    # as a list index, -1 would silently select the *last* chunk. Clamp k to
    # the index size and drop any negative ids.
    k = max(0, min(top_k, index.ntotal))
    if k > 0:
        _scores, neighbor_ids = index.search(question_embedding, k)
        relevant_chunks = [chunks[i] for i in neighbor_ids[0] if i >= 0]
    else:
        relevant_chunks = []

    # 3️⃣ Build the prompt: reference text, then the question, then the
    #    answer marker.
    # NOTE(review): the prompt is passed as raw text; if this model expects a
    # chat template (tokenizer.apply_chat_template), answer quality may
    # suffer — confirm against the model card.
    prompt = "متن مرجع:\n" + "\n\n".join(relevant_chunks) + f"\n\nسوال: {question}\nپاسخ:"

    # 4️⃣ Generate the answer.
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_length = inputs["input_ids"].shape[1]
    # NOTE(review): num_beams=3 together with do_sample=True keeps several
    # beams alive, which costs more memory than plain sampling — confirm this
    # is intended.
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        # Same value generate() was falling back to; passing it explicitly
        # removes the "Setting `pad_token_id` to `eos_token_id`" warning.
        pad_token_id=tokenizer.eos_token_id,
        repetition_penalty=1.2,
        num_beams=3
    )

    # 5️⃣ Keep only the newly generated tokens. The returned sequence starts
    #    with the prompt tokens; slicing them off is robust even when the
    #    retrieved text or the answer itself contains the "پاسخ:" marker.
    generated_ids = outputs[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

In [43]:
# Example query: ask the pipeline for a summary of the PDF content.
question = "خلاصه‌ای از محتوای PDF بده."
answer = answer_question(question)
print(answer)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


**  
"خلاصه از محتوای PDF"
