In [1]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model used for both indexing and querying the verses.
# Vectors are L2-normalised at encode time (`normalize_embeddings=True`).
embedding_encode_options = {"normalize_embeddings": True}
embeddings = HuggingFaceEmbeddings(
    model_name="Qwen/Qwen3-Embedding-4B",
    encode_kwargs=embedding_encode_options,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
from langchain_postgres import PGEngine

# Build the connection URL from the environment instead of committing the
# password to the notebook. PGUSER / PGHOST / PGDATABASE fall back to the
# previous local defaults; the password is read from PGPASSWORD or prompted for.
import os
from getpass import getpass
from urllib.parse import quote

_pg_user = os.environ.get("PGUSER", "simon")
_pg_host = os.environ.get("PGHOST", "localhost")
_pg_database = os.environ.get("PGDATABASE", "PersianPoetry")
_pg_password = os.environ.get("PGPASSWORD") or getpass("PostgreSQL password: ")

# User and password are percent-encoded so that spaces and reserved characters
# (":", "@", "/") cannot corrupt the URL; quote() emits %20 for spaces, which
# decodes correctly whether the parser uses unquote or unquote_plus.
engine = PGEngine.from_connection_string(
    url=(
        f"postgresql+psycopg://{quote(_pg_user, safe='')}:{quote(_pg_password, safe='')}"
        f"@{_pg_host}/{_pg_database}"
    )
)

In [4]:
from langchain_postgres import Column

VECTOR_SIZE = 2560

await engine.ainit_vectorstore_table(
    table_name='beyt_vector_big',
    vector_size=VECTOR_SIZE,
    metadata_columns=[
        Column("type", "VARCHAR"),
        Column("book_or_ghazal", "INTEGER"),
        Column("line", "INTEGER"),
        Column("translation", "VARCHAR"),
    ],
)

In [5]:
from langchain_postgres import PGVectorStore

beyt_store = await PGVectorStore.create(
    engine=engine,
    table_name="beyt_vector_big",
    embedding_service=embeddings,
    metadata_columns=["type", "book_or_ghazal", "line", "translation"],
)

## Add beyts to pgvector

In [6]:
import pandas as pd
import psycopg2

# Load the `masnavi` and `ghazal` tables into DataFrames.
#
# Connection settings come from the standard PG* environment variables so the
# password is not committed with the notebook; host / database / user fall back
# to the previous local defaults and the password is prompted for if unset.
import os
from contextlib import closing
from getpass import getpass

db_host = os.environ.get("PGHOST", "localhost")
db_name = os.environ.get("PGDATABASE", "PersianPoetry")
db_user = os.environ.get("PGUSER", "simon")
db_password = os.environ.get("PGPASSWORD") or getpass("PostgreSQL password: ")

try:
    # closing() guarantees conn.close() even when a read fails; psycopg2's own
    # `with conn:` only ends the transaction and leaves the connection open.
    with closing(
        psycopg2.connect(host=db_host, database=db_name, user=db_user, password=db_password)
    ) as conn:
        masnavi = pd.read_sql("SELECT * FROM masnavi;", conn)
        ghazal = pd.read_sql("SELECT * FROM ghazal;", conn)

except psycopg2.Error as e:
    print(f"Error connecting to PostgreSQL: {e}")
    # Re-raise: without the DataFrames every later cell would fail with a
    # confusing NameError instead of the real connection error.
    raise

  masnavi = pd.read_sql(f"SELECT * FROM masnavi;", conn)
  ghazal = pd.read_sql(f"SELECT * FROM ghazal;", conn)


In [7]:
from langchain_core.documents import Document
import uuid

def _masnavi_document(row_data):
    """Build the Document for one `masnavi` row (both hemistichs joined by a space)."""
    return Document(
        page_content=f"{row_data['beyt1']} {row_data['beyt2']}",
        metadata={
            "id": str(uuid.uuid4()),
            "type": "masnavi",
            "book_or_ghazal": row_data['book'],
            "line": row_data['number'],
            "translation": f"{row_data['beyt1_en']} {row_data['beyt2_en']}",
        },
    )


def _ghazal_document(position, row_data):
    """Build the Document for one `ghazal` row; `position` is its 0-based row order."""
    return Document(
        page_content=f"{row_data['beyt1']} {row_data['beyt2']}",
        metadata={
            "id": position,
            "type": "ghazal",
            "book_or_ghazal": row_data['ghazal_num'],
            "line": row_data['beyt_num'],
            "translation": "",
        },
    )


# NOTE(review): masnavi ids are random UUID strings (different on every run)
# while ghazal ids are positional integers — confirm the mismatch is intended.
masnavi_documents = [_masnavi_document(row_data) for _, row_data in masnavi.iterrows()]

ghazal_documents = [
    _ghazal_document(position, row_data)
    for position, (_, row_data) in enumerate(ghazal.iterrows())
]

In [8]:
# Single list of everything to index: Masnavi verses first, then ghazal verses.
documents = [*masnavi_documents, *ghazal_documents]

### Add documents (in batches, to stay under PostgreSQL's maximum number of query parameters)

In [9]:
# Insert the documents in fixed-size slices rather than in one call.
batch_size = 1000

batch_starts = range(0, len(documents), batch_size)
for start in batch_starts:
    beyt_store.add_documents(documents[start:start + batch_size])

## Add index

In [12]:
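# NOTE(review): index creation is currently disabled. Possibly because pgvector
# can only build HNSW indexes on `vector` columns of up to 2,000 dimensions,
# while this table stores 2,560-dimensional embeddings — confirm before re-enabling.
# from langchain_postgres.v2.indexes import HNSWIndex

# index = HNSWIndex()
# await beyt_store.aapply_vector_index(index)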

### Query

In [16]:
# Smoke-test the store: fetch the 5 nearest verses, list them, then show the
# source-table row of the best hit.
results = beyt_store.similarity_search("در این خاک", k=5)

for doc in results:
    print(f"* {doc.page_content} [{doc.metadata}]")

first_doc = results[0]

# Per document type: the DataFrame to look in and the pandas query selecting the row.
# The query strings reference `first_doc` via `@`, so they must be evaluated
# at cell level where that name is visible.
source_row_lookups = {
    'masnavi': (masnavi, 'book == @first_doc.metadata["book_or_ghazal"] and number == @first_doc.metadata["line"]'),
    'ghazal': (ghazal, 'ghazal_num == @first_doc.metadata["book_or_ghazal"] and beyt_num == @first_doc.metadata["line"]'),
}

first_doc_type = first_doc.metadata['type']
if first_doc_type in source_row_lookups:
    source_table, row_filter = source_row_lookups[first_doc_type]
    display(source_table.query(row_filter))

* جان از سفر دراز آمد بر خاک در تو بازآمد [{'id': 7419, 'type': 'ghazal', 'book_or_ghazal': 709, 'line': 1, 'translation': ''}]
* پازهر تویی و زهر دنیا دانه تو و دام زندگانی [{'id': 29051, 'type': 'ghazal', 'book_or_ghazal': 2734, 'line': 3, 'translation': ''}]
* ما ذره آفتاب عشقیم ای عشق برآی تا برآییم [{'id': 16327, 'type': 'ghazal', 'book_or_ghazal': 1555, 'line': 4, 'translation': ''}]
* اندر دل ما تویی نگارا غیر تو کلوخ و سنگ خارا [{'id': 1274, 'type': 'ghazal', 'book_or_ghazal': 114, 'line': 1, 'translation': ''}]
* ای ساقی باده معانی درده تو شراب ارغوانی [{'id': 29015, 'type': 'ghazal', 'book_or_ghazal': 2732, 'line': 1, 'translation': ''}]


Unnamed: 0,ghazal_num,beyt_num,beyt1,beyt2
7419,709,1,جان از سفر دراز آمد,بر خاک در تو بازآمد


In [17]:
from langchain_ollama import ChatOllama

rephraser_model = ChatOllama(model="gemma3n:e4b")
rag_llm = ChatOllama(model="gemma3n:e4b")

In [18]:
from langchain import hub
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
from langchain_core.prompts import PromptTemplate
from langchain_core.documents import Document
from timeit import default_timer as timer

# Prompt for the answering model. Placeholders: {question} (the user's original
# question) and {context} (the retrieved verses rendered as text).
# Adjacent string literals are concatenated verbatim, so every fragment carries
# its own trailing space or newline; otherwise sentences and the "###" section
# markers run into each other in the rendered prompt.
rag_prompt_template = PromptTemplate.from_template(
    "You're a RAG system, expert in interpreting Persian poetry and passionate about Jalāl al-Dīn Muḥammad Rumi poems. "
    "You answer user questions based on the documents you found. "
    "First, you answer precisely the question. "
    "Then, you reference all the documents you found and their metadata. "
    "For example you will reference them in a list like so:\n"
    " - Masnavi, Book 4, line 3000:\n * Persian: کس نسازد نقشِ گرمابه و خِضابجز پیِ قصدِ صواب و ناصواب \n * English: ...\n\n"
    " - Ghazal 1475, Beyt 1:\n * Persian: بجوشید بجوشید که ما اهل شعاریم	بجز عشق بجز عشق دگر کار نداریم\n * English: ...\n\n"
    "You always include both the original Persian version and its English translation.\n\n"
    "### Question:\n"
    "{question}\n\n"
    "### Retrieved documents:\n"
    "{context}"
)

# Prompt for the query-rewriting model. Placeholder: {question} (the user's
# original question). The model's reply is used as the similarity-search query.
# Each fragment ends with explicit punctuation and whitespace because adjacent
# string literals are joined verbatim.
rephraser_prompt_template = PromptTemplate.from_template((
    "You're a RAG system, the vector database contain Persian Rumi Poem verses "
    "That have been given to an embedding model "
    "and the user's prompt will be compared to these embedding of poem verses by "
    "a similarity search. "
    "Think how the user prompt could be improved to find verses relevant to their question. "
    "Make sure to expand the user question with its unwritten objective, and "
    "if there's symbols or metaphors related to the question make sure to include them. "
    "Only include the prompt, don't include your thoughts, and make it one sentence only.\n\n"
    "Here's the user prompt to rephrase: {question}"
))

# State shared by the graph nodes (`rephrase`, `retrieve`, `generate`)
class State(TypedDict):
    """Dictionary passed between the pipeline nodes; each node returns the keys it fills in."""
    # Original user question; read by `rephrase` and `generate`.
    question: str
    # Rewritten query produced by `rephrase`; used by `retrieve` for the similarity search.
    rephrased_question: str
    # Documents returned by the similarity search in `retrieve`; read by `generate`.
    context: List[Document]
    # Text content of the answering model's response, set by `generate`.
    answer: str
    # Durations of each node, measured with `timeit.default_timer` (seconds).
    rephrase_time_elapsed: float
    retrieve_time_elapsed: float
    generate_time_elapsed: float

def rephrase(state: State):
    """Rewrite the user's question into a search-oriented query.

    Renders `rephraser_prompt_template` with `state["question"]`, sends it to
    `rephraser_model`, and returns the reply text as `rephrased_question`
    together with the elapsed time as `rephrase_time_elapsed`.
    """
    started_at = timer()
    prompt_value = rephraser_prompt_template.invoke({"question": state["question"]})
    model_reply = rephraser_model.invoke(prompt_value)
    elapsed = timer() - started_at

    state_update = {}
    state_update["rephrased_question"] = model_reply.content
    state_update["rephrase_time_elapsed"] = elapsed
    return state_update

def retrieve(state: State):
    """Fetch the 5 verses most similar to the rephrased question.

    Returns the matching documents as `context` and the elapsed search time
    as `retrieve_time_elapsed`.
    """
    started_at = timer()
    nearest_verses = beyt_store.similarity_search(state["rephrased_question"], k=5)
    elapsed = timer() - started_at

    state_update = {}
    state_update["context"] = nearest_verses
    state_update["retrieve_time_elapsed"] = elapsed
    return state_update

def generate(state: State):
    """Answer the original question from the retrieved verses.

    Each document in `state["context"]` is rendered as its text on one line,
    its metadata as comma-separated `key: value` pairs on the next, then a
    blank line. The rendered block and `state["question"]` fill
    `rag_prompt_template`, which is sent to `rag_llm`. Returns the reply text
    as `answer` and the elapsed time as `generate_time_elapsed`.
    """
    started_at = timer()

    rendered_docs = [
        doc.page_content
        + "\n"
        + ", ".join(f"{key}: {val}" for key, val in doc.metadata.items())
        + "\n\n"
        for doc in state["context"]
    ]
    docs_content = "".join(rendered_docs)

    prompt_value = rag_prompt_template.invoke(
        {"question": state["question"], "context": docs_content}
    )
    model_reply = rag_llm.invoke(prompt_value)
    elapsed = timer() - started_at

    state_update = {}
    state_update["answer"] = model_reply.content
    state_update["generate_time_elapsed"] = elapsed
    return state_update


# Wire the pipeline START -> rephrase -> retrieve -> generate and compile it.
graph_builder = StateGraph(State)
graph_builder.add_sequence([rephrase, retrieve, generate])
graph_builder.add_edge(START, "rephrase")

graph = graph_builder.compile()

In [19]:
def print_response(response):
    """Print a pipeline result: question, rephrased query, answer, then per-stage timings.

    `response` must provide the keys `question`, `rephrased_question`,
    `answer`, and `<stage>_time_elapsed` for the stages rephrase, retrieve
    and generate.
    """
    divider = "-" * 20

    print(f'Question: {response["question"]}')
    print(f'Rephrased as: {response["rephrased_question"]}')
    print(divider)
    print(response["answer"])
    # One newline in the string plus print's own newline: two blank lines.
    print("\n")
    print("Elapsed times:")
    print(divider)
    for stage in ("rephrase", "retrieve", "generate"):
        timing_key = f"{stage}_time_elapsed"
        print(f"{timing_key}: {response[timing_key]}")

In [20]:
# Example 1: ask for Rumi verses similar to a quoted Parvin Etesami verse.
graph_input = {"question": "I like this verse from Parvin Etesami: ز آب چشمه و باران نمی‌شود خاموش که آتشی که در اینجاست آتش جگریست Did Mawlana write anything similar? "}
response = graph.invoke(graph_input)
print_response(response)

Question: I like this verse from Parvin Etesami: ز آب چشمه و باران نمی‌شود خاموش که آتشی که در اینجاست آتش جگریست Did Mawlana write anything similar? 
Rephrased as: Considering the user appreciates the verse's imagery of suppressed passion ("آتش جگریست" - a fire of yearning) and longing, and seeks similar sentiment from Rumi, can you find any of his verses that explore themes of hidden desires, internal flames, or passionate yearning, particularly those using metaphors of fire, water, or nature to convey these emotions?




--------------------
Yes, Rumi has written verses that express a similar sentiment to the poem by Parvin Etesami. Here are a few examples:

- از غم و اندهان من سوخت درون جان من جمله فروغ آتشین تا به کیش نهان کنم
 * Persian: گر نه آتش می زند آتش رخی در جان نهان پس دماغ عاشقان پرآتش و پرباد چیست
 * English: If fire does not burn, what is the hidden fire in the face? Then what is the state of the lovers, full of fire and breath?
- جان من از بحر عشق آب چو آتش بخورد در ق

In [None]:
# Example 2: look up a poem from an English paraphrase of its opening line.
graph_input = {"question": "I heard a poem starting with 'in this earth, in this earth, in this pure farm'. Can you find it for me?"}
response = graph.invoke(graph_input)
print_response(response)

Question: I heard a poem starting with 'in this earth, in this earth, in this pure farm'. Can you find it for me?
Rephrased as: Find Persian Rumi verses that begin with the phrase "in this earth, in this earth, in this pure farm," focusing on themes of earthly existence, purity, and spiritual cultivation.




--------------------
The phrase "in this earth, in this earth, in this pure farm" is found in the following document:

- Masnavi, Book 4, line 1762:
 * Persian: این زمین را ریعِ او خود بی‌حَد است دانه‌یی را، کمترین خود هفصد است
 * English: Verily, the produce of this earth is infinite: even the least for a single seed is seven-hundredfold.




Elapsed times:
--------------------
rephrase_time_elapsed: 2.0013440829998217
retrieve_time_elapsed: 0.1360577919995194
generate_time_elapsed: 5.2828967499999635


In [None]:
# Example 3: open-ended, theme-based request.
graph_input = {"question": "My friend has her birthday soon, can you give me some relevant verse?"}
response = graph.invoke(graph_input)
print_response(response)

Question: My friend has her birthday soon, can you give me some relevant verse?
Rephrased as: "Find Persian Rumi verses about celebrating life, joy, and the beauty of friendship, particularly those expressing blessings and well wishes for a loved one's upcoming birthday."
--------------------
Here are some verses from Rumi that might be suitable for your friend's birthday, focusing on themes of joy, love, and celebration.

- Ghazal 1686, Line 11:
 * Persian: هر جا رَوی بیایم، هر جا رَوَم بیایی در مرگ و زندگانی با تو خوشم، خوشَستَم
 * English: Wherever you go, I come. Wherever you go, I am with you in life and death. I am pleased with you, it is good.

- Ghazal 1402, Line 7:
 * Persian: گفتم عشق را شبی راست بگو تو کیستی گفت حیات باقیم عمر خوش مکررم
 * English: I asked love one night, "Tell me, who are you?" Life replied, "I am the remaining of your life, repeat me often."

- Ghazal 1734, Line 15:
 * Persian: شهری پر از عشق و فرح بر دست هر مستی قدح این سوی نوش آن سوی صح این جوی شیر و آن 