In [1]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_postgres.vectorstores import PGVector
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_core.prompts import ChatPromptTemplate
from transformers import AutoTokenizer, pipeline, AutoModelForCausalLM, BitsAndBytesConfig, AutoModel
import torch

In [2]:
# doc = TextLoader("test.txt").load()
# chunks = RecursiveCharacterTextSplitter(
#     chunk_size=500,
#     chunk_overlap=100
# ).split_documents(doc)

embedding_model = HuggingFaceEmbeddings(
    model="Qwen/Qwen3-Embedding-0.6B",
    model_kwargs={
        "device": "cpu",
    }
)

In [4]:


connection = 'postgresql+psycopg://langchain:langchain@localhost:6024/langchain'
# db = PGVector.from_documents(
#     chunks,
#     connection=connection,
#     embedding=embedding_model
# )

db = PGVector(
    connection=connection,
    embeddings=embedding_model
)
retriever = db.as_retriever(search_kwargs={"k" : 3})


In [5]:
model_id = "Qwen/Qwen3-4B-Instruct-2507"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)


tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id,
                                             quantization_config=bnb_config,
                                             device_map="auto",
                                             max_memory={
                                                 0 : "3.5GiB",
                                                 "cpu" : "4.5GiB"
                                             })
pipe = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    max_new_tokens=2000
)
llm = HuggingFacePipeline(pipeline=pipe)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Device set to use cuda:0


In [6]:
template = """Answer the question using **only** the following context. Answer it in sharp and short way.

Context:
{context}


Question: {question}

"""

prompt = ChatPromptTemplate.from_template(template)
chain = prompt | llm


def qa(question):
    docs = retriever.invoke(question)
    context = "\n\n".join([d.page_content for d in docs])
    
    results = chain.invoke({
        "context": context,
        "question": question
    })
    
    return results

# Example usage
answer = qa("Who is the leader of ECMA?")
print(answer)


Human: Answer the question using **only** the following context. Answer it in sharp and short way.

Context:
PART I: GENERAL PROVISIONS

1. Short Title
This Directive may be cited as the "Directive for the Dematerialization of Publicly Offered Securities No. 1047/2025."

2. Definitions
In this Directive unless the context otherwise requires:

1/ "Authority" means the Ethiopian Capital Market Authority established under the Capital Market Proclamation No. 1248/2021.

[file name]: standardized_formatted.pdf
[file content begin]

ETHIOPIAN CAPITAL MARKET AUTHORITY
DIRECTIVE NUMBER 1047/2025

DEMATERIALIZATION OF PUBLICLY OFFERED SECURITIES
MARCH 2025

TABLE OF CONTENTS

33. Effective Date
This Directive shall come into force on the date of its registration with the Ministry of Justice and uploading it onto the official website of the Authority.

DONE IN ADDIS ABABA ON THE 5TH DAY OF MARCH 2025

HANA TEHELKU
DIRECTOR GENERAL
ETHIOPIAN CAPITAL MARKET AUTHORITY

[file content end]


Question

In [9]:
rewrite_prompt = ChatPromptTemplate.from_template("""Provide a better search query for legal search on
                                                  legal information about the Ethiopian Capital Market laws to answer the given question.
                                                  End the queries with '**'. Just give me one good query and do it quickly.

                                                  Questions: {x}
                                                  Answer:
""")
rewriter = rewrite_prompt | llm

question = """I woke up today in a very weird day. The temperature was hot. 
I needed a shower so I took one and here I am. Who is the general director of ECMA?"""

result = rewriter.invoke({"x" : question})
print(result)

Human: Provide a better search query for legal search on
                                                  legal information about the Ethiopian Capital Market laws to answer the given question.
                                                  End the queries with '**'. Just give me one good query and do it quickly.

                                                  Questions: I woke up today in a very weird day. The temperature was hot. 
I needed a shower so I took one and here I am. Who is the general director of ECMA?
                                                  Answer:
                                                  The general director of ECMA is Mr. Alemayehu G. Tewolde.

AssistantAssistant: "general director of ECMA Ethiopia capital market authority"**
