In [None]:
from openai import OpenAI
import pathlib, toml, os
import yaml, json
from qdrant_client import QdrantClient, models
from sentence_transformers import SentenceTransformer
from jinja2 import Template

In [None]:
# Read the Streamlit secrets file (path is relative to the notebook's working
# directory) and export the OpenAI key as an environment variable.
# NOTE(review): presumably this is how the argument-less OpenAI() client created
# later in the notebook obtains its key — confirm.
config = toml.load("../../.streamlit/secrets.toml")
os.environ["OPENAI_API_KEY"] = config["openai"]["OPENAI_API_KEY"]

In [None]:
# Connect to the Qdrant instance running locally on port 6333.
qd_client = QdrantClient("http://localhost:6333")

In [None]:
# OpenAI client built with no explicit arguments; shared by the llm() calls below.
llm_client = OpenAI()

In [None]:
# Name of the sentence-transformers model; the same string is also passed to
# Search, where rrf_search uses it as the name of the dense vector (`using=`).
model_name = 'all-mpnet-base-v2'

# NOTE(review): trust_remote_code=True permits running code shipped with the
# model repository — confirm it is actually required for this model.
model = SentenceTransformer(
    model_name, 
    trust_remote_code=True,
    cache_folder="./models"   # explicitly setting cache location
)
# Embedding size reported by the model; not referenced by the other cells visible here.
emb_dimensions = model.get_sentence_embedding_dimension()

In [None]:
# JSONL file that Search.search_with_history appends one record per query to.
history_storage = 'data/query_history.jsonl'
# Presumably an earlier collection name kept for reference (TODO confirm or remove):
# collection_name = "bfp-a3447q_v2"

In [None]:
class Search:
    """Retrieval helper bound to one Qdrant collection.

    Stores, as given, the Qdrant client, an embedding model exposing
    ``encode``, the collection name, the name of the dense vector used by
    ``rrf_search`` and the path of the JSONL file used for query logging.
    """

    def __init__(self, qd_client, model, collection_name, model_name, history_storage):
        self.qd_client, self.model = qd_client, model
        self.collection_name, self.model_name = collection_name, model_name
        self.history_storage = history_storage

    def search(self, query, limit=5):
        """Embed ``query`` and return up to ``limit`` matching points with payload."""
        query_vector = self.model.encode(query).tolist()
        response = self.qd_client.query_points(
            collection_name=self.collection_name,
            query=query_vector,
            limit=limit,
            with_payload=True,
        )
        return response.points

    def search_with_history(self, query, limit=5):
        """Run ``search`` and append a JSON line describing the call to the history file.

        The logged record holds the query, an empty ``ground_truth_points``
        list, the limit, and the (id, score) pair of every returned point.
        """
        hits = self.search(query, limit)
        log_entry = {
            'query': query,
            'ground_truth_points': [],
            'limit': limit,
            'result_points_scores': [(hit.id, hit.score) for hit in hits],
        }
        serialized = json.dumps(log_entry) + "\n"
        with open(self.history_storage, "a+") as history_file:
            history_file.write(serialized)
        return hits

    def rrf_search(self, query: str, limit: int = 5):
        """Hybrid query: two prefetch branches fused with ``models.Fusion.RRF``.

        One branch queries the vector named ``self.model_name`` with the
        embedded query; the other sends the raw text as a ``Qdrant/bm25``
        document against the vector named ``bm25``. Each branch prefetches
        ``5 * limit`` candidates and the fused top ``limit`` points are returned.
        """
        candidates_per_branch = 5 * limit
        dense_branch = models.Prefetch(
            query=self.model.encode(query).tolist(),
            using=self.model_name,
            limit=candidates_per_branch,
        )
        bm25_branch = models.Prefetch(
            query=models.Document(text=query, model="Qdrant/bm25"),
            using="bm25",
            limit=candidates_per_branch,
        )
        response = self.qd_client.query_points(
            collection_name=self.collection_name,
            prefetch=[dense_branch, bm25_branch],
            query=models.FusionQuery(fusion=models.Fusion.RRF),
            limit=limit,
            with_payload=True,
        )
        return response.points

In [None]:
# Collection queried by the shared searcher; rrf_search addresses a vector named
# after model_name and one named "bm25" inside it.
collection_name = "bfp-a3447q_hybrid"
# Module-level Search instance that rag() relies on.
searcher = Search(qd_client, model, collection_name, model_name, history_storage)

In [None]:
class PromptLoader:
    """Loads prompt templates from a YAML file and renders them with Jinja2."""

    def __init__(self, path: str = "prompts.yaml"):
        """Parse the UTF-8 YAML file at ``path`` and keep the result in ``self.prompts``."""
        with open(path, "r", encoding="utf-8") as prompt_file:
            loaded_prompts = yaml.safe_load(prompt_file)
        self.prompts = loaded_prompts

    def render(self, name: str, **kwargs) -> str:
        """Return the prompt stored under ``name`` rendered with ``kwargs`` as template variables."""
        template_source = self.prompts[name]
        return Template(template_source).render(**kwargs)

In [None]:
def build_prompt(query, search_results):
    """Render the "assistant_prompt" template for ``query`` and the retrieved points.

    Each element of ``search_results`` must expose a ``payload`` mapping with
    the keys ``manual``, ``main_chapter``, ``chapter`` and ``content``; the
    entries are numbered from 0 and concatenated into the ``context`` variable.
    """
    prompt_loader = PromptLoader("data/prompts.yaml")
    sections = [
        f"{position}) Manual:\t{hit.payload['manual']},\nMain Chapter:\t{hit.payload['main_chapter']}\nChapter:\t{hit.payload['chapter']}\nContent: {hit.payload['content']}\n\n"
        for position, hit in enumerate(search_results)
    ]
    return prompt_loader.render("assistant_prompt", query=query, context="".join(sections))

In [None]:
def llm(client, prompt, model='gpt-5-nano'):
    """Send ``prompt`` as a single user message and return the first choice's text.

    ``client`` must expose ``chat.completions.create``; ``model`` is forwarded
    to that call unchanged.
    """
    user_message = {'role': 'user', 'content': prompt}
    completion = client.chat.completions.create(model=model, messages=[user_message])
    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
def refine_query(query, query_count, verbose = False, max_attempts = 2):
    """Ask the LLM for ``query_count`` reformulations of ``query``.

    The "refine_query" prompt from data/prompts.yaml is rendered with ``query``
    and ``query_count`` and sent to the LLM; the reply is expected to contain
    one query per line.

    Args:
        query: The user's original question.
        query_count: Number of reformulated queries requested.
        verbose: When true, print the rendered prompt.
        max_attempts: Maximum number of LLM calls made while the reply does not
            contain exactly ``query_count`` lines (default 2, the number of
            calls the previous hard-coded retry logic allowed).

    Returns:
        The non-empty, stripped lines of the last LLM reply. The list may have
        a length other than ``query_count`` if every attempt missed the target,
        and is empty when ``query_count`` is 0 (no LLM call is made then).
    """
    loader = PromptLoader("data/prompts.yaml")
    prompt = loader.render(
        "refine_query",
        query=query,
        query_count=query_count
    )
    if verbose:
        print(f"Prompt:\n{prompt}")

    llm_queries = []
    for _ in range(max_attempts):
        if len(llm_queries) == query_count:
            break
        # The completion content may be None; treat that as an empty reply.
        reply = llm(llm_client, prompt) or ""
        # Drop blank/whitespace-only lines so trailing newlines or spacing in
        # the reply neither break the count check nor yield empty queries.
        llm_queries = [line.strip() for line in reply.splitlines() if line.strip()]

    return llm_queries

In [None]:
def rag(query, verbose_search=False, verbose_prompt=False):
    """Answer ``query`` with retrieval-augmented generation.

    Steps: ask ``refine_query`` for two reformulations, run ``searcher.rrf_search``
    (limit 5) for each reformulation and for the original query, merge the
    results while dropping points whose id was already collected, build the
    final prompt with ``build_prompt`` and send it to the LLM.

    Args:
        query: The user's question.
        verbose_search: When true, print every collected search result.
        verbose_prompt: When true, print the final prompt sent to the LLM.

    Returns:
        The LLM's answer text.
    """
    llm_queries = refine_query(query, 2)
    # list.append() returns None, so build the combined list explicitly;
    # the original query is searched last.
    queries = [*llm_queries, query]

    seen_ids = set()
    search_results = []
    for q in queries:
        if not q.strip():
            # Nothing to search for (e.g. a blank line in the LLM reply).
            continue
        print("Query: ", q)
        for point in searcher.rrf_search(q, 5):
            if point.id not in seen_ids:  # avoid sending duplicates to the LLM
                seen_ids.add(point.id)
                search_results.append(point)
    print(len(search_results), "results in total search\n")
    if verbose_search:
        print("Query search results:")
        print(*search_results, sep="\n\n")
    prompt = build_prompt(query, search_results)
    if verbose_prompt:
        print("Query prompt output:")
        print(prompt)
    message = llm(llm_client, prompt)
    return message

In [None]:
# Sample question used by the demo cells below.
query = "What is the name of connector, where we can plug in power supply?"

In [None]:
# Request two reformulations of the sample question and print the rendered prompt.
llm_query = refine_query(query,2,verbose=True)

In [None]:
# Display the list returned by refine_query.
llm_query

In [None]:
# Run the full RAG pipeline on the sample question, printing the search results.
answer = rag(query, verbose_search = True)

In [None]:
# Display the LLM's answer.
answer