In [15]:
from sentence_transformers import SentenceTransformer
import numpy as np
import os
import glob
import chromadb
from chromadb.config import Settings

In [13]:
# Name of the sentence-transformers embedding model.
MODEL_NAME = "all-MiniLM-L6-v2"
# Directory of the persisted Chroma store. Set the CHROMA_DB_PATH environment
# variable to point at a different location; the fallback is the original
# machine-specific path so existing runs keep working unchanged.
DB_PATH = os.environ.get(
    "CHROMA_DB_PATH",
    r"C:\Users\rauna\projects\llm_engineering\My Projects\Drug Chatbot\ChromaDB\chroma_db",
)
# Name of the Chroma collection that is opened for retrieval.
COLLECTION_NAME = "document_embeddings"

In [16]:
# Load the embedding model named in the config cell rather than repeating the
# literal, so the model name only has to be changed in one place.
model = SentenceTransformer(MODEL_NAME)

In [17]:
# Open the on-disk Chroma store at DB_PATH and look up the existing collection
# (get_collection does not create it; the collection must already be present).
chroma_client = chromadb.PersistentClient(path=DB_PATH)
collection = chroma_client.get_collection(name=COLLECTION_NAME)

In [18]:
def retrieve_documents(query: str, n_results: int = 5):
    """
    Embed ``query`` and fetch the closest chunks from the Chroma collection.

    Args:
        query: Free-text question to search for.
        n_results: How many nearest chunks to request (default 5).

    Returns:
        The raw result object returned by ``collection.query``.
    """
    # Per the original note, embeddings were added to the store manually, so
    # the lookup is done with an explicit query embedding rather than raw text.
    embedded_query = model.encode(query).tolist()
    return collection.query(query_embeddings=[embedded_query], n_results=n_results)

In [19]:
def build_context(results) -> str:
    """
    Convert Chroma query results into a single context string.

    Only the first query's hits (index 0) are used. Each hit becomes a block of
    the form ``"Source: <filename>\\n<text>"`` and blocks are joined with a
    ``---`` separator line.

    Args:
        results: Mapping with a ``"documents"`` entry (list of lists of text)
            and, optionally, a ``"metadatas"`` entry (list of lists of dicts).

    Returns:
        The joined context, or an empty string when there are no documents.
    """
    docs = results["documents"][0] or []

    # Metadata may be missing entirely, shorter than the document list, or
    # contain None for individual hits. Pad with None so that zip() never
    # silently drops a document.
    metadata_batches = results.get("metadatas") or [[]]
    metadatas = list(metadata_batches[0] or [])
    metadatas += [None] * (len(docs) - len(metadatas))

    context_blocks = []
    for doc, meta in zip(docs, metadatas):
        # A None metadata entry falls back to the placeholder source name.
        filename = (meta or {}).get("filename", "unknown_source.txt")
        context_blocks.append(f"Source: {filename}\n{doc}")

    return "\n\n---\n\n".join(context_blocks)

In [32]:
from dotenv import load_dotenv
import os
from openai import OpenAI
from IPython.display import Markdown, display
import gradio as gr
import ast

# Load variables from a .env file, letting them override existing env values.
load_dotenv(override=True)
openrouter_api_key = os.getenv('OPENROUTER_API_KEY')

# Report whether a key was found; only its first three characters are echoed.
if not openrouter_api_key:
    print("OpenRouter API Key not set (and this is optional)")
else:
    print(f"OpenRouter API Key exists and begins {openrouter_api_key[:3]}")

# The OpenAI client is pointed at OpenRouter's base URL.
openrouter_url = "https://openrouter.ai/api/v1"
openrouter = OpenAI(api_key=openrouter_api_key, base_url=openrouter_url)

# Chat model identifier passed to the LLM helper functions.
MODEL = 'google/gemini-2.5-flash-lite'


OpenRouter API Key exists and begins sk-


In [33]:
def call_llm(model, system_message, user_message):
    """
    Run a single-turn chat completion through the ``openrouter`` client.

    Args:
        model: Model identifier to request.
        system_message: Content of the system turn.
        user_message: Content of the user turn.

    Returns:
        The message content of the first returned choice.
    """
    completion = openrouter.chat.completions.create(
        model=model,
        messages=[
            dict(role="system", content=system_message),
            dict(role="user", content=user_message),
        ],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


def call_llm_with_history(model, system_message, history, user_message):
    """
    Run a chat completion that includes prior conversation turns.

    Args:
        model: Model identifier to request.
        system_message: Content of the system turn.
        history: Earlier turns as a list of ``{"role", "content", ...}`` dicts,
            or ``None`` / empty for a fresh conversation.
        user_message: Content of the new user turn.

    Returns:
        The message content of the first returned choice.
    """
    # Forward only role/content for dict turns. The history here comes from a
    # Gradio chat callback and may carry extra per-turn keys that are not part
    # of the chat-completions message schema (NOTE(review): confirm which keys
    # your Gradio version adds). Non-dict turns are passed through untouched.
    prior_turns = [
        {"role": turn["role"], "content": turn["content"]} if isinstance(turn, dict) else turn
        for turn in (history or [])
    ]
    messages = (
        [{"role": "system", "content": system_message}]
        + prior_turns
        + [{"role": "user", "content": user_message}]
    )
    response = openrouter.chat.completions.create(model=model, messages=messages)
    return response.choices[0].message.content


def safe_eval(text):
    """
    Parse a Python literal (list, dict, string, number, ...) from model output.

    Surrounding whitespace and a wrapping Markdown code fence (with an optional
    language tag such as ``python``) are removed before parsing, because chat
    models frequently add them even when asked not to.

    Args:
        text: The string to parse. Non-string inputs are handed to
            ``ast.literal_eval`` unchanged.

    Returns:
        The evaluated literal.

    Raises:
        ValueError, SyntaxError: If the cleaned text is not a valid literal.
    """
    if isinstance(text, str):
        candidate = text.strip()
        if len(candidate) >= 6 and candidate.startswith("```") and candidate.endswith("```"):
            inner = candidate[3:-3]
            tag, newline, rest = inner.partition("\n")
            # Drop the opening fence line only when it is empty or looks like a
            # language tag and real content follows it.
            if newline and rest.strip() and (not tag.strip() or tag.strip().isidentifier()):
                inner = rest
            candidate = inner.strip()
        text = candidate
    return ast.literal_eval(text)

In [34]:
def rag_answer(message: str) -> str:
    """
    Answer ``message`` with retrieval-augmented generation.

    Steps: retrieve matching chunks from Chroma, flatten them into a context
    string, then ask the LLM to answer using that context.

    Args:
        message: The user's question.

    Returns:
        The text returned by ``call_llm``.
    """
    context = build_context(retrieve_documents(message))

    # The question appears in the system prompt and is also sent as the user
    # turn. Note the deliberate trailing space after "question." below.
    system_message = (
        "You are a helpful drug information assistant.\n"
        "\n"
        "Use ONLY the following context to answer the question. \n"
        "If the answer is not in the context, say you don't know.\n"
        "\n"
        "Context:\n"
        f"{context}\n"
        "\n"
        f"Question: {message}\n"
        "\n"
        "Answer:"
    )

    return call_llm(MODEL, system_message, message)

In [55]:
# Scratch check: build the same prompt that rag_answer() assembles for one
# sample question and print it, to inspect what context was retrieved.
message = 'is paracetamol male or female'
results = retrieve_documents(message)
context = build_context(results)
system_message = (
    "You are a helpful drug information assistant.\n"
    "\n"
    "Use ONLY the following context to answer the question. \n"
    "If the answer is not in the context, say you don't know.\n"
    "\n"
    "Context:\n"
    f"{context}\n"
    "\n"
    f"Question: {message}\n"
    "\n"
    "Answer:"
)
print(system_message)

You are a helpful drug information assistant.

Use ONLY the following context to answer the question. 
If the answer is not in the context, say you don't know.

Context:
Source: file_11.txt
Topic: Weaknesses and areas for growth

## Paracetamol: Strengths and Areas for Growth

Paracetamol, a familiar name in many households, is a highly effective and widely used analgesic and antipyretic. Its strengths lie in its accessibility, affordability, and a generally favorable safety profile when used as directed. However, like any pharmaceutical, Paracetamol isn't without its limitations and areas where further understanding and cautious application are crucial.

### Key Strengths:

*   **Ubiquitous Accessibility and Affordability:** One of Paracetamol's most significant strengths is its widespread availability. It's an over-the-counter medication found in virtually every pharmacy, supermarket, and even convenience store. This ease of access makes it a go-to for immediate relief from common ai

In [56]:
# Pass the raw question, not the assembled prompt: rag_answer() retrieves
# context and builds its own prompt, so feeding it `system_message` would embed
# the whole prompt for retrieval and wrap it in a second prompt.
rag_answer(message)

'Paracetamol is male.'

Category 2 : Drug Information
['paracetamol']
paracetamol_agent is running
Category 2 : Drug Information
['paracetamol']
paracetamol_agent is running


In [45]:
def cat_agent(message):
    """
    Ask the LLM to label ``message`` as a general query or a drug-information query.

    Args:
        message: The user's message.

    Returns:
        The raw category text returned by the model (also printed for tracing).
    """
    categorizer_prompt = '''You are a helpful assistant that catergorizes the given message into two catergories.
                        Category 1 : General Query
                        Category 2 : Drug Information
                        If the message contains any drug or medicine name then put it into Category 2 : Drug Information
                        Return only the category decision of the whole message.
    '''
    decision = call_llm(MODEL, categorizer_prompt, message)
    # Echo the routing decision so it shows up in the notebook output.
    print(decision)
    return decision

def reply_normal_agent(message, history):
    """
    Answer a general (non-drug) message with a generic assistant prompt.

    Args:
        message: The new user message.
        history: Prior conversation turns, forwarded to the LLM call.

    Returns:
        The model's reply text.
    """
    return call_llm_with_history(
        MODEL,
        'You are a helpful assistant that can answer questions and help with tasks.',
        history,
        message,
    )


def paracetamol_agent(message):
    """
    Handle a paracetamol question by delegating to the RAG pipeline.

    An earlier dict-returning prompt variant was disabled in the notebook; the
    agent now returns the RAG answer text as-is.

    Args:
        message: The user's message.

    Returns:
        The value returned by ``rag_answer(message)``.
    """
    # Trace which agent was dispatched.
    print('paracetamol_agent is running')
    return rag_answer(message)


def insulin_agent(message):
    """
    Handle an insulin question by delegating to the RAG pipeline.

    An earlier dict-returning prompt variant was disabled in the notebook; the
    agent now returns the RAG answer text as-is.

    Args:
        message: The user's message.

    Returns:
        The value returned by ``rag_answer(message)``.
    """
    # Trace which agent was dispatched.
    print('insulin_agent is running')
    return rag_answer(message)

def drug_id(message):
    """
    Extract the drug names mentioned in ``message`` and dispatch to their agents.

    The LLM is asked for a Python list of drug names. Each distinct name
    (case-insensitive) is routed to its agent; unsupported names produce an
    "Invalid drug" notice.

    Args:
        message: The user's message.

    Returns:
        A list of answer strings, one per distinct drug name. If the model's
        reply cannot be parsed into a list of names, a single-element list
        with an apology is returned instead of raising.
    """
    system_message = '''You are a helpful assistant that picks out all the drug names in a query and gives a list of the drug names present in the query.
                        Return only python list
                        Fix the typos as well if you find any'''
    parse_failure = ["Sorry, I could not identify the drug names in your question. Please try rephrasing it."]

    drugs_raw = call_llm(MODEL, system_message, message)
    try:
        drugs = safe_eval(drugs_raw)
    except (ValueError, SyntaxError, TypeError):
        # The model did not return a parseable literal; fail softly so the
        # chat UI gets a reply instead of a traceback.
        print(f"Could not parse drug list from model output: {drugs_raw!r}")
        return parse_failure

    if isinstance(drugs, str):
        # A bare string would otherwise be iterated character by character.
        drugs = [drugs]
    elif isinstance(drugs, (list, tuple, set)):
        drugs = list(drugs)
    else:
        print(f"Unexpected drug list type from model output: {drugs_raw!r}")
        return parse_failure
    print(drugs)

    agents = {
        'paracetamol': paracetamol_agent,
        'insulin': insulin_agent,
    }

    results = []
    seen = set()
    for drug in drugs:
        key = str(drug).strip().lower()
        # Skip repeats so the same agent is not run twice on one message.
        if key in seen:
            continue
        seen.add(key)

        agent = agents.get(key)
        if agent is not None:
            results.append(agent(message))
        else:
            results.append(f"Invalid drug {drug}. I only answer questions related to paracetamol and insulin")

    return results



In [46]:
def orch(message, history):
    """
    Route a chat message to the general assistant or the drug pipeline.

    Args:
        message: The new user message.
        history: Prior conversation turns supplied by the chat UI.

    Returns:
        A single reply string in every branch. Drug answers, which
        ``drug_id`` returns as a list, are joined with blank lines.
    """
    # Guard against a missing category so the substring checks cannot raise.
    category = cat_agent(message) or ""

    if 'General Query' in category:
        return reply_normal_agent(message, history)

    if 'Drug Information' in category:
        # Flatten the per-drug answers into one string so this branch returns
        # the same type as the others; skip empty answers.
        answers = [str(answer) for answer in drug_id(message) if answer]
        if not answers:
            return "Sorry, I could not find a drug name in your question. I only answer questions related to paracetamol and insulin"
        return "\n\n".join(answers)

    return "Sorry, I could not categorize your question. Please try rephrasing it."

In [None]:
# Serve the orchestrator as a Gradio chat UI using the "messages" history format.
chat_app = gr.ChatInterface(fn=orch, type="messages")
chat_app.launch()

* Running on local URL:  http://127.0.0.1:7864
* To create a public link, set `share=True` in `launch()`.




Category 2 : Drug Information
['paracetamol']
paracetamol_agent is running
