In [1]:
from langchain import PromptTemplate
from langchain_community.llms import LlamaCpp
from langchain.chains import ConversationalRetrievalChain
from langchain_community.embeddings import SentenceTransformerEmbeddings
from qdrant_client import QdrantClient
from langchain_community.vectorstores import Qdrant
import gradio as gr

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Set the model name
MODEL_NAME = "Medico"

# Initialize LLM with optimized parameters
local_llm = "BioMistral-7B.Q4_K_M.gguf"
llm = LlamaCpp(
    model_path=local_llm,
    temperature=0.3,
    max_tokens=2048,  # Reduced from 2048
    top_p=1,
    n_ctx=1024,  # Reduced from 2048
    n_batch=512,  # Added batch size for faster processing
    f16_kv=True,  # Use half-precision for key/value cache
    verbose=False  # Disable verbose output for speed
)

In [3]:
# Optimize embeddings
embeddings = SentenceTransformerEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")


In [4]:
# Initialize Qdrant client and database
url = "http://localhost:6333"
client = QdrantClient(url=url, prefer_grpc=True)  # Changed to prefer gRPC for faster communication
db = Qdrant(client=client, embeddings=embeddings, collection_name="vector_db")
retriever = db.as_retriever(search_kwargs={"k": 1})


In [5]:
# Initialize conversation chain
chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever)

In [6]:
# Updated prompt template
prompt_template = f"""You are {MODEL_NAME}, an AI assistant specialized in biomedical knowledge. 
Use the following information to answer the user's question. If you don't know the answer, say so honestly.

Chat History: {{chat_history}}
Question: {{question}}

Provide a concise, well-explained answer:
"""

In [7]:
prompt = PromptTemplate(template=prompt_template, input_variables=["chat_history", "question"])


In [8]:
chat_history = []

def predict(message, history):
    if any(phrase in message.lower() for phrase in ["what's your name", "what is your name", "who are you", "your name"]):
        response = f"My name is {MODEL_NAME}. I'm an AI assistant specialized in biomedical knowledge. How can I help you today?"
        print(f"Name asked. Responding with: {response}")  # Debug print
        return response

    # Process the message
    response = chain({"question": message, "chat_history": chat_history})
    answer = response['answer']

    # Update chat history
    chat_history.append((message, answer))

    return answer

# Create a chat interface using Gradio
gr.ChatInterface(
    predict,
    title=f"Chat with {MODEL_NAME}",
    description="I'm an AI assistant specialized in biomedical knowledge. Ask me anything!",
).launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




  warn_deprecated(


Name asked. Responding with: My name is Medico. I'm an AI assistant specialized in biomedical knowledge. How can I help you today?
