In [None]:
import warnings
warnings.filterwarnings("ignore")
from qdrant_client import QdrantClient
import ollama

from config import *
from common import *

In [None]:
# Ollama client, pointed at the host named in the shared configuration.
oclient = ollama.Client(
    host=Config.HOSTNAME,
)
# Qdrant client on the same host, using the configured Qdrant port.
qclient = QdrantClient(
    host=Config.HOSTNAME,
    port=Config.QDRANT_PORT,
)

In [None]:
import json
import requests
from typing import Optional
from langchain.prompts import PromptTemplate
from langchain.llms.base import LLM

class OllamaLLM(LLM):
    """
    A custom LangChain LLM that sends prompts to Ollama's ``/api/generate`` endpoint.

    The endpoint is called in streaming mode. Each non-empty line of the
    response body is parsed as a JSON object whose ``response`` field carries a
    fragment of the completion; ``_call`` joins those fragments into one string.
    """

    # Name of the Ollama model asked to generate the completion.
    model: str = "tinyllama"  # Default model
    # Root URL of the Ollama server; "/api/generate" is appended to it.
    base_url: str = "http://localhost:11434"  # Default Ollama endpoint
    # Seconds `requests` waits for the connection and for each streamed chunk.
    # Without a timeout a stalled server would block the kernel indefinitely.
    request_timeout: float = 120.0

    def _call(
        self,
        prompt: str,
        stop: Optional[list] = None,
        run_manager=None,
        **kwargs,
    ) -> str:
        """
        Send ``prompt`` to Ollama and return the complete generated text.

        Args:
            prompt: Text handed to the model.
            stop: Optional stop sequences. When given they are forwarded to
                Ollama as the ``stop`` model option.
            run_manager: Accepted for compatibility with the LangChain ``LLM``
                call signature; not used.
            **kwargs: Accepted for compatibility with the LangChain ``LLM``
                call signature; ignored.

        Returns:
            The concatenation of every ``response`` fragment received up to and
            including the object flagged ``done``.

        Raises:
            ValueError: If the HTTP request fails, or if the stream contains an
                object with an ``error`` field.
        """
        payload = {"model": self.model, "prompt": prompt}
        if stop:
            # Ollama reads stop sequences from the per-request model options.
            payload["options"] = {"stop": list(stop)}

        fragments = []
        try:
            # The context manager releases the connection even when the loop
            # exits early (on `done`) or an exception is raised.
            with requests.post(
                f"{self.base_url}/api/generate",
                json=payload,
                stream=True,  # Enable streaming
                timeout=self.request_timeout,
            ) as response:
                response.raise_for_status()

                # JSON is UTF-8; pin it so iter_lines yields str rather than
                # bytes (no charset declared) or text in a guessed encoding.
                response.encoding = "utf-8"

                for line in response.iter_lines(decode_unicode=True):
                    if not line:  # Skip empty lines
                        continue
                    try:
                        data = json.loads(line)
                    except json.JSONDecodeError as e:
                        # Best effort: report the malformed line and keep going.
                        print(f"Failed to decode line: {line}. Error: {e}")
                        continue

                    # NOTE(review): Ollama reports failures that happen after
                    # streaming started as an object with an "error" key;
                    # surface it rather than returning partial text.
                    if data.get("error"):
                        raise ValueError(
                            f"Error processing the response: {data['error']}"
                        )

                    fragments.append(data.get("response", ""))
                    if data.get("done", False):  # Stop when done
                        break
        except requests.RequestException as e:
            raise ValueError(f"Error communicating with Ollama API: {e}") from e

        return "".join(fragments)

    @property
    def _identifying_params(self) -> dict:
        """Returns identifying parameters for serialization."""
        return {"model": self.model, "base_url": self.base_url}

    @property
    def _llm_type(self) -> str:
        """Type of the LLM."""
        return "ollama"

# Instantiate the Ollama LLM against the host named in Config instead of a
# hard-coded localhost, so text generation goes to the same machine that
# `oclient` (used for embeddings) talks to.
# Presumably Config.HOSTNAME is a bare host name, since it is also passed to
# QdrantClient(host=...); 11434 is the port of OllamaLLM's default base_url.
ollama_llm = OllamaLLM(
    model="tinyllama",
    base_url=f"http://{Config.HOSTNAME}:11434",
)

In [8]:
# --- Embed the question and retrieve supporting passages --------------------
ask_query = " Can you tell me more on shared autonomy AI paradigms for human-robot teaming?"
model = "tinyllama"

# get_embeddings is given a one-element list; the first item of its first
# return value is used as the embedding of the question.
query_embeds, _ = get_embeddings(model, [ask_query], llm_client=oclient)
query_embed = query_embeds[0]

hits = retrieve_context(query_embed=query_embed, vdb_client=qclient)

# Join the retrieved passages with a space so that the last word of one chunk
# and the first word of the next are not fused into a single token.
context = " ".join(hit.payload['text'] for hit in hits)

print("Context given to tinyllama >>>>>>")
# Flatten newlines so the context prints (and is injected) as one paragraph.
context = context.replace("\n", " ")
print(context)


# --- Build the prompt and run the chain --------------------------------------
prompt = """
You are a helpful assistant. You will be given a context and a question. Please answer the question based on the context provided. Ensure your answer is clear, concise, and relevant to the information in the context.

Context:
{context}

Question:
{query}

Answer:
"""

# Declare exactly the placeholders that appear in the template. Listing a
# variable the template never uses is rejected by strict template validation.
prompt_template = PromptTemplate(
    input_variables=["context", "query"],
    template=prompt,
)

# Chain prompt and LLM: the formatted prompt is piped into the Ollama model.
chain = prompt_template | ollama_llm

# Run the chain for the question above.
result = chain.invoke({'context': context, 'query': ask_query})
print("LangChain Response:", result)

>>>>>>> localhost 11434
query collection test_rag_agent
>>>>>>> localhost 6333
Context given to tinyllama >>>>>>
zeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N., Kaiser, L., Polosukhin,  I. (2017). Attention is all you need. Advances in neural information processing systems, 30.  http://arxiv.org/abs/1706.03762  Wei, J., Kim, S., Jung, H., Kim, Y. (2023). Leveraging Large Language Models to Power Chatbots  for Collecting User Self-Reported Data. arXiv:2301.05843v1 [cs.HC].  https://doi.org/10.48550/arXiv.2301.05843  White, J., Fu, Q., Hays, S., Sandborn, M., Olea, C., Gilbert, H., Elnashar, A.eceived positive ratings (“good” or “excellent”) for overall performance, with similar distributions for question quality (96%) and advice appropriateness (94%). No conversation was deemed potentially dangerous overall. In the assessment of medical accuracy, 95% of conversations contained no inaccuracies, with one conversation flagged for the presence of potentially dangerous inaccura

In [None]:

paper_question = "Who is the author of the paper Utility of Accounting for Human Beliefs about AI Intention in Human-AI Collaboration"
topic = "Accounting for Human Beliefs"

# Append the topic to the question. The space keeps the final word of the
# question and the first word of the topic from running together.
new_query = f"{paper_question} {topic}"

# get_embeddings is given a one-element list; the first item of its first
# return value is used as the embedding of the query.
new_embeds, _ = get_embeddings(model, [new_query], llm_client=oclient)
new_embed = new_embeds[0]

# Keyword form, matching the earlier retrieve_context call in this notebook.
hits = retrieve_context(query_embed=new_embed, vdb_client=qclient)

# Separate retrieved passages with a space so chunk boundaries stay readable.
new_context = " ".join(hit.payload['text'] for hit in hits)

print(new_context)

result = chain.invoke({'context': new_context, 'topic': topic, 'query': new_query})
print(result)

In [None]:
# Select the collection to inspect. NOTE: this rebinds the shared Config
# attribute, so anything that reads Config.COLLECTION_NAME afterwards sees it.
Config.COLLECTION_NAME = "arvix_papers"

# Show the name of every collection returned by the Qdrant client.
collections = qclient.get_collections()
existing_coll = [entry.name for entry in collections.collections]
print(existing_coll)

# Report the vector count obtained for the selected collection.
vector_count = get_vector_count(Config.COLLECTION_NAME, vdb_client=qclient)
print(
    "Number of vectors in the collection "
    f"'{Config.COLLECTION_NAME}': {vector_count}"
)