In [1]:
import warnings
warnings.filterwarnings("ignore")
from qdrant_client import QdrantClient
import ollama
from config import *
from common import *

In [2]:
# Ollama client: passed as `llm_client` when computing embeddings in later cells.
oclient = ollama.Client(
    host=Config.HOSTNAME,
)
# Qdrant client: passed as `vdb_client` when retrieving context in later cells.
qclient = QdrantClient(
    host=Config.HOSTNAME,
    port=Config.QDRANT_PORT,
)

In [3]:
import json
import requests
from typing import Optional
from langchain.prompts import PromptTemplate
from langchain.llms.base import LLM

class OllamaLLM(LLM):
    """
    A custom LangChain LLM that talks to Ollama's HTTP ``/api/generate`` endpoint.

    The endpoint is called in streaming mode: the response body is read line by
    line, each line is parsed as a JSON object, and the ``"response"`` fragments
    are concatenated into the final completion text.
    """

    model: str = "tinyllama"  # Default model
    base_url: str = "http://localhost:11434"  # Default Ollama endpoint

    def _call(self, prompt: str, stop: Optional[list] = None) -> str:
        """
        Send ``prompt`` to the Ollama API and return the generated text.

        Args:
            prompt: The fully rendered prompt string.
            stop: Optional list of stop sequences. When given, it is forwarded
                to Ollama as ``options.stop`` so generation halts server-side.

        Returns:
            The concatenation of every ``"response"`` fragment received up to
            (and including) the object flagged ``"done"``.

        Raises:
            ValueError: If the HTTP request fails, or if the stream contains an
                ``"error"`` object reported by Ollama.
        """
        payload = {"model": self.model, "prompt": prompt}
        if stop:
            # Previously `stop` was accepted but silently ignored.
            payload["options"] = {"stop": stop}

        try:
            # The context manager closes the response even when we leave the
            # loop early, so the underlying connection is always released.
            with requests.post(
                f"{self.base_url}/api/generate",
                json=payload,
                stream=True,  # Enable streaming
            ) as response:
                response.raise_for_status()

                # Collect fragments and join once instead of repeated `+=`.
                fragments = []
                for line in response.iter_lines(decode_unicode=True):
                    if not line:  # Skip empty keep-alive lines
                        continue
                    try:
                        data = json.loads(line)
                    except json.JSONDecodeError as e:
                        # Best effort: report the malformed line and keep reading.
                        print(f"Failed to decode line: {line}. Error: {e}")
                        continue

                    # Surface server-side failures instead of returning a
                    # silently empty or truncated answer.
                    if data.get("error"):
                        raise ValueError(f"Ollama returned an error: {data['error']}")

                    fragments.append(data.get("response", ""))
                    if data.get("done", False):  # Stop when done
                        break

                return "".join(fragments)
        except requests.RequestException as e:
            raise ValueError(f"Error communicating with Ollama API: {e}") from e
        except ValueError as e:
            raise ValueError(f"Error processing the response: {e}") from e

    @property
    def _identifying_params(self) -> dict:
        """Returns identifying parameters for serialization."""
        return {"model": self.model, "base_url": self.base_url}

    @property
    def _llm_type(self) -> str:
        """Type of the LLM."""
        return "ollama"

# Build the LangChain-compatible wrapper around the local Ollama server.
# NOTE(review): host/port are hard-coded here while the clients above use Config — confirm this is intended.
ollama_llm = OllamaLLM(
    model="tinyllama",
    base_url="http://localhost:11434",
)

In [4]:
# --- Embed the question and retrieve supporting passages ---
ask_query = " Can you tell me something about distributed quantum computing"
model = "tinyllama"
query_embed, _ = get_embeddings(model, [ask_query], llm_client=oclient)
# One query was submitted; index 0 is presumably its embedding (verify against get_embeddings).
query_embed = query_embed[0]

hits = retrieve_context(query_embed=query_embed, vdb_client=qclient)
# Join passages with a blank line so text from adjacent hits is not fused together.
context = "\n\n".join(hit.payload['text'] for hit in hits)

# --- Build the prompt ---
topic = "quantum computing"

prompt = """
You are an assistant who answers questions based on provided context and a specific topic.
Keep the response succinct, under 50 words. Do not repeat the context or the
question text in the answer.

Context:
{context}

Topic:
{topic}

Question:
{query}

Answer:
"""

prompt_template = PromptTemplate(
    input_variables=["context", "topic", "query"],  # Variables to inject
    template=prompt,
)

# --- Chain the prompt into the LLM and run it ---
chain = prompt_template | ollama_llm
result = chain.invoke({'context': context, 'topic': topic, 'query': ask_query})
print("LangChain Response:", result)

>>>>>>> localhost 11434
>>>>>>> localhost 6333
LangChain Response: Sure! Here's a succinct version of my answer to your question:

Yes, I can provide you with some information about distributed quantum computing. Distributed quantum computing refers to the use of quantum mechanics for performing complex calculations on distributed nodes over an Internet-scale network. This involves using quantum computers to solve problems that are computationally intensive but impossible to solve using classical computers. Examples of such problems include cryptography, signal processing, and drug discovery. In terms of applications, distributed quantum computing has the potential to revolutionize various industries like finance, healthcare, and manufacturing, as it can perform highly complex calculations in real-time, without any need for storage or centralized control. Overall, distributed quantum computing offers significant advancements in fields like cryptography and drug discovery, as well as ma

In [8]:

# Follow-up question; reuses `model` and `chain` defined in the earlier query cell.
new_query = "On which date the paper on rise of quantum computing published and in which publication?"
topic = "quantum computing"

new_embed, _ = get_embeddings(model, [new_query], llm_client=oclient)
# One query was submitted; index 0 is presumably its embedding (verify against get_embeddings).
new_embed = new_embed[0]

hits = retrieve_context(new_embed, vdb_client=qclient)
# Join passages with a blank line so text from adjacent hits is not fused together.
new_context = "\n\n".join(hit.payload['text'] for hit in hits)

result = chain.invoke({'context': new_context, 'topic': topic, 'query': new_query})
print(result)

>>>>>>> localhost 11434
>>>>>>> localhost 6333
The answer to the question "On which date the paper on rise of quantum computing was published" is as follows: The paper titled "The Risese of Quantum Internet Computing" was published in the August issue of IEEE IoT Magazine, Vol. XX, No. X, X 2022.
