In [1]:
# See README for more info on how the DataCollectionPipeline works
# The retrieval pipeline is part of the DataCollectionPipeline
from shared import getQdrantClient, getEmbeddingsModel
from langchain_community.llms import Ollama
from langchain.prompts import PromptTemplate
from operator import itemgetter
# Create the Qdrant connection; retriever() below runs its vector searches through it.
# NOTE(review): connection details (host, credentials) live in shared.getQdrantClient — not visible here.
qClient = getQdrantClient()

# Set up the text embedder; retriever() calls its embed_query() to turn a query string into a vector.
# NOTE(review): presumably this must be the same embedding model used when the
# collections were populated — confirm against shared.getEmbeddingsModel.
embeddingsModel = getEmbeddingsModel()

# Set up the LLM (served through Ollama). The same instance is reused for
# query expansion, self-querying and producing the final answer.
MODEL = 'llama3.2'
model = Ollama(model=MODEL)

# Retrieval Pipeline
# Retrieve the chunks with the most similar embeddings from Qdrant
def retriever(text, collection, limit=10):
    """Return the points in a Qdrant collection most similar to ``text``.

    Args:
        text: Query string; it is embedded with ``embeddingsModel.embed_query``.
        collection: Name of the Qdrant collection to search.
        limit: Maximum number of hits to request. Defaults to 10, the value
            that used to be hard-coded.

    Returns:
        Whatever ``qClient.search`` returns for the query. The rest of this
        notebook reads ``.id``, ``.score`` and ``.payload`` from each hit.
    """
    queryVector = embeddingsModel.embed_query(text)
    # NOTE(review): newer qdrant-client releases deprecate `search` in favour of
    # `query_points` (which returns a different object) — confirm against the
    # installed version before upgrading.
    return qClient.search(
        collection_name=collection,
        query_vector=queryVector,
        limit=limit,
    )


In [21]:
# The question the pipeline will answer
query = "Can you create a README file for ROS"

# Query expansion: have the LLM rephrase the question once (a single extra
# variant keeps things simple) so retrieval can run against two wordings.
template = """
Rewrite the prompt. The new prompt must offer a different perspective.
Do not change the meaning. Output only the rewritten prompt with no introduction.
    Prompt: {prompt}
"""
prompt = PromptTemplate.from_template(template)
chain = (
    {"prompt": itemgetter("prompt")}
    | prompt
    | model
)
expansionInput = {"prompt": query}
queryExpansion = chain.invoke(expansionInput)
print("Query expansion: ", queryExpansion)

# Self-querying: ask the LLM for one piece of metadata — does the question need
# code as its answer? The flag decides whether the Qdrant collection holding
# GitHub code is searched and which answer prompt is used.
template = """
You are an AI assistant. You must determine if the prompt requires code as the answer.
Output a 1 if it is or a 0 if it is not and nothing else.
    Prompt: {prompt}
"""
prompt = PromptTemplate.from_template(template)
chain = {"prompt": itemgetter("prompt")} | prompt | model
# LLM replies frequently carry stray whitespace, newlines or punctuation
# ("1\n", " 1."). The flag is compared with `== '1'` further down this cell, so
# an un-normalised reply would silently fall through to the non-code branch.
# Reduce the reply to exactly '1' or '0'.
rawCodingReply = chain.invoke({"prompt": query})
codingQuestion = '1' if rawCodingReply.strip().startswith('1') else '0'
print("Coding Question?: ", codingQuestion)

# Filtered vector search: the self-query flag picks the collection, then the
# original query and its expansion are each searched in that collection.
relatedCollection = 'Github' if codingQuestion == '1' else 'Document'
print("Related Collection: ", relatedCollection)
results1, results2 = [
    retriever(searchText, relatedCollection)
    for searchText in (query, queryExpansion)
]

# Pool the hits of both searches (original query first, expansion second)
results = results1 + results2

# Reranking: instead of a CrossEncoder, reuse the similarity scores Qdrant
# returned and keep the best-scoring distinct chunks.
TOP_K = 3  # number of distinct chunks handed to the LLM as context

ids = [result.id for result in results]
scores = [result.score for result in results]

# The two searches can return the same point; remember only the best-scoring
# occurrence of each id (strict `>` keeps the earliest one on ties).
bestIndexById = {}
for i, (pointId, score) in enumerate(zip(ids, scores)):
    best = bestIndexById.get(pointId)
    if best is None or score > scores[best]:
        bestIndexById[pointId] = i

# Highest score first, ties broken by position in `results`. Unlike a manual
# max-scan seeded with 0, this copes with an empty result list, with fewer than
# TOP_K distinct hits (no duplicated chunk), and with non-positive scores.
topIndexes = sorted(bestIndexById.values(), key=lambda i: (-scores[i], i))[:TOP_K]
topIds = [ids[i] for i in topIndexes]

texts = [result.payload['text'] for result in results]
links = [result.payload['link'] for result in results]
topTexts = ''
for index in topIndexes:
    # payload['text'] is indexed with [0] to get the chunk string
    # NOTE(review): presumably a one-element list — confirm against the ingestion code.
    print("Top texts: ", texts[index][0])
    print("Link: ", links[index])
    topTexts += texts[index][0]

# Building prompt: two candidate templates, selected by the self-query flag
codeTemplate = """
    Write code for the following question given the related coding document below.

    Document: {document}
    Question: {question}
    """
answerTemplate = """
    Answer the question based on the document below. If you can't answer the question, reply "I don't know"

    Document: {document}
    Question: {question}
    """
# Coding questions get the code-writing prompt; everything else the Q&A prompt
template = codeTemplate if codingQuestion == '1' else answerTemplate
prompt = PromptTemplate.from_template(template)

# Obtaining answer: feed the reranked context and the original question through
# the chosen prompt into the LLM. The bare last expression is the cell's output.
chain = (
    {"document": itemgetter("document"), "question": itemgetter("question")}
    | prompt
    | model
)
chain.invoke({"document": topTexts, "question": query})

Query expansion:  Create a comprehensive and user-friendly README document for the Robot Operating System (ROS), focusing on its features, benefits, and guidelines for adoption in various industries and applications.
Coding Question?:  1
Related Collection:  Github
Top texts:  nyxml2_vendor.gitversion:rollingros2/tlsf:type:giturl:https://github.com/ros2/tlsf.gitversion:rollingros2/unique_identifier_msgs:type:giturl:https://github.com/ros2/unique_identifier_msgs.gitversion:rollingros2/urdf:type:giturl:https://github.com/ros2/urdf.gitversion:rollingros2/yaml_cpp_vendor:type:giturl:https://github.com/ros2/yaml_cpp_vendor.gitversion:rolling
Link:  https://github.com/ros2/ros2/tree/rolling/ros2.repos
Top texts:  s2/rosidl_core:type:giturl:https://github.com/ros2/rosidl_core.gitversion:rollingros2/rosidl_dds:type:giturl:https://github.com/ros2/rosidl_dds.gitversion:rollingros2/rosidl_defaults:type:giturl:https://github.com/ros2/rosidl_defaults.gitversion:rollingros2/rosidl_dynamic_typesuppor

'Here is a sample README file for ROS (Robot Operating System):\n\n---\n\n# Robot Operating System (ROS)\n\n## Introduction\n\nThe Robot Operating System (ROS) is an open-source software framework that allows developers to build robot applications. It provides a set of tools and libraries that make it easy to develop, test, and deploy robotic systems.\n\n## Features\n\n*   **Cross-platform**: ROS can run on multiple operating systems, including Linux, Windows, and macOS.\n*   **Modular design**: ROS is designed as a collection of interchangeable modules, making it easy to extend or modify the system.\n*   **Extensive community support**: The ROS community provides extensive documentation, tutorials, and forums for help with development and troubleshooting.\n\n## Installing ROS\n\nTo install ROS on your machine:\n\n1.  Download the installation package from the official [ROS website](https://index.ros.org/).\n2.  Follow the instructions in the installation guide to install ROS on your s