In [12]:
from pymongo import MongoClient
from bson import ObjectId
import json

client = MongoClient("mongodb://localhost:27017/")
db = client['clearml']
collection = db['crawled_data']

In [13]:
class JSONEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, ObjectId):
            return str(obj)
        return super(JSONEncoder, self).default(obj)

In [14]:
documents = list(collection.find({}))
with open('data.json', 'w') as f:
    json.dump(documents, f, indent=4, cls=JSONEncoder)

In [15]:
import json

raw_data = "data.json"
with open(raw_data, 'r') as f:
    raw_data = json.load(f)

documents = []
for entry in raw_data:
    document = {
        "id": entry["_id"],
        "title": entry["name"].split(" - YouTube")[0],
        "content": entry["content"],
        "metadata": {
            "platform": entry["platform"].capitalize(),
            "link": entry["link"]
        }
    }
    documents.append(document)

formatted_data = {"documents": documents}
with open('formatted_documents.json', 'w') as f:
    json.dump(formatted_data, f, indent=4)

print("Data transformed and saved to formatted_documents.json")

Data transformed and saved to formatted_documents.json


In [16]:
import openai
import os
from dotenv import load_dotenv

load_dotenv()

openai.api_key = os.getenv('OPEN_AI_API_KEY')

def generate_embeddings(text):
    response = openai.Embedding.create(
        input=text,
        model="text-embedding-ada-002"
    )
    return response['data'][0]['embedding']

for doc in documents:
    doc['embedding'] = generate_embeddings(doc['content'])


In [17]:
def sanitize_text(text):
    return text.encode('utf-8', 'replace').decode('utf-8')

for doc in documents:
    doc["content"] = sanitize_text(doc["content"])

In [18]:
for idx, doc in enumerate(documents, start=1):
    doc["id"] = idx

In [19]:
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct

client = QdrantClient(host="localhost", port=6333)

client.recreate_collection(
    collection_name="robotics-documents",
    vectors_config={"size": 1536, "distance": "Cosine"} 
)

points = [
    PointStruct(
        id=doc["id"],
        vector=doc["embedding"],
        payload={
            "title": doc["title"],
            "content": doc["content"],
            "platform": doc["metadata"]["platform"],
            "link": doc["metadata"]["link"],
        }
    )
    for doc in documents
]

client.upsert(collection_name="robotics-documents", points=points)
print("Data uploaded successfully!")

Data uploaded successfully!


  client.recreate_collection(


In [22]:
query = "What are the three different communication channels?"
query_embedding = generate_embeddings(query)

results = client.search(
    collection_name="robotics-documents",
    query_vector=query_embedding,
    limit=3
)

for match in results:
    print(f"Title: {match.payload['title']}")
    print(f"Content: {match.payload['content']}")
    print(f"Platform: {match.payload['platform']}")
    print(f"Link: {match.payload['link']}\n")

Title: ROS/ROS 2 - What are messages, services and actions?
Content: so in this video we'll talk about three different ways of communicating between different sub modules or packages in ros one message two service and three actions hello guys so this is our second video in our series which is all about ross and robotics in the previous video we talked about what ross exactly is in a very intuitive way and in this video although i said we will talk about ros 2 but before that there is something we need to focus on we need to understand what different communication methodologies are present in ros to connect different modules in a distributed system when we build our robot using ros so in this video we'll talk about three different ways of communicating between different sub modules or packages in ros one message two service and three actions i love understanding everything using examples so let's take an example like the last video in the last video we were talking about a naive example

In [23]:
context = "\n\n".join([f"Title: {match.payload['title']}\n{match.payload['content']}" for match in results])

response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",  # Use "gpt-4" if you want a more advanced model
    messages=[
        {"role": "system", "content": "You are a helpful assistant for answering questions based on provided context."},
        {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"}
    ],
    max_tokens=200,
    temperature=0  # Adjust temperature for deterministic responses
)

print(response['choices'][0]['message']['content'].strip())


The three different communication channels mentioned in the context are:
1. Visual info: Used by the perception module to inform the brain about the presence of trash in the surroundings.
2. Commands: Used by the brain to inform the actuation package about the actions the robot needs to take.
3. Motor states: Used by the brain to get information about the battery status of motors in the actuation package.
