In [6]:
import dotenv

# Load settings from a local .env file (python-dotenv) before any helper is
# used; the call's return value is shown as this cell's output.
# NOTE(review): presumably the rag helpers below read credentials/endpoints
# from the environment — confirm.
dotenv.load_dotenv()

True

## Init Client

In [7]:
from rag import weaviate_utils

# Create the Weaviate client through the project helper. `client` is reused by
# the vector-store, population-check, and raw-query cells further down.
client = weaviate_utils.get_weaviate_client()

## Load Markdown

In [8]:
from llama_index import Document

from rag import load_utils

# Directory (relative to this notebook) containing the files to index.
MISSION_DOCS_DIR = "../mission-docs"

# Read that directory into llama_index Document objects via the project loader.
documents: list[Document] = load_utils.load_data_dir(dir_path=MISSION_DOCS_DIR)

# Display the first document as a quick look at the loaded text and metadata.
documents[0]

Document(id_='4146f49b-880a-45c4-8165-8f5c1f879168', embedding=None, metadata={'file_path': '../mission-docs/console.md', 'file_name': 'console.md', 'file_type': 'text/markdown', 'file_size': 41188, 'creation_date': '2024-02-10', 'last_modified_date': '2024-02-10', 'last_accessed_date': '2024-02-12'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='\n\n3 Console\n\n', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n')

## Import into Weaviate

In [9]:
from rag import weaviate_utils

# Weaviate class name used throughout this notebook; also referenced by the
# population check and the raw near-text query below.
CLASS_NAME = "MarkdownDocsChunk"
# Wrap the client and class name with the project helper; the result is handed
# to llama_index (VectorStoreIndex / StorageContext) in the next cell.
vector_store = weaviate_utils.as_vector_store(client, CLASS_NAME)



In [10]:
from llama_index import VectorStoreIndex, StorageContext
from rag import weaviate_utils, index_utils

# Build `index` once: populate Weaviate from the loaded documents only when
# the class is not populated yet, otherwise attach to the existing data.
needs_population = not weaviate_utils.is_populated(client, class_name=CLASS_NAME)

if needs_population:
    # First run: build the index from `documents` through the project helper.
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    index = index_utils.populate_index(storage_context, documents)
else:
    # Data already present: create the index directly on the vector store.
    index = VectorStoreIndex.from_vector_store(vector_store)

Generating embeddings:   0%|          | 0/133 [00:00<?, ?it/s]

## Query Test

In [11]:
# Create a query engine from the index (no extra arguments passed); it is used
# by the sample question in the next cell.
query_engine = index.as_query_engine()

In [12]:
# Sample troubleshooting question used to smoke-test the query engine.
question = "The ECS controller is malfunctioning, why could that be?"
response = query_engine.query(question)

# Print only the answer text of the response object.
print(response.response)

The ECS controller could be malfunctioning due to various reasons such as incorrect configurations of the subsystems, a failure in one or more of the essential subsystems (Water Management Section, Oxygen Supply and Cabin Pressure Control Section, Atmosphere Revitalization System, and Heat Transport Section), or a technical issue with the controller itself.


In [14]:
# Query Weaviate directly (without the llama_index query engine) to inspect
# which stored chunks a near-text search returns for a question.
MAX_CHUNKS = 5  # number of chunks to fetch and print

response = (
    client.query
    .get(CLASS_NAME, properties=["chunk"])
    .with_near_text({"concepts": ["Who operates the Aegis Athena mission?"]})
    .with_limit(MAX_CHUNKS)  # ask the server only for what is printed below
    .do()
)

# A failed GraphQL query reports its problems under "errors"; surface them
# here instead of failing on the indexing below with an opaque
# KeyError/TypeError.
if response.get("errors"):
    raise RuntimeError(f"Weaviate query failed: {response['errors']}")

# Guard against a null class entry so an empty result prints nothing.
chunks = response["data"]["Get"][CLASS_NAME] or []
for idx, doc in enumerate(chunks):
    chunk = doc["chunk"]
    print(f"Chunk #{idx}: {chunk}\n\n")

Chunk #0: 

3.1 Introduction

The Aegis Athena space mission is an illustrious epitome of such a pursuit. Primarily destined to accomplish a monumental milestone of delivering the first human astronaut to the lunar surface, this visionary mission is governed by a sophisticated piece of technology known as the S.P.A.C.E.C.R.A.F.T. console.

This highly innovative system adopts a dual-purpose role, integrating the essential features of both a command model and a service module. This strategically harmonious blend of utilitarian efficiency and comforting resources paves the way for an environment conducive for the astronaut-trio executing this expedition, turning it into a virtual sanctuary amidst the vast cosmos. The S.P.A.C.E.C.R.A.F.T. console operates with a plethora of commands, designed to modify the inherent system configurations and retrieve state-of-the-art status information. Such telemetry data is indubitably critical in assessing the spacecraft's performance and securing the t