In [None]:
# Install (or upgrade to) the newest weaviate-client release in the kernel's environment.
# NOTE(review): the version is unpinned, so a fresh run may pick up a different client
# release than the one this notebook was written against — consider pinning.
%pip install -U weaviate-client

In [None]:
import weaviate, os
from weaviate.config import AdditionalConfig, Timeout
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Connection settings and model-provider API keys, all read from the environment
CLUSTER_URL = os.getenv("CLUSTER_URL")
API_KEY = os.getenv("API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Fail early with a readable message instead of an opaque connection/auth error
# when the two settings required to reach the cluster are absent.
missing_settings = [
    name
    for name, value in (("CLUSTER_URL", CLUSTER_URL), ("API_KEY", API_KEY))
    if not value
]
if missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_settings)
    )

# Forward only the provider keys that are actually set; an unset key would
# otherwise be sent along as a None header value.
provider_headers = {
    header: key
    for header, key in (
        ("X-OpenAI-Api-Key", OPENAI_API_KEY),
        ("X-Cohere-Api-Key", COHERE_API_KEY),
        ("X-Goog-Api-Key", GOOGLE_API_KEY),
    )
    if key
}

# Connect to Weaviate
client = weaviate.connect_to_weaviate_cloud(
    cluster_url=CLUSTER_URL,
    auth_credentials=weaviate.auth.AuthApiKey(API_KEY),
    headers=provider_headers,
    additional_config=AdditionalConfig(
        # Timeouts in seconds for connection init, queries and inserts
        timeout=Timeout(init=30, query=60, insert=120)
    ),
)

# Collect the status information once, then report it
ready = client.is_ready()
server_version = client.get_meta()["version"]
client_version = weaviate.__version__
live = client.is_live()
connected = client.is_connected()

print(f"Weaviate Ready: {ready}")
print(f"Weaviate Client Version: {client_version}")
print(f"Weaviate Server Version: {server_version}")
# Reuse the value fetched above rather than issuing a second liveness request
print(f"Weaviate Live: {live}")
print(f"Client Connected: {connected}")

In [None]:
# Create dummy collections with cross-references
from weaviate.classes.config import Property, DataType, ReferenceProperty, Configure

# Collections persist on the server between kernel sessions, so each create call
# is guarded: re-running this cell skips collections that already exist instead
# of failing on the second `create`.

# Step 1: Create the Report collection with UUID as a property
if not client.collections.exists("Report"):
    client.collections.create(
        name="Report",
        description="A report containing metadata",
        vector_config=Configure.Vectors.text2vec_openai(),
        properties=[
            Property(name="report_uuid", data_type=DataType.UUID, index_filterable=True, index_searchable=False),
            Property(name="title", data_type=DataType.TEXT, index_filterable=True, index_searchable=True),
            Property(name="author", data_type=DataType.TEXT, index_filterable=True, index_searchable=True),
            Property(name="newspaper", data_type=DataType.TEXT, index_filterable=True, index_searchable=True),
            Property(name="sip", data_type=DataType.TEXT_ARRAY, index_filterable=True, index_searchable=True),
        ],
    )

# Step 2: Create the Chunk collection with UUID as a property and a reference to Report.
# Report is created first because the reference below targets it.
if not client.collections.exists("Chunk"):
    client.collections.create(
        name="Chunk",
        description="Chunks of a report's text",
        vector_config=Configure.Vectors.text2vec_openai(),
        properties=[
            Property(name="chunk_uuid", data_type=DataType.UUID, index_filterable=True, index_searchable=False),
            Property(name="chunk_text", data_type=DataType.TEXT, index_filterable=True, index_searchable=True),
        ],
        references=[
            ReferenceProperty(
                name="belongsToReport",  # The name of the reference
                target_collection="Report",  # The collection that this points to
            )
        ],
    )

In [None]:
# Sample Report records; each carries its own custom UUID in `report_uuid`
reports_data = [
    {
        "report_uuid": "f9b7c0c1-29b5-4b8e-8a07-ded92e570b67",
        "title": "The Future of Artificial Intelligence",
        "author": "Alice Johnson",
        "newspaper": "Tech Review",
        "sip": ["AI", "Future", "Ethics"],
    },
    {
        "report_uuid": "a1b2c3d4-5678-90ab-cdef-1234567890ab",
        "title": "Climate Change and Technology",
        "author": "Bob Smith",
        "newspaper": "Global News",
        "sip": ["Climate", "Technology", "Innovation"],
    },
]

# Maps each custom report UUID to the object UUID returned by the insert call,
# so later cells can attach references to the stored Report objects.
custom_to_weaviate_uuid = {}
report_collection = client.collections.get("Report")

for record in reports_data:
    assigned_id = report_collection.data.insert(properties=record)
    custom_id = record["report_uuid"]
    custom_to_weaviate_uuid[str(custom_id)] = str(assigned_id)
    print(f"Inserted report with custom UUID {custom_id} -> Weaviate UUID {assigned_id}")

print("Reports inserted successfully.")


In [None]:
# Dummy Chunks insertion snippet
# `report_custom_uuid` is not a Chunk property: it only names the parent report
# and is resolved to the stored Report's object UUID before insertion.
chunks_data = [
    {
        "chunk_uuid": "c1d2e3f4-5678-90ab-cdef-1234567890ab",
        "chunk_text": "This is the first chunk of the AI report.",
        "report_custom_uuid": "f9b7c0c1-29b5-4b8e-8a07-ded92e570b67"
    },
    {
        "chunk_uuid": "d4e5f6a7-1234-5678-90ab-cdef12345678",
        "chunk_text": "This is the first chunk of the Climate report.",
        "report_custom_uuid": "a1b2c3d4-5678-90ab-cdef-1234567890ab"
    }
]

chunk_collection = client.collections.get("Chunk")

for chunk in chunks_data:
    report_custom_uuid = chunk["report_custom_uuid"]
    weaviate_uuid = custom_to_weaviate_uuid[report_custom_uuid]

    # Build the property payload without touching `chunks_data`, so the source
    # records still hold their parent link after this cell has run.
    chunk_properties = {
        key: value for key, value in chunk.items() if key != "report_custom_uuid"
    }

    chunk_collection.data.insert(
        properties=chunk_properties,
        references={"belongsToReport": weaviate_uuid}
    )
    print(f"Inserted chunk with reference to Weaviate UUID {weaviate_uuid}")

print("Chunks inserted successfully.")

In [None]:
# Look up a single Chunk by its custom UUID and pull in its parent Report's metadata
from pprint import pprint
from weaviate.classes.query import Filter, QueryReference

chunks_coll = client.collections.get("Chunk")

chunk_objects = chunks_coll.query.fetch_objects(
    filters=Filter.by_property("chunk_uuid").equal("c1d2e3f4-5678-90ab-cdef-1234567890ab"),
    return_references=QueryReference(
        # Follow the Chunk -> Report reference and fetch these Report properties
        link_on="belongsToReport",
        return_properties=["title", "author", "newspaper", "sip"],
    ),
)

for found in chunk_objects.objects:
    print(f"Chunk UUID: {found.uuid}")
    print(f"Chunk Text: {found.properties['chunk_text']}")

    # Guard clause: nothing more to print when the reference was not returned
    if not found.references or "belongsToReport" not in found.references:
        print("No references found.")
        continue

    for linked in found.references["belongsToReport"].objects:
        print("Referenced Report:")
        parent = linked.properties
        print(f"  Title: {parent['title']}")
        print(f"  Author: {parent['author']}")
        print(f"  Newspaper: {parent['newspaper']}")
        print(f"  SIP: {parent['sip']}")

In [None]:
# Hybrid Search Query on Parent collection
from weaviate.classes.query import Filter, MetadataQuery

coll = client.collections.get("Report")

# Both conditions apply to Report's own properties: `sip` is a TEXT_ARRAY
# defined on Report, and Report declares no reference property to filter through.
filters = (
    Filter.by_property("title").equal("The Future of Artificial Intelligence")
    & Filter.by_property("sip").contains_any(["Future"])
)

response = coll.query.hybrid(
    query="AI",
    alpha=0.5,
    # Pass the filter object built above (not Python's builtin `filter`)
    filters=filters,
    # Request the metadata fields that are printed below
    return_metadata=MetadataQuery(score=True, explain_score=True),
)

for o in response.objects:
    print(o.properties)
    print(o.metadata.score, o.metadata.explain_score)

In [None]:
# Hybrid search over the child (Chunk) collection, filtered by parent Report data
from weaviate.classes.query import QueryReference, Filter

coll = client.collections.get("Chunk")  # Query from Chunk collection

# Keep only chunks whose referenced Report has "Future" among its `sip` values
filters = (
    Filter.by_ref("belongsToReport")
    .by_property("sip")
    .contains_any(["Future"])
)

response = coll.query.hybrid(
    query="AI",
    alpha=0.5,
    filters=filters,
    return_references=QueryReference(link_on="belongsToReport"),
)

print("=== Results ===")
for hit in response.objects:
    print("Chunk properties:", hit.properties)
    # Show the first referenced Report when the reference was returned
    refs = hit.references
    if refs and "belongsToReport" in refs:
        parent = refs["belongsToReport"].objects[0]
        print("Referenced Report:", parent.properties)
    print("---")