In [None]:
# Install the Weaviate Python client, upgrading it if already present (-U).
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -U weaviate-client

In [None]:
# Connection to Weaviate
import weaviate, os
from weaviate.classes.init import Auth
from weaviate.classes.init import AdditionalConfig, Timeout
from dotenv import load_dotenv

# Load environment variables from a local .env file (if one is present)
load_dotenv()

# Retrieve connection settings and model-provider keys from the environment
CLUSTER_URL = os.getenv("CLUSTER_URL")
API_KEY = os.getenv("API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
COHERE_API_KEY = os.getenv("COHERE_API_KEY")

# Fail early with a readable message instead of passing None to the client,
# which would otherwise surface as an obscure connection/auth error.
missing_settings = [
    name
    for name, value in (("CLUSTER_URL", CLUSTER_URL), ("API_KEY", API_KEY))
    if not value
]
if missing_settings:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(missing_settings)}"
    )

# Only forward provider keys that are actually set; a header whose value is
# None is not a valid HTTP header.
provider_headers = {
    header: key
    for header, key in (
        ("X-OpenAI-Api-Key", OPENAI_API_KEY),
        ("X-Cohere-Api-Key", COHERE_API_KEY),
    )
    if key
}

# Connect to Weaviate Cloud.
# connect_to_weaviate_cloud is the current name of the deprecated connect_to_wcs,
# and Auth.api_key uses the Auth helper this cell already imports.
client = weaviate.connect_to_weaviate_cloud(
    cluster_url=CLUSTER_URL,
    auth_credentials=Auth.api_key(API_KEY),
    headers=provider_headers or None,
    additional_config=AdditionalConfig(
        # Timeouts in seconds for the init, query and insert phases
        timeout=Timeout(init=30, query=60, insert=120)
    ),
)

# Check if the client is ready
ready = client.is_ready()
# Retrieve Weaviate server version
server_version = client.get_meta()["version"]
# Retrieve Weaviate client version
client_version = weaviate.__version__

print(f"Weaviate client is ready: {ready}")
print(f"Weaviate Client Version: {client_version}")
print(f"Weaviate Server Version: {server_version}")



In [None]:
from weaviate.classes.config import Property, DataType, ReferenceProperty, Configure

# Create the Report and Chunk collections as an example for cross-references.
# Each creation is skipped when the collection already exists, so this cell can
# be re-run against the same cluster without raising.

# Step 1: Create the Report collection with UUID as a property
if client.collections.exists("Report"):
    print("Collection 'Report' already exists; skipping creation.")
else:
    client.collections.create(
        name="Report",
        description="A report containing metadata",
        vectorizer_config=Configure.Vectorizer.text2vec_openai(),
        properties=[
            Property(name="report_uuid", data_type=DataType.UUID, index_filterable=True, index_searchable=False),
            Property(name="title", data_type=DataType.TEXT, index_filterable=True, index_searchable=True),
            Property(name="author", data_type=DataType.TEXT, index_filterable=True, index_searchable=True),
            Property(name="newspaper", data_type=DataType.TEXT, index_filterable=True, index_searchable=True),
        ]
    )

# Step 2: Create the Chunk collection with UUID as a property and a reference to Report.
# Report is created first because the reference below targets it.
if client.collections.exists("Chunk"):
    print("Collection 'Chunk' already exists; skipping creation.")
else:
    client.collections.create(
        name="Chunk",
        description="Chunks of a report's text",
        vectorizer_config=Configure.Vectorizer.text2vec_openai(),
        properties=[
            Property(name="chunk_uuid", data_type=DataType.UUID, index_filterable=True, index_searchable=False),
            Property(name="chunk_text", data_type=DataType.TEXT, index_filterable=True, index_searchable=True)
        ],
        references=[
            ReferenceProperty(
                name="belongsToReport",  # The name of the reference
                target_collection="Report"  # The collection that this points to
            )
        ]
    )

In [None]:
from weaviate.classes.config import ReferenceProperty

# Add the Report -> Chunk reference after both collections exist.
# It cannot be declared when Report is created because Chunk does not exist yet.
report = client.collections.get("Report")

# Adding a reference property that is already defined raises, so check the
# current schema first to keep this cell safe to re-run.
existing_reference_names = {ref.name for ref in report.config.get().references}

if "belongsToChunk" in existing_reference_names:
    print("Reference 'belongsToChunk' already exists on 'Report'; skipping.")
else:
    report.config.add_reference(
        ReferenceProperty(
            name="belongsToChunk",  # The name of the reference
            target_collection="Chunk"  # The collection that this points to
        )
    )

In [None]:
# Example records to insert into the Report and Chunk collections.
# Each record is a dict with the keys:
#   "chunk_uuid" - identifier string; the insertion cell uses it as the object
#                  UUID for both the Report object and the Chunk object
#   "title", "author", "newspaper" - metadata stored on the Report object
#   "words"      - the full report text, stored as the Chunk's "chunk_text"
# The adjacent string literals inside each "words" tuple-like parenthesis are
# concatenated by Python into one string.
reports_data = [
    {
        # Record 1: artificial intelligence
        "chunk_uuid": "f9b7c0c1-29b5-4b8e-8a07-ded92e570b67",
        "title": "The Future of Artificial Intelligence",
        "author": "Alice Johnson",
        "newspaper": "Tech Review",
        "words": (
            "Artificial Intelligence (AI) has become one of the most transformative technologies of the 21st century. "
            "From self-driving cars to intelligent personal assistants, AI is permeating every aspect of modern life. "
            "In the future, AI is expected to revolutionize industries such as healthcare, education, and manufacturing. "
            "For instance, AI-powered systems can already diagnose medical conditions with greater accuracy than human doctors, "
            "and AI is being used to personalize educational content to better suit individual learning styles. "
            "As AI continues to evolve, ethical concerns surrounding its use are also growing. Issues such as job displacement, "
            "privacy, and security are at the forefront of debates surrounding AI. The development of AI must be guided by strong ethical standards to ensure that it benefits society as a whole. "
            "Moreover, AI's ability to process large amounts of data and identify patterns will enable breakthroughs in areas like climate change modeling and drug discovery. "
            "Despite the challenges, the potential of AI to solve complex problems and improve lives is enormous. The future of AI is filled with both promise and uncertainty, "
            "and it will be fascinating to see how it continues to unfold in the coming decades. "
            "Researchers and policymakers must work together to create a balanced approach that maximizes the benefits of AI while minimizing its risks."
        )
    },
    {
        # Record 2: global economy
        "chunk_uuid": "b23a4627-fddf-4f95-b29d-dfbeed4b4c18",
        "title": "Global Economic Trends in 2024",
        "author": "James Walker",
        "newspaper": "Global Finance",
        "words": (
            "As the world enters 2024, the global economy faces a unique set of challenges and opportunities. "
            "In many developed countries, inflation rates are beginning to stabilize after the turbulence of the past few years. "
            "However, inflation remains a significant concern in emerging economies, where price increases are impacting the cost of living for millions of people. "
            "At the same time, the ongoing global supply chain disruptions continue to affect industries such as technology, automotive, and agriculture. "
            "Governments worldwide are focused on rebuilding their economies by implementing policies to boost growth, improve employment rates, and foster innovation. "
            "In addition, the rise of digital currencies and blockchain technology has created new opportunities for financial transactions, but also raised questions about regulation and security. "
            "The role of central banks remains critical in shaping the economic landscape, with many countries closely watching the actions of the Federal Reserve and European Central Bank. "
            "Climate change is also becoming a significant driver of economic policy. Governments are beginning to prioritize green investments, renewable energy, and sustainable practices. "
            "The economic shift towards sustainability is expected to continue throughout 2024 and beyond, creating new industries and transforming existing ones. "
            "As economies worldwide recover from the effects of the pandemic, the focus will be on ensuring that growth is both resilient and inclusive. "
            "The next year promises to be pivotal in shaping the future trajectory of the global economy."
        )
    },
    {
        # Record 3: renewable energy
        "chunk_uuid": "4e18db1e-67d8-4b3c-99ea-ff9db35b4b4d",
        "title": "The Rise of Renewable Energy in 2024",
        "author": "Sophia Green",
        "newspaper": "Environmental Times",
        "words": (
            "Renewable energy has been on the rise for the past decade, but 2024 is expected to be a milestone year in the transition to cleaner, "
            "more sustainable energy sources. The global push to combat climate change has led to significant investments in solar, wind, and hydroelectric power. "
            "In particular, the cost of solar energy has dropped dramatically, making it more accessible for both businesses and homeowners. "
            "Wind energy has also seen tremendous growth, particularly in offshore wind farms, which have proven to be highly efficient. "
            "Governments worldwide are implementing policies to incentivize the use of renewable energy, offering subsidies, tax breaks, and regulatory support. "
            "As more companies and individuals embrace renewable energy, the demand for energy storage solutions is also growing. "
            "Batteries that can store excess energy for later use are becoming increasingly important to ensure the reliability of renewable sources. "
            "In addition, the rise of electric vehicles (EVs) is closely tied to the growth of renewable energy. EVs require charging infrastructure, "
            "and as more people adopt electric cars, the need for clean, renewable power to fuel them becomes even more critical. "
            "The transition to renewable energy is not without challenges, however. "
            "In some regions, the grid infrastructure is outdated, and integrating renewable energy sources can be difficult. "
            "Despite these challenges, the future of renewable energy looks bright, and 2024 will likely be remembered as a turning point in the global shift towards sustainability."
        )
    },
    {
        # Record 4: quantum computing (this UUID is the one looked up by the query cell)
        "chunk_uuid": "3cf7461b-fb93-46a4-88b1-d2b13f3e456f",
        "title": "Advancements in Quantum Computing",
        "author": "David Carter",
        "newspaper": "Tech Innovations",
        "words": (
            "Quantum computing is an emerging field that holds the potential to revolutionize industries such as cybersecurity, artificial intelligence, "
            "and materials science. Unlike classical computers, which process information in binary, quantum computers use quantum bits, or qubits, "
            "which can exist in multiple states at once. This allows quantum computers to perform calculations that would be impossible for classical computers. "
            "One of the most promising applications of quantum computing is in the field of cryptography. Quantum computers could potentially break current encryption methods, "
            "but they could also be used to create unbreakable encryption, leading to a new era of secure communications. "
            "In the field of artificial intelligence, quantum computing could enable faster processing of large datasets, leading to more powerful AI models. "
            "Additionally, quantum simulations could be used to accelerate drug discovery, design new materials, and solve complex optimization problems. "
            "While quantum computers are still in the early stages of development, companies such as IBM, Google, and Microsoft are making significant progress. "
            "As the technology matures, it is expected that quantum computers will become more accessible and affordable, paving the way for widespread adoption. "
            "Despite the excitement surrounding quantum computing, many challenges remain. For instance, qubits are highly sensitive to environmental factors, "
            "which makes it difficult to maintain their stability. Additionally, the algorithms needed to harness the full potential of quantum computing are still being developed. "
            "Nevertheless, the potential of quantum computing is enormous, and it is widely believed that it will play a key role in shaping the future of technology."
        )
    }
]

In [None]:
import weaviate

# Insert every example record as one Report object and one Chunk object, then
# link the two objects in both directions.

# Resolve the collection handles once; they do not change between iterations.
report_collection = client.collections.get("Report")
chunk_collection = client.collections.get("Chunk")

# The loop variable is deliberately not called `report`, which is the name of
# the Report collection handle created in an earlier cell.
for report_record in reports_data:
    # Step 1: The sample data carries a single identifier per record, stored
    # under "chunk_uuid". The same value is used as the object UUID in both
    # the Report and the Chunk collection.
    report_uuid = report_record["chunk_uuid"]
    chunk_uuid = report_record["chunk_uuid"]

    # Inserting an object whose UUID already exists raises, so only insert the
    # objects that are missing. This keeps the cell safe to re-run.
    report_is_new = not report_collection.data.exists(report_uuid)
    chunk_is_new = not chunk_collection.data.exists(chunk_uuid)

    # Step 2: Insert the Report object (metadata only)
    if report_is_new:
        report_obj = {
            "report_uuid": report_uuid,  # Use the provided UUID
            "title": report_record["title"],
            "author": report_record["author"],
            "newspaper": report_record["newspaper"]
        }
        report_collection.data.insert(properties=report_obj, uuid=report_uuid)

    # Step 3: Insert the Chunk object holding the report text
    if chunk_is_new:
        chunk_obj = {
            "chunk_uuid": chunk_uuid,  # Use the provided UUID
            "chunk_text": report_record["words"]  # Store the entire text as is
        }
        chunk_collection.data.insert(properties=chunk_obj, uuid=chunk_uuid)

    # Step 4: Create two-way references.
    # References are only added from objects inserted in this run, so a re-run
    # does not append duplicate references to objects that already exist.
    # NOTE(review): an object left over from a run that failed between insert
    # and reference_add will not get its reference here - confirm acceptable.
    if chunk_is_new:
        # Add a reference from Chunk to Report
        chunk_collection.data.reference_add(
            from_uuid=chunk_uuid,
            from_property="belongsToReport",
            to=report_uuid
        )

    if report_is_new:
        # Add a reference from Report to Chunk
        report_collection.data.reference_add(
            from_uuid=report_uuid,
            from_property="belongsToChunk",
            to=chunk_uuid
        )

print("Data insertion and two-way referencing completed successfully!")

In [None]:
from pprint import pprint
from weaviate.classes.query import Filter, QueryReference

# Look up one Chunk by its chunk_uuid property and pull in the metadata of the
# Report it points to through the belongsToReport reference.
chunks_coll = client.collections.get("Chunk")

# Restrict the result to the chunk with this chunk_uuid value
uuid_filter = Filter.by_property("chunk_uuid").equal("3cf7461b-fb93-46a4-88b1-d2b13f3e456f")

# Follow the reference and return only these properties of the linked Report
report_reference = QueryReference(
    link_on="belongsToReport",
    return_properties=["title", "author", "newspaper"],
)

chunk_objects = chunks_coll.query.fetch_objects(
    filters=uuid_filter,
    return_references=report_reference,
)

# Print each matching chunk followed by its linked report(s), if any
for chunk_obj in chunk_objects.objects:
    print(f"Chunk UUID: {chunk_obj.uuid}")
    print(f"Chunk Text: {chunk_obj.properties['chunk_text']}")

    linked = chunk_obj.references
    if not linked or "belongsToReport" not in linked:
        # Nothing came back for the reference property
        print("No references found.")
        continue

    for ref in linked["belongsToReport"].objects:
        print("Referenced Report:")
        print(f"  Title: {ref.properties['title']}")
        print(f"  Author: {ref.properties['author']}")
        print(f"  Newspaper: {ref.properties['newspaper']}")