In [None]:
# download the weaviate client
%pip install -U weaviate-client

In [None]:
import weaviate, os
from weaviate.config import AdditionalConfig, Timeout, ConnectionConfig
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Retrieve connection settings and model-provider API keys from the environment
CLUSTER_URL = os.getenv("CLUSTER_URL")
API_KEY = os.getenv("API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
COHERE_API_KEY = os.getenv("COHERE_API_KEY")

# Fail fast with a readable message when a required setting is missing,
# instead of passing `None` into the client and getting an opaque error.
_missing_settings = [
    setting_name
    for setting_name, setting_value in (
        ("CLUSTER_URL", CLUSTER_URL),
        ("API_KEY", API_KEY),
    )
    if not setting_value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

# Forward only the provider keys that are actually set, so an unset key is
# not sent as a `None` header value.
_provider_headers = {
    header_name: header_value
    for header_name, header_value in (
        ("X-OpenAI-Api-Key", OPENAI_API_KEY),
        ("X-Cohere-Api-Key", COHERE_API_KEY),
    )
    if header_value
}

# Connect to Weaviate
client = weaviate.connect_to_weaviate_cloud(
    cluster_url=CLUSTER_URL,
    auth_credentials=weaviate.auth.AuthApiKey(API_KEY),
    headers=_provider_headers,
    additional_config=AdditionalConfig(
        # Separate timeouts for the init, query and insert phases
        timeout=Timeout(init=30, query=60, insert=120)
    ),
)

# Collect basic health / version information about the connection
ready = client.is_ready()
server_version = client.get_meta()["version"]
client_version = weaviate.__version__

live = client.is_live()
connected = client.is_connected()

print(f"Weaviate client is ready: {ready}")
print(f"Weaviate Client Version: {client_version}")
print(f"Weaviate Server Version: {server_version}")
print(f"live: {live}")
print(f"Connected: {connected}")

In [None]:
from weaviate.classes.config import Configure
from weaviate.classes.config import Property, DataType, ReferenceProperty, Tokenization

# Collection template: replace every "<...>" placeholder before running.
# The two TEXT properties differ only in tokenization (WORD vs. FIELD).
text_properties = [
    Property(
        name="<PROPERTY-NAME>",
        data_type=DataType.TEXT,
        tokenization=Tokenization.WORD,
    ),
    Property(
        name="<PROPERTY-NAME>",
        data_type=DataType.TEXT,
        tokenization=Tokenization.FIELD,
    ),
]

# Create the collection with the OpenAI vectorizer and generative module,
# timestamp indexing, and 3-way replication with async replication enabled.
client.collections.create(
    name="<collection-name>",
    properties=text_properties,
    vectorizer_config=Configure.Vectorizer.text2vec_openai(),
    generative_config=Configure.Generative.openai(),
    inverted_index_config=Configure.inverted_index(index_timestamps=True),
    replication_config=Configure.replication(factor=3, async_enabled=True),
)

In [None]:
# Report whether the collection exists
col = client.collections.get("<collection-name>").exists()
print("Collection exists" if col else "Collection does not exist")

In [None]:
# Inspect the raw content that is passed to the vectorizer.
# The vectorizer's base URL is pointed at a webhook.site endpoint, so the
# request made for an inserted object can be viewed there.
from weaviate.classes.config import Configure
from weaviate.classes.config import Property, DataType, Tokenization

client.collections.create(
    name="<collection-name>",
    vectorizer_config=Configure.Vectorizer.text2vec_openai(
        base_url="https://webhook.site/<ID>" # Webhook URL from webhook.site
    ),
    generative_config=Configure.Generative.openai(),
    inverted_index_config=Configure.inverted_index(
        index_timestamps = True
    ),
    replication_config=Configure.replication(factor=3, async_enabled=True),
    properties=[
        Property(
            name="<PROPERTY-NAME>",
            data_type=DataType.TEXT,
            tokenization=Tokenization.WORD,
        ),
        Property(
            name="<PROPERTY-NAME>",
            data_type=DataType.TEXT,
            tokenization=Tokenization.FIELD,
        )
    ]
)

# Look the collection up by its *collection* name (the one created above),
# not by a property name.
coll = client.collections.get("<collection-name>")
# NOTE: replace the placeholders with two distinct property names; identical
# keys in a dict literal collapse into a single entry.
result = coll.data.insert(properties={"<PROPERTY-NAME>": "<DATA>", "<PROPERTY-NAME>": "<DATA>"})
print(result)

# On webhook.site, open the captured request: the "Raw Content" shown in the
# "Request Content" section is the payload as it was sent to the vectorizer.

In [None]:
# generate_uuid5 derives a deterministic UUID from the value it is given
# (here each item's "PROPERTY_ID"), so the same source item always maps to the
# same object ID.
# Re-running this cell therefore does not add duplicate entries: an object
# whose UUID already exists in the collection is overwritten rather than
# inserted a second time.
# For example, the first run might add 100 unique objects; running the same
# code again leaves the object count unchanged because the UUIDs are the same.

from tqdm import tqdm
from weaviate.util import generate_uuid5

sample_100 = data_2k[0:100] # Assuming data_2k is a list of 2000 items

coll = client.collections.get("<COLLECTION-NAME>")

with coll.batch.fixed_size(batch_size=20, concurrent_requests=2) as batch:
    for item in tqdm(sample_100):
        # Named `object_uuid` (not `id`) so the builtin `id()` is not shadowed
        # for the rest of the kernel session.
        object_uuid = generate_uuid5(item["PROPERTY_ID"])

        batch.add_object(
            item,
            uuid=object_uuid
        )

# Surface objects that failed to import; they are collected on the batch
# handle and would otherwise go unnoticed.
failed_objects = coll.batch.failed_objects
if failed_objects:
    print(f"Number of failed objects: {len(failed_objects)}")
    for position, failed_object in enumerate(failed_objects, start=1):
        print(f"Failed object {position}: {failed_object.message}")

print(f"Object count: {len(coll)}")

In [None]:
# Insert a single object with Consistency level ALL
import weaviate.classes as wvc

# Get a collection handle whose operations use consistency level ALL,
# matching the stated purpose of this cell (the handle previously used ONE).
collection = client.collections.get("<COLLECTION_NAME>").with_consistency_level(
    wvc.config.ConsistencyLevel.ALL
)

# Insert a single object.
# NOTE: replace the placeholders with distinct property names; identical keys
# in a dict literal collapse into a single entry.
uuid = collection.data.insert({
    "<PROP>": "<DATA>",
    "<PROP>": "<DATA>"
})

print(uuid)  # the return value is the object's UUID

In [None]:
# Batching snippet: run a dynamic batch import, then report any failed objects.
# `items` is expected to be a collection handle defined elsewhere, and
# `[YOUR_LOOP_FOR_DATA]` is a placeholder for the loop that adds objects to `batch`.
try:
    with items.batch.dynamic() as batch:
        [YOUR_LOOP_FOR_DATA]

    # Read the failures only after the batch context has exited
    failed_objs_a = items.batch.failed_objects  # Get failed objects
    if not failed_objs_a:
        print("All objects were successfully added.")
    else:
        print(f"Number of failed objects in the first batch: {len(failed_objs_a)}")
        for i, failed_obj in enumerate(failed_objs_a, start=1):
            print(f"Failed object {i}:")
            print(f"Error message: {failed_obj.message}")
except Exception as e:
    # Any error raised during the import is reported here instead of propagating
    print(f"Error during batch import: {e}")
    print(f"Exception details: {str(e)}")