In [1]:
# Load settings from a local .env file into the process environment so the
# later cells can read the Astra credentials via os.getenv.
# override=True: values from the .env file replace variables already set in the environment.
from dotenv import load_dotenv
load_dotenv(override=True)

True

## Connect to Astra

In [2]:
import os

from astrapy import DataAPIClient

# Fail fast when a required setting is absent: os.getenv would silently hand
# None to the client and the problem would only surface later as an opaque
# client/HTTP error.
_missing_settings = [
    name
    for name in ("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT")
    if not os.getenv(name)
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
        + ". Define them in your .env file and re-run the first cell."
    )

# Initialize the client and open the target database.
client = DataAPIClient(os.environ["ASTRA_DB_APPLICATION_TOKEN"])
db = client.get_database_by_api_endpoint(
    os.environ["ASTRA_DB_API_ENDPOINT"],
    # Left optional on purpose: when ASTRA_DB_NAMESPACE is unset this passes
    # None, exactly as before (presumably selecting the default keyspace --
    # confirm against the astrapy docs).
    namespace=os.getenv("ASTRA_DB_NAMESPACE"),
)

# Listing the collections doubles as a connectivity check.
print(f"Connected to Astra DB: {db.list_collection_names()}")

Connected to Astra DB: ['langchain_message_store', 'memories']


## Create Collection (Vectorize)

> You can add an external embedding provider to your collection to automatically generate embeddings with [Astra DB vectorize](https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html). To do this, you must add an [external embedding provider integration](https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html#supported-embedding-providers) to your Astra DB organization, and then you can use that embedding provider when you create a collection.

For this demonstration, the OpenAI integration has already been added.

In [5]:
import os

from astrapy.constants import VectorMetric
from astrapy.info import CollectionVectorServiceOptions

# Value sent as "providerKey" for the OpenAI integration. It is specific to one
# Astra account, so it can be overridden through the environment instead of
# editing the notebook; the default keeps the original behavior.
# NOTE(review): this looks like the *name* of a credential registered in Astra,
# not the OpenAI secret itself -- confirm before sharing the notebook.
OPENAI_PROVIDER_KEY_NAME = os.getenv(
    "ASTRA_OPENAI_PROVIDER_KEY_NAME", "phil_svcacct_poYgA"
)

# Create the "demo" collection with server-side embedding generation
# (vectorize) backed by OpenAI's text-embedding-3-small model, compared with
# cosine similarity.
collection = db.create_collection(
    "demo",
    metric=VectorMetric.COSINE,
    service=CollectionVectorServiceOptions(
        provider="openai",
        model_name="text-embedding-3-small",
        authentication={
            "providerKey": OPENAI_PROVIDER_KEY_NAME,
        },
        # Optional provider parameters, left disabled in this demo:
        # parameters={
        #     "organizationId": "ORGANIZATION_ID",
        #     "projectId": "PROJECT_ID",
        # },
    ),
    # Skip the client-side "already exists" check so the cell can be re-run
    # (Restart & Run All) once "demo" exists with these same settings.
    check_exists=False,
)
print(f"* Collection: {collection.full_name}\n")


* Collection: default_keyspace.demo



## Add Documents to Collection (Vectorize)

In [14]:
# Seed the collection with three product ideas. Each row is (id, text); the
# text is stored under "$vectorize" (presumably so the collection's embedding
# service generates the vector -- see the vectorize note above).
# (UUIDs here are version 7.)
seed_rows = (
    ("018e65c9-df45-7913-89f8-175f28bd7f74", "Chat bot integrated sneakers that talk to you"),
    ("018e65c9-e1b7-7048-a593-db452be1e4c2", "An AI quilt to help you sleep forever"),
    ("018e65c9-e33d-749b-9386-e848739582f0", "A deep learning display that controls your mood"),
)
documents = [
    {"_id": doc_id, "$vectorize": text, "metadata": {}}
    for doc_id, text in seed_rows
]

insertion_result = collection.insert_many(documents)
inserted_count = len(insertion_result.inserted_ids)
print(f"* Inserted {inserted_count} items.\n")

* Inserted 3 items.



## Vector Search (Vectorize)

In [12]:
# Similarity search: sort by the free-text query via "$vectorize", keep the
# top two documents, and return each one's original text and similarity score.
query = "I'd like some talking shoes"
results = collection.find(
    sort={"$vectorize": query},
    projection={"$vectorize": True},
    include_similarity=True,
    limit=2,
)

print(f"Vector search results for '{query}':")
for hit in results:
    print("    ", hit)

Vector search results for 'I'd like some talking shoes':
     {'_id': UUID('018e65c9-df45-7913-89f8-175f28bd7f74'), '$vectorize': 'Chat bot integrated sneakers that talk to you', '$similarity': 0.76328015}
     {'_id': UUID('018e65c9-e33d-749b-9386-e848739582f0'), '$vectorize': 'A deep learning display that controls your mood', '$similarity': 0.6000229}
