Getting Started with Weaviate

In [37]:
import weaviate
import os

In [38]:
# Connect to the Weaviate cluster using settings taken from the environment.
#
# os.environ[...] is used instead of os.getenv(...) on purpose: a missing
# variable raises KeyError naming that variable right here, rather than
# os.getenv() returning None and the client being created with a
# "Bearer None" Authorization header or a null OpenAI key.
#
# NOTE(review): the OpenAI key is read from "OPEN_API_KEY" (not
# "OPENAI_API_KEY"); the name is kept as-is -- confirm it matches the
# variable actually exported in your environment.
client = weaviate.Client(
    url=os.environ["WEAVIATE_CLUSTER_URL"],
    additional_headers={
        # API-key authentication for the Weaviate cluster
        "Authorization": f"Bearer {os.environ['WEAVIATE_API_KEY']}",
        "Content-Type": "application/json",
        # Forwarded so the text2vec-openai vectorizer can call OpenAI
        "X-OpenAI-Api-Key": os.environ["OPEN_API_KEY"],
    },
)

            your code to use Python client v4 `weaviate.WeaviateClient` connections and methods.

            For Python Client v4 usage, see: https://weaviate.io/developers/weaviate/client-libraries/python
            For code migration, see: https://weaviate.io/developers/weaviate/client-libraries/python/v3_v4_migration
            
  client = weaviate.Client(


In [39]:
# Alternative way to connect, via weaviate.connect_to_wcs (presumably the
# newer-client counterpart of the weaviate.Client(...) call -- confirm against
# the client documentation). It is left commented out: the rest of this
# notebook uses the `client` object created by weaviate.Client(...).
# client = weaviate.connect_to_wcs(
#     # Replace with your Weaviate Cloud URL
#     cluster_url=os.getenv("WEAVIATE_CLUSTER_URL"),
#     auth_credentials=weaviate.auth.AuthApiKey(
#         os.getenv("WEAVIATE_API_KEY")
#     ),  # Replace with your Weaviate Cloud key
#     # Replace with your OpenAI API key
#     headers={"X-OpenAI-Api-key": os.getenv("OPEN_API_KEY")},
# )

To check if everything is set up correctly, run:



In [40]:
# Readiness check on the connected client; as the last expression in the cell,
# its boolean result is shown as the cell output.
client.is_ready()

True

How to Create and Populate a Weaviate Vector Database

In [41]:
import pandas as pd

# Read at most the first 100 data rows of the Jeopardy questions CSV.
df = pd.read_csv(
    filepath_or_buffer="jeopardy_questions.csv",
    nrows=100,
)

In [42]:
# Bare expression: the notebook renders the loaded DataFrame as this cell's output.
df

Unnamed: 0,show_number,air_date,round,category,value,question,answer
0,4680,12/31/04,Jeopardy!,HISTORY,200.0,"For the last 8 years of his life, Galileo was ...",Copernicus
1,4680,12/31/04,Jeopardy!,ESPN's TOP 10 ALL-TIME ATHLETES,200.0,No. 2: 1912 Olympian; football star at Carlisl...,Jim Thorpe
2,4680,12/31/04,Jeopardy!,EVERYBODY TALKS ABOUT IT...,200.0,The city of Yuma in this state has a record av...,Arizona
3,4680,12/31/04,Jeopardy!,THE COMPANY LINE,200.0,"In 1963, live on ""The Art Linkletter Show"", th...",McDonald's
4,4680,12/31/04,Jeopardy!,EPITAPHS & TRIBUTES,200.0,"Signer of the Dec. of Indep., framer of the Co...",John Adams
...,...,...,...,...,...,...,...
95,5957,7/6/10,Double Jeopardy!,SEE & SAY,800.0,"Say <a href=""http://www.j-archive.com/media/20...",Oregon
96,5957,7/6/10,Double Jeopardy!,NEWS TO ME,800.0,This car company has been in the news for wide...,Toyota
97,5957,7/6/10,Double Jeopardy!,IN THE DICTIONARY,800.0,"As an adjective, it can mean proper; as a verb...",correct
98,5957,7/6/10,Double Jeopardy!,SCIENCE CLASS,1200.0,The wedge is an adaptation of the simple machi...,plane


Step 1: Create a Schema

In [43]:
# Schema definition for the "JeopardyQuestion" class, passed to
# client.schema.create_class() further down.
class_obj = {
    # Name of the class to create
    "class": "JeopardyQuestion",
    # One property per field; all three use the "text" data type
    "properties": [
        {"name": field_name, "dataType": ["text"]}
        for field_name in ("category", "question", "answer")
    ],
    # Vectorizer module used for objects of this class
    "vectorizer": "text2vec-openai",
    # Settings for that vectorizer module
    "moduleConfig": {
        "text2vec-openai": dict(
            vectorizeClassName=False,
            model="ada",
            modelVersion="002",
            type="text",
        ),
    },
}

Next, create the class with the `create_class()` method.

In [44]:
# Create the class only if the schema does not already contain it, so this
# cell can be re-run against the same cluster (e.g. Restart & Run All) without
# create_class() failing because the class already exists.
if not client.schema.exists(class_obj["class"]):
    client.schema.create_class(class_obj)

In [45]:
# Retrieve the stored schema for the "JeopardyQuestion" class; as the last
# expression in the cell, the returned dict is shown as the cell output.
client.schema.get("JeopardyQuestion")

{'class': 'JeopardyQuestion',
 'invertedIndexConfig': {'bm25': {'b': 0.75, 'k1': 1.2},
  'cleanupIntervalSeconds': 60,
  'stopwords': {'additions': None, 'preset': 'en', 'removals': None}},
 'moduleConfig': {'text2vec-openai': {'baseURL': 'https://api.openai.com',
   'model': 'ada',
   'modelVersion': '002',
   'type': 'text',
   'vectorizeClassName': False}},
 'multiTenancyConfig': {'enabled': False},
 'properties': [{'dataType': ['text'],
   'indexFilterable': True,
   'indexSearchable': True,
   'moduleConfig': {'text2vec-openai': {'skip': False,
     'vectorizePropertyName': False}},
   'name': 'category',
   'tokenization': 'word'},
  {'dataType': ['text'],
   'indexFilterable': True,
   'indexSearchable': True,
   'moduleConfig': {'text2vec-openai': {'skip': False,
     'vectorizePropertyName': False}},
   'name': 'question',
   'tokenization': 'word'},
  {'dataType': ['text'],
   'indexFilterable': True,
   'indexSearchable': True,
   'moduleConfig': {'text2vec-openai': {'skip':

Step 2: Import data into Weaviate

At this stage, the vector database has a schema but is still empty. So, let’s populate it with our dataset. This process is also called “upserting”.

In [53]:
from weaviate.util import generate_uuid5

# Send objects to Weaviate through the client's batch context manager.
# Note that only the FIRST row of df is imported here (head(1)).
with client.batch(batch_size=2, num_workers=2) as batch:
    for _, row in df.head(1).iterrows():
        # Keep just the three fields declared in the class schema
        question_object = {
            field_name: row[field_name]
            for field_name in ("category", "question", "answer")
        }
        # UUID derived from the object itself via generate_uuid5
        object_uuid = generate_uuid5(question_object)
        batch.add_data_object(
            question_object,
            class_name="JeopardyQuestion",
            uuid=object_uuid,
        )

{'error': [{'message': 'update vector: connection to: OpenAI API failed with status: 429 error: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.'}]}
