In [2]:
import requests
import json

# Download the sample Jeopardy questions (a JSON array of records).
resp = requests.get(
    'https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json',
    timeout=30,  # Don't let the cell hang forever if the host is unreachable
)
# Fail loudly on an HTTP error instead of trying to JSON-decode an error page.
resp.raise_for_status()
data = json.loads(resp.text)
print(type(data), len(data))

def json_print(data):
    """Print ``data`` to stdout as JSON indented by two spaces.

    Args:
        data: Any JSON-serializable object (dict, list, str, number, ...).

    Raises:
        TypeError: Propagated from ``json.dumps`` when ``data`` contains a
            value that cannot be serialized.
    """
    formatted = json.dumps(data, indent=2)
    print(formatted)

# Pretty-print the first downloaded record to inspect its fields.
json_print(data[0])

<class 'list'> 10
{
  "Category": "SCIENCE",
  "Question": "This organ removes excess glucose from the blood & stores it as glycogen",
  "Answer": "Liver"
}


In [3]:
import weaviate, os
from weaviate import EmbeddedOptions
import openai

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())  # Read local .env file

# The OpenAI key comes from the environment (or the .env file loaded above).
openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    # Stop here with a clear message rather than sending a null header value.
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your environment or .env file."
    )

openai_headers = {"X-OpenAI-Api-Key": openai.api_key}

# OPEN_AI_BASE is optional: when it is unset the base-URL header is simply
# omitted, so the server-side default endpoint applies.
openai_base_url = os.getenv("OPEN_AI_BASE")
if openai_base_url:
    openai_headers["X-OpenAI-BaseURL"] = openai_base_url

# Start an embedded Weaviate instance and forward the OpenAI headers to it.
client = weaviate.Client(
    embedded_options=EmbeddedOptions(),
    additional_headers=openai_headers,
)
json_print(client.get_meta())

ModuleNotFoundError: No module named 'openai'

In [None]:
# Import the sample records into the "Question" class, five objects per batch.
# Vector embeddings are expected to be generated on import (this depends on
# the class's vectorizer configuration, which is not shown in this cell).
with client.batch.configure(batch_size=5) as batch:
    for position, record in enumerate(data, start=1):
        print(f"Importing question: {position}")
        # Map the capitalized source keys onto the lower-case property names.
        properties = dict(
            answer=record["Answer"],
            question=record["Question"],
            category=record["Category"],
        )
        batch.add_data_object(data_object=properties, class_name="Question")

# Ask Weaviate how many "Question" objects it now holds.
count_query = client.query.aggregate("Question").with_meta_count()
count = count_query.do()
json_print(count)

In [None]:
# Fetch a single stored question along with the vector that represents it.
vector_query = client.query.get("Question", ["category", "question", "answer"])
vector_query = vector_query.with_additional("vector")
vector_query = vector_query.with_limit(1)
result = vector_query.do()
json_print(result)

In [None]:
# Query time: semantic search for the two questions closest to "biology",
# returning each match's distance from the query concept.
response = (
    client.query
    .get("Question", ["category", "question", "answer"])
    # `concepts` is given as a list of strings.
    .with_near_text({"concepts": ["biology"]})
    .with_additional("distance")
    .with_limit(2)
    .do())
json_print(response)

In [None]:
# Semantic search for "biology", returning up to ten matches with distances.
biology_filter = {"concepts": ["biology"]}
biology_query = (
    client.query.get("Question", ["category", "question", "answer"])
    .with_near_text(biology_filter)
    .with_limit(10)
    .with_additional("distance")
)
response = biology_query.do()
json_print(response)

In [None]:
# Supplying a "distance" in the nearText filter tells the vector database to
# drop results that lie beyond that threshold.
animals_filter = {"concepts": ["animals"], "distance": 0.24}
animals_query = (
    client.query.get("Question", ["category", "question", "answer"])
    .with_near_text(animals_filter)
    .with_limit(10)
    .with_additional("distance")
)
response = animals_query.do()
json_print(response)

In [None]:
# CRUD Operations

## Create
# Insert one new object; the returned UUID identifies it in the steps below.
object_uuid = client.data_object.create(
    data_object={
        "question":"Leonardo da Vinci was born in this country.",
        "answer":"Italy?",
        "category":"Culture"
    },
    class_name="Question"
)
print(object_uuid)

## Read
data_object = client.data_object.get_by_id(object_uuid, class_name="Question")
json_print(data_object)

## Update
# Only the "answer" property is supplied; the other properties are not sent.
client.data_object.update(
    uuid=object_uuid,
    class_name="Question",
    data_object={
        "answer":"What is Florence, Italy?",
    },
)
# Re-read the object so the printout reflects the update rather than the
# stale copy fetched in the Read step.
data_object = client.data_object.get_by_id(object_uuid, class_name="Question")
json_print(data_object)

## Delete
# Print the object count before and after to confirm one object was removed.
json_print(client.query.aggregate("Question").with_meta_count().do())
client.data_object.delete(uuid=object_uuid, class_name="Question")
json_print(client.query.aggregate("Question").with_meta_count().do())