In [14]:
import requests
import json

# Download the tiny Jeopardy sample used throughout this notebook.
DATA_URL = 'https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json'

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors (404, 5xx) right here instead of as
# a confusing JSON decode error on the next line.
resp = requests.get(DATA_URL, timeout=30)
resp.raise_for_status()
data = resp.json()  # Parsed JSON payload

# Preview: container type, number of records, and the first record
print(type(data), len(data))
print(json.dumps(data[0], indent=2))

def jprint(data):
    """Pretty-print ``data`` to stdout as JSON, indented by two spaces."""
    rendered = json.dumps(data, indent=2)
    print(rendered)

<class 'list'> 10
{
  "Category": "SCIENCE",
  "Question": "This organ removes excess glucose from the blood & stores it as glycogen",
  "Answer": "Liver"
}


In [2]:
import weaviate
from weaviate import EmbeddedOptions
import os

# Create a client backed by embedded Weaviate options. The OpenAI key is read
# from the OPENAI_API_KEY environment variable (KeyError if it is not set).
embedded = EmbeddedOptions()
openai_headers = {"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]}
client = weaviate.Client(
    embedded_options=embedded,
    additional_headers=openai_headers,
)

Started /Users/zainhasan/.cache/weaviate-embedded: process ID 56407


{"action":"startup","default_vectorizer_module":"none","level":"info","msg":"the default vectorizer modules is set to \"none\", as a result all new schema classes without an explicit vectorizer setting, will use this vectorizer","time":"2023-08-25T03:29:39-04:00"}
{"action":"startup","auto_schema_enabled":true,"level":"info","msg":"auto schema enabled setting is set to \"true\"","time":"2023-08-25T03:29:39-04:00"}
{"action":"hnsw_vector_cache_prefill","count":3000,"index_id":"question_RarZP5cMvHGJ","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2023-08-25T03:29:39-04:00","took":1087416}
{"action":"grpc_startup","level":"info","msg":"grpc server listening at [::]:50051","time":"2023-08-25T03:29:39-04:00"}
{"action":"restapi_management","level":"info","msg":"Serving weaviate at http://127.0.0.1:6666","time":"2023-08-25T03:29:39-04:00"}


In [17]:
# Fetch the instance metadata and pretty-print it.
server_meta = client.get_meta()
jprint(server_meta)

{
  "hostname": "http://127.0.0.1:6666",
  "modules": {
    "generative-openai": {
      "documentationHref": "https://beta.openai.com/docs/api-reference/completions",
      "name": "Generative Search - OpenAI"
    },
    "qna-openai": {
      "documentationHref": "https://beta.openai.com/docs/api-reference/completions",
      "name": "OpenAI Question & Answering Module"
    },
    "ref2vec-centroid": {},
    "text2vec-cohere": {
      "documentationHref": "https://docs.cohere.ai/embedding-wiki/",
      "name": "Cohere Module"
    },
    "text2vec-huggingface": {
      "documentationHref": "https://huggingface.co/docs/api-inference/detailed_parameters#feature-extraction-task",
      "name": "Hugging Face Module"
    },
    "text2vec-openai": {
      "documentationHref": "https://beta.openai.com/docs/guides/embeddings/what-are-embeddings",
      "name": "OpenAI Module"
    }
  },
  "version": "1.19.12"
}


{"action":"hnsw_vector_cache_prefill","count":1000,"index_id":"question_2AGgoWLbD7hf","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2023-08-25T03:37:36-04:00","took":66375}
{"action":"hnsw_vector_cache_prefill","count":1000,"index_id":"question_4ZNNfyLhCI80","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2023-08-25T04:06:18-04:00","took":69958}


In [4]:
# Delete the "Question" class if the schema already contains one.
question_class_exists = client.schema.exists("Question")
if question_class_exists:
    client.schema.delete_class("Question")

In [5]:
# Minimal definition of the "Question" class.
# With "vectorizer" set to "none" you must always provide vectors yourself;
# any other "text2vec-*" module could be used in place of "text2vec-openai".
class_obj = {"class": "Question", "vectorizer": "text2vec-openai"}
client.schema.create_class(class_obj)

{"action":"hnsw_vector_cache_prefill","count":1000,"index_id":"question_sc4qi3Xowe3n","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2023-08-25T03:29:51-04:00","took":993625}


In [6]:
# Add every downloaded record to the batch as a "Question" object, mapping the
# capitalised source keys to lower-case property names and logging progress.
with client.batch.configure() as batch:
    for i, row in enumerate(data):
        print(f"importing question: {i+1}")
        batch.add_data_object(
            data_object={
                "answer": row["Answer"],
                "question": row["Question"],
                "category": row["Category"],
            },
            class_name="Question",
        )

importing question: 1
importing question: 2
importing question: 3
importing question: 4
importing question: 5
importing question: 6
importing question: 7
importing question: 8
importing question: 9
importing question: 10


In [16]:
# Aggregate over "Question" with a meta count and print the response.
jprint(
    client.query
    .aggregate("Question")
    .with_meta_count()
    .do()
)

{
  "data": {
    "Aggregate": {
      "Question": [
        {
          "meta": {
            "count": 10
          }
        }
      ]
    }
  }
}


In [15]:
# Fetch the "question" and "answer" properties of up to three "Question" objects.
jprint(
    client.query
    .get("Question", ["question", "answer"])
    .with_limit(3)
    .do()
)

{
  "data": {
    "Get": {
      "Question": [
        {
          "answer": "Antelope",
          "question": "Weighing around a ton, the eland is the largest species of this animal in Africa"
        },
        {
          "answer": "wire",
          "question": "A metal that is ductile can be pulled into this while cold & under pressure"
        },
        {
          "answer": "Sound barrier",
          "question": "In 70-degree air, a plane traveling at about 1,130 feet per second breaks it"
        }
      ]
    }
  }
}
