In [40]:
import weaviate
from weaviate.collections.classes.config import DataType, Configure, Property
import weaviate.classes as wvc
client = weaviate.connect_to_local()

print(client.is_ready())

True


In [52]:
import ollama
import json

In [56]:
data = [{"Category":"SCIENCE","Question":"This organ removes excess glucose from the blood & stores it as glycogen","Answer":"Liver"},{"Category":"ANIMALS","Question":"It's the only living mammal in the order Proboseidea","Answer":"Elephant"},{"Category":"ANIMALS","Question":"The gavial looks very much like a crocodile except for this bodily feature","Answer":"the nose or snout"},{"Category":"ANIMALS","Question":"Weighing around a ton, the eland is the largest species of this animal in Africa","Answer":"Antelope"},{"Category":"ANIMALS","Question":"Heaviest of all poisonous snakes is this North American rattlesnake","Answer":"the diamondback rattler"},{"Category":"SCIENCE","Question":"2000 news: the Gunnison sage grouse isn't just another northern sage grouse, but a new one of this classification","Answer":"species"},{"Category":"SCIENCE","Question":"A metal that is ductile can be pulled into this while cold & under pressure","Answer":"wire"},{"Category":"SCIENCE","Question":"In 1953 Watson & Crick built a model of the molecular structure of this, the gene-carrying substance","Answer":"DNA"},{"Category":"SCIENCE","Question":"Changes in the tropospheric layer of this are what gives us weather","Answer":"the atmosphere"},{"Category":"SCIENCE","Question":"In 70-degree air, a plane traveling at about 1,130 feet per second breaks it","Answer":"Sound barrier"}]

In [57]:
data

[{'Category': 'SCIENCE',
  'Question': 'This organ removes excess glucose from the blood & stores it as glycogen',
  'Answer': 'Liver'},
 {'Category': 'ANIMALS',
  'Question': "It's the only living mammal in the order Proboseidea",
  'Answer': 'Elephant'},
 {'Category': 'ANIMALS',
  'Question': 'The gavial looks very much like a crocodile except for this bodily feature',
  'Answer': 'the nose or snout'},
 {'Category': 'ANIMALS',
  'Question': 'Weighing around a ton, the eland is the largest species of this animal in Africa',
  'Answer': 'Antelope'},
 {'Category': 'ANIMALS',
  'Question': 'Heaviest of all poisonous snakes is this North American rattlesnake',
  'Answer': 'the diamondback rattler'},
 {'Category': 'SCIENCE',
  'Question': "2000 news: the Gunnison sage grouse isn't just another northern sage grouse, but a new one of this classification",
  'Answer': 'species'},
 {'Category': 'SCIENCE',
  'Question': 'A metal that is ductile can be pulled into this while cold & under pressur

In [61]:
questions = client.collections.create(
        name="Questions",
            vectorizer_config=[
        Configure.NamedVectors.text2vec_ollama(
            name="title_vector",
            source_properties=["title"],
            api_endpoint="http://host.docker.internal:11434",  # If using Docker, use this to contact your local Ollama instance
            model="nomic-embed-text",  # The model to use, e.g. "nomic-embed-text"
        )
    ],    
        properties=[
            wvc.config.Property(
                name="question",
                data_type=wvc.config.DataType.TEXT,
            ),
            wvc.config.Property(
                name="answer",
                data_type=wvc.config.DataType.TEXT,
            ),
            wvc.config.Property(
                name="category",
                data_type=wvc.config.DataType.TEXT,
            )
        ]
    )

In [62]:
questions

<weaviate.collections.collection.sync.Collection at 0x7f84f97b67a0>

In [63]:
# Prepare DataObject instances
question_objects = [
    wvc.data.DataObject(
        properties={
            "question_text": obj["Question"],
            "answer": obj["Answer"],
            "category": obj["Category"],
        }# Include the pre-generated vector
    )
    for obj in data
]

In [64]:
questions.data.insert_many(question_objects)

BatchObjectReturn(_all_responses=[UUID('6b2fdb17-3735-41a8-b90f-3feed041f6f0'), UUID('cf5bc7ae-5adf-4381-b731-69c2f011abf1'), UUID('92858183-e581-409e-bd9c-868b18679eca'), UUID('b3273ca8-3456-4f39-87fb-7dfc748cde40'), UUID('6793257c-dbb7-4685-8fc0-5084c4504302'), UUID('e62b7ac6-0cc2-476b-9afd-f15270364e22'), UUID('eca9c5c4-7344-4a55-8623-84a9ffe91664'), UUID('0f8f5f86-4845-4a0d-b001-9aeef816b950'), UUID('9abd45b7-ae81-483b-a5b5-6698383d750e'), UUID('80031dc1-9a58-4095-8c7f-bc54861243a8')], elapsed_seconds=0.6143290996551514, errors={}, uuids={0: UUID('6b2fdb17-3735-41a8-b90f-3feed041f6f0'), 1: UUID('cf5bc7ae-5adf-4381-b731-69c2f011abf1'), 2: UUID('92858183-e581-409e-bd9c-868b18679eca'), 3: UUID('b3273ca8-3456-4f39-87fb-7dfc748cde40'), 4: UUID('6793257c-dbb7-4685-8fc0-5084c4504302'), 5: UUID('e62b7ac6-0cc2-476b-9afd-f15270364e22'), 6: UUID('eca9c5c4-7344-4a55-8623-84a9ffe91664'), 7: UUID('0f8f5f86-4845-4a0d-b001-9aeef816b950'), 8: UUID('9abd45b7-ae81-483b-a5b5-6698383d750e'), 9: UUID('8

In [65]:
response = questions.query.near_text(
    query="biology",
    limit=2
)

for obj in response.objects:
    print(json.dumps(obj.properties, indent=2))

client.close() 

{
  "answer": "the atmosphere",
  "question_text": "Changes in the tropospheric layer of this are what gives us weather",
  "question": null,
  "category": "SCIENCE"
}
{
  "answer": "wire",
  "question_text": "A metal that is ductile can be pulled into this while cold & under pressure",
  "question": null,
  "category": "SCIENCE"
}


In [75]:
for item in response.objects:
    print(item.properties['question_text'])

Changes in the tropospheric layer of this are what gives us weather
A metal that is ductile can be pulled into this while cold & under pressure


In [76]:
" ".join([item.properties['question_text'] for item in response.objects])

'Changes in the tropospheric layer of this are what gives us weather A metal that is ductile can be pulled into this while cold & under pressure'

In [39]:
#questions = client.collections.get("Question")

with questions.batch.dynamic() as batch:
    for d in data:
        batch.add_object({
            "answer": d["Answer"],
            "question": d["Question"],
            "category": d["Category"],
            
        }, {"vector": d["vector"]})
        if batch.number_errors > 10:
            print("Batch import stopped due to excessive errors.")
            break

failed_objects = questions.batch.failed_objects
if failed_objects:
    print(f"Number of failed imports: {len(failed_objects)}")
    print(f"First failed object: {failed_objects[0]}")

WeaviateBatchValidationError: Batch validation error: 4 validation errors for BatchObject
references.vector.str
  Input should be a valid string [type=string_type, input_value=EmbeddingsResponse(embedd..., -0.37761157751083374]), input_type=EmbeddingsResponse]
    For further information visit https://errors.pydantic.dev/2.10/v/string_type
references.vector.uuid
  UUID input should be a string, bytes or UUID object [type=uuid_type, input_value=EmbeddingsResponse(embedd..., -0.37761157751083374]), input_type=EmbeddingsResponse]
    For further information visit https://errors.pydantic.dev/2.10/v/uuid_type
references.vector.json-or-python[json=list[union[str,uuid]],python=chain[is-instance[Sequence],function-wrap[sequence_validator()]]]
  Input should be an instance of Sequence [type=is_instance_of, input_value=EmbeddingsResponse(embedd..., -0.37761157751083374]), input_type=EmbeddingsResponse]
    For further information visit https://errors.pydantic.dev/2.10/v/is_instance_of
references.vector.ReferenceToMulti
  Input should be a valid dictionary or instance of ReferenceToMulti [type=model_type, input_value=EmbeddingsResponse(embedd..., -0.37761157751083374]), input_type=EmbeddingsResponse]
    For further information visit https://errors.pydantic.dev/2.10/v/model_type

In [50]:
#questions = client.collections.get("Question")

response = questions.query.near_text(
    query="biology",
    limit=2
)

WeaviateQueryError: Query call with protocol GRPC search failed with message <AioRpcError of RPC that terminated with:
	status = StatusCode.UNKNOWN
	details = "explorer: get class: vectorize params: could not vectorize input for collection Questions with search-type nearText. Make sure a vectorizer module is configured for this collection"
	debug_error_string = "UNKNOWN:Error received from peer  {created_time:"2025-02-24T01:14:56.508045+04:00", grpc_status:2, grpc_message:"explorer: get class: vectorize params: could not vectorize input for collection Questions with search-type nearText. Make sure a vectorizer module is configured for this collection"}"
>.

In [None]:
client.close()