## Challenge 5

1. Write a query that extracts 3 animal-related questions from the database and print these questions out.
2. Use a `with_generate` query to provide these questions to an LLM and have it answer them.
3. See how many of these questions the LLM got correct by printing out the correct answer from the database!


In [1]:
import requests
import json
#
# Download the data.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() reports HTTP errors (404, 5xx, ...) directly instead of
# letting an error page fail later as a confusing JSON decode error.
resp = requests.get(
    'https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json',
    timeout=30,
)
resp.raise_for_status()
data = json.loads(resp.text)  # Parse the response body into Python objects
    
def json_print(data):
    """Pretty-print ``data`` to stdout as JSON indented by two spaces."""
    formatted = json.dumps(data, indent=2)
    print(formatted)
    
# Show the downloaded records; each has "Category", "Question" and "Answer" keys.
json_print(data)

[
  {
    "Category": "SCIENCE",
    "Question": "This organ removes excess glucose from the blood & stores it as glycogen",
    "Answer": "Liver"
  },
  {
    "Category": "ANIMALS",
    "Question": "It's the only living mammal in the order Proboseidea",
    "Answer": "Elephant"
  },
  {
    "Category": "ANIMALS",
    "Question": "The gavial looks very much like a crocodile except for this bodily feature",
    "Answer": "the nose or snout"
  },
  {
    "Category": "ANIMALS",
    "Question": "Weighing around a ton, the eland is the largest species of this animal in Africa",
    "Answer": "Antelope"
  },
  {
    "Category": "ANIMALS",
    "Question": "Heaviest of all poisonous snakes is this North American rattlesnake",
    "Answer": "the diamondback rattler"
  },
  {
    "Category": "SCIENCE",
    "Question": "2000 news: the Gunnison sage grouse isn't just another northern sage grouse, but a new one of this classification",
    "Answer": "species"
  },
  {
    "Category": "SCIENCE",
   

In [2]:
import weaviate
from weaviate import EmbeddedOptions
import os

# Create a Weaviate client backed by an embedded instance.
# The OpenAI key is read from the OPENAI_API_KEY environment variable (a
# KeyError is raised if it is unset) and sent as the X-OpenAI-Api-Key header,
# so the key never has to be pasted into the notebook.
embedded_opts = EmbeddedOptions()
request_headers = {"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]}
client = weaviate.Client(
    embedded_options=embedded_opts,
    additional_headers=request_headers,
)

embedded weaviate is already listening on port 6666


In [3]:
# Remove a "Question" class left over from a previous run, if there is one,
# before the class is created again below.
question_class_exists = client.schema.exists("Question")
if question_class_exists:
    client.schema.delete_class("Question")

In [4]:
# Definition of the "Question" class.
# If "vectorizer" is set to "none" you must always provide vectors yourself;
# any other "text2vec-*" module could be used in place of text2vec-openai.
class_obj = {"class": "Question", "vectorizer": "text2vec-openai"}

client.schema.create_class(class_obj)

In [5]:
# Add every downloaded record to the batch as a "Question" object, mapping
# the dataset's capitalised keys to lowercase property names.
with client.batch.configure() as batch:
    for number, record in enumerate(data, start=1):
        print(f"importing question: {number}")

        batch.add_data_object(
            data_object={
                "answer": record["Answer"],
                "question": record["Question"],
                "category": record["Category"],
            },
            class_name="Question",
        )

importing question: 1
importing question: 2
importing question: 3
importing question: 4
importing question: 5
importing question: 6
importing question: 7
importing question: 8
importing question: 9
importing question: 10


In [6]:
# Ask for the object count of the "Question" class to confirm the import.
count_query = client.query.aggregate('Question').with_meta_count()
json_print(count_query.do())

{
  "data": {
    "Aggregate": {
      "Question": [
        {
          "meta": {
            "count": 10
          }
        }
      ]
    }
  }
}


### 1. Write a query that extracts 3 animal-related questions from the database and print these questions out

In [8]:
# Fetch the "question" property of up to three Question objects, searching
# by the concept "animals".
animal_query = (
    client.query.get("Question", "question")
    .with_near_text({"concepts": "animals"})
    .with_limit(3)
)
response = animal_query.do()

json_print(response)

{
  "data": {
    "Get": {
      "Question": [
        {
          "question": "It's the only living mammal in the order Proboseidea"
        },
        {
          "question": "The gavial looks very much like a crocodile except for this bodily feature"
        },
        {
          "question": "Weighing around a ton, the eland is the largest species of this animal in Africa"
        }
      ]
    }
  }
}


### 2. Use a `with_generate` query to provide these questions to an LLM and have it answer them

In [9]:
# Prompt applied to each returned object individually (single_prompt).
# NOTE(review): {question} is presumably replaced by Weaviate with the
# object's "question" property - confirm against the client docs.
prompt = "Answer the Jeopardy question: {question}. If you don't know the answer say I don't know."

# Same "animals" search as before, with a generated answer attached to each hit.
generate_query = (
    client.query.get("Question", "question")
    .with_near_text({"concepts": "animals"})
    .with_generate(single_prompt=prompt)
    .with_limit(3)
)
response = generate_query.do()

json_print(response)

{
  "data": {
    "Get": {
      "Question": [
        {
          "_additional": {
            "generate": {
              "error": null,
              "singleResult": "What is the elephant?"
            }
          },
          "question": "It's the only living mammal in the order Proboseidea"
        },
        {
          "_additional": {
            "generate": {
              "error": null,
              "singleResult": "What is its long, narrow snout?"
            }
          },
          "question": "The gavial looks very much like a crocodile except for this bodily feature"
        },
        {
          "_additional": {
            "generate": {
              "error": null,
              "singleResult": "What is antelope?"
            }
          },
          "question": "Weighing around a ton, the eland is the largest species of this animal in Africa"
        }
      ]
    }
  }
}


### 3. See how many of these questions the LLM got correct by printing out the correct answer from the database!

In [10]:
# Repeat the "animals" search, this time also returning the stored "answer"
# so the LLM's responses can be checked by eye against the database.
answer_query = (
    client.query.get("Question", ["question", "answer"])
    .with_near_text({"concepts": "animals"})
    .with_limit(3)
)
response = answer_query.do()

json_print(response)

{
  "data": {
    "Get": {
      "Question": [
        {
          "answer": "Elephant",
          "question": "It's the only living mammal in the order Proboseidea"
        },
        {
          "answer": "the nose or snout",
          "question": "The gavial looks very much like a crocodile except for this bodily feature"
        },
        {
          "answer": "Antelope",
          "question": "Weighing around a ton, the eland is the largest species of this animal in Africa"
        }
      ]
    }
  }
}
