### Create Dataset

In [None]:
# Toy dataset: one record per fruit, each holding a "name" and a "color".
# Note that despite its name, my_dict is a list of dictionaries.
_fruit_rows = [
    ("Apple", "red"),
    ("Blueberries", "blue"),
    ("Strawberries", "red"),
    ("Banana", "yellow"),
    ("Oranges", "orange"),
    ("Pineapple", "yellow"),
]
my_dict = [{"name": fruit_name, "color": fruit_color} for fruit_name, fruit_color in _fruit_rows]

In [None]:
# Display the dataset to check its contents before uploading it.
my_dict

[{'color': 'red', 'name': 'Apple'},
 {'color': 'blue', 'name': 'Blueberries'},
 {'color': 'red', 'name': 'Strawberries'},
 {'color': 'yellow', 'name': 'Banana'},
 {'color': 'orange', 'name': 'Oranges'},
 {'color': 'yellow', 'name': 'Pineapple'}]

### Create Weaviate Instance 

In [None]:
!pip install weaviate-client==3.4.2 > /dev/null

Create a Weaviate account on the [Weaviate console](https://console.semi.technology/) before running the next cells.

In [None]:
# prompt for the Weaviate account credentials used to create the instance
from getpass import getpass
import weaviate
from weaviate.wcs import WCS

# The user name is prompted for first, then the password.
my_credentials = weaviate.auth.AuthClientPassword(
    username=input("User name: "),
    password=getpass("Password: "),  # getpass reads the password without echoing it
)

User name: <your-account-email>
Password: ··········


In [None]:
# Open a WCS session with the credentials collected in my_credentials.
my_wcs = WCS(my_credentials)

# Name of the cluster: don't capitalize and no underscore.
cluster_name = "weaviate-cloud-example"

# Vectorization module: a pre-trained language transformer model.
# The tag is the notebook author's recommendation for the accuracy/speed tradeoff.
transformer_module = {
    "name": "text2vec-transformers",
    "tag": "sentence-transformers-paraphrase-MiniLM-L6-v2",
}
modules = [transformer_module]

# Create the cluster, waiting for completion, then connect a client to the returned URL.
# NOTE(review): with_auth=False is passed and the client connects without
# credentials - confirm this is acceptable outside of a demo.
weaviate_url = my_wcs.create(
    cluster_name,
    with_auth=False,
    modules=modules,
    wait_for_completion=True,
)
client = weaviate.Client(weaviate_url)

# Last expression of the cell: shows whether the instance reports ready.
client.is_ready()

True

### Create Weaviate Schema 

In [None]:
# Schema with a single "Fruit" class holding two text properties.
# Per property, the text2vec-transformers module config sets:
#   skip=False                  -> the property value is vectorized
#   vectorizePropertyName=False -> the property name is left out of the vector
weaviate_schema = {
    "classes": [
        {
            "class": "Fruit",
            "description": "One of many fruits",
            "properties": [
                {
                    "name": "name",
                    "dataType": ["text"],
                    "description": "Name of the fruit",
                    "moduleConfig": {
                        "text2vec-transformers": {
                            "skip": False,
                            "vectorizePropertyName": False
                        }
                    }
                },
                {
                    "name": "color",
                    "dataType": ["text"],
                    "description": "Color of the fruit",
                    "moduleConfig": {
                        "text2vec-transformers": {
                            "skip": False,
                            "vectorizePropertyName": False
                        }
                    }
                }
            ]
        }
    ]
}

# Register the schema with the instance; defining the dict alone has no effect
# on the server. The contains() guard keeps this cell safe to re-run once the
# class exists.
if not client.schema.contains(weaviate_schema):
    client.schema.create(weaviate_schema)

In [None]:
from weaviate.batch import Batch # class for handling data upload
from weaviate.util import generate_uuid5 # generate a unique id for each object

# Configure batching before any object is queued: batch_size=5, dynamic=True.
# NOTE(review): dynamic=True presumably lets the client adjust the batch size itself - confirm in the client docs.
client.batch.configure(batch_size=5, dynamic=True) # batch size is a setting to revisit first when debugging uploads

def add_fruit(batch: "Batch", data: dict) -> str:
  """Queue one fruit record on ``batch`` as a ``Fruit`` object and return its uuid.

  Args:
    batch: Batch handler exposing ``add_data_object`` (e.g. ``client.batch``).
    data: Record with ``"name"``, ``"color"`` and ``"id"`` keys. ``"id"`` is
      used as the object's uuid and is not stored as a property.

  Returns:
    The uuid the object was queued under, i.e. ``data["id"]``.

  Raises:
    KeyError: If ``data`` lacks ``"name"``, ``"color"`` or ``"id"``.
  """
  # Only name and color become object properties.
  fruit_object = {
      "name": data["name"],
      "color": data["color"],
  }
  fruit_id = data["id"]  # caller-supplied unique id
  batch.add_data_object(
      data_object=fruit_object,
      class_name="Fruit",
      uuid=fruit_id,
  )
  # The signature promises a str; return the uuid instead of falling through to None.
  return fruit_id

# Upload every fruit through the client's batch context manager.
# Each object's uuid comes from generate_uuid5 applied to its position in my_dict.
with client.batch as batch:
  for idx, fruit in enumerate(my_dict):
    # Build a fresh record rather than writing an "id" key back into my_dict,
    # and avoid a variable named `id`, which would shadow the builtin.
    fruit_with_id = {**fruit, "id": generate_uuid5(idx)}
    add_fruit(batch, fruit_with_id)

### GraphQL in Python Examples

In [None]:
# nearText search over the Fruit class for the concept "Breakfast";
# only the name property is requested and no limit is set.
graphQL_query = """
{
  Get{
    Fruit (
      nearText: {
        concepts: ["Breakfast"]
      }
    ) {
      name
    }
  }
}
"""

# Run the raw query, then drill down to the list of Fruit results for display.
breakfast_response = client.query.raw(graphQL_query)
breakfast_response["data"]["Get"]["Fruit"]

[{'name': 'Strawberries'},
 {'name': 'Oranges'},
 {'name': 'Apple'},
 {'name': 'Banana'},
 {'name': 'Blueberries'},
 {'name': 'Pineapple'}]

In [None]:
# nearText search over the Fruit class for the concept "Summer";
# only the name property is requested and no limit is set.
graphQL_query = """
{
  Get{
    Fruit (
      nearText: {
        concepts: ["Summer"]
      }
    ) {
      name
    }
  }
}
"""

# Run the raw query, then drill down to the list of Fruit results for display.
summer_response = client.query.raw(graphQL_query)
summer_response["data"]["Get"]["Fruit"]

[{'name': 'Banana'},
 {'name': 'Strawberries'},
 {'name': 'Blueberries'},
 {'name': 'Oranges'},
 {'name': 'Apple'},
 {'name': 'Pineapple'}]

In [None]:
# nearText search over the Fruit class for the concept "Yellow";
# limit:2 caps the response at two results, and only name is requested.
graphQL_query = """
{
  Get{
    Fruit (
      nearText: {
        concepts: ["Yellow"]
      }
      limit:2
    ) {
      name
    }
  }
}
"""

# Run the raw query, then drill down to the list of Fruit results for display.
yellow_response = client.query.raw(graphQL_query)
yellow_response["data"]["Get"]["Fruit"]

[{'name': 'Banana'}, {'name': 'Pineapple'}]