## Install Dependencies and Libraries

In [None]:
pip install weaviate-client==4.5.4

In [None]:
pip install google-generativeai

In [None]:
pip install requests

In [8]:
import json
import os
from io import BytesIO

import google.generativeai as genai
import requests
import weaviate
import weaviate.classes.config as wvcc
from PIL import Image
from weaviate.classes.config import Property, DataType
from weaviate.classes.query import Filter
from weaviate.util import generate_uuid5

## Connect to Weaviate Cluster

In [2]:
# Cluster endpoints come from the environment, falling back to the demo cluster's hosts.
WEAVIATE_HTTP_URL = os.getenv("WEAVIATE_HTTP_URL", "34.30.108.185")
WEAVIATE_GRPC_URL = os.getenv("WEAVIATE_GRPC_URL", "34.133.233.221")

# The API key is a secret, so it has no in-notebook fallback: it must be supplied
# through the environment and is never written into the notebook itself.
WEAVIATE_AUTH = os.getenv("WEAVIATE_AUTH")
if not WEAVIATE_AUTH:
    raise RuntimeError(
        "WEAVIATE_AUTH is not set; export the Weaviate API key before running this cell."
    )

# Open one client that talks to the cluster over both HTTP and gRPC (both unencrypted here).
client = weaviate.connect_to_custom(
    http_host=WEAVIATE_HTTP_URL,
    http_port=80,
    http_secure=False,
    grpc_host=WEAVIATE_GRPC_URL,
    grpc_port=50051,
    grpc_secure=False,
    auth_credentials=weaviate.auth.AuthApiKey(WEAVIATE_AUTH),
)

In [None]:
# Ask the server for its metadata and print it.
server_meta = client.get_meta()
print(server_meta)

{'hostname': 'http://[::]:8080', 'modules': {'generative-palm': {'documentationHref': 'https://cloud.google.com/vertex-ai/docs/generative-ai/chat/test-chat-prompts', 'name': 'Generative Search - Google PaLM'}, 'text2vec-palm': {'documentationHref': 'https://cloud.google.com/vertex-ai/docs/generative-ai/embeddings/get-text-embeddings', 'name': 'Google PaLM Module'}}, 'version': '1.24.3'}


## Create schema

In [15]:
# CAUTION: Running this will delete the collection along with the objects

# client.collections.delete_all()

Create the schema and configure the embedding and language models

**Note: Don't run the schema**

In [16]:
# Creating a collection that already exists is rejected by the server, so only create
# "Products" when it is missing; otherwise reuse the existing collection. This keeps the
# cell safe to re-run.
if client.collections.exists("Products"):
    collection = client.collections.get("Products")
else:
    collection = client.collections.create(
        name="Products",
        # Embedding model used to vectorize the objects.
        vectorizer_config=wvcc.Configure.Vectorizer.text2vec_palm(
            project_id="next24-demo-bk",
            api_endpoint="generativelanguage.googleapis.com",
            model_id="embedding-gecko-001",
        ),
        # Language model used by Weaviate's generative module.
        generative_config=wvcc.Configure.Generative.palm(
            project_id="next24-demo-bk",
            api_endpoint="generativelanguage.googleapis.com",
            model_id="gemini-pro-vision",
        ),
        properties=[
            Property(name="product_id", data_type=DataType.TEXT),
            Property(name="title", data_type=DataType.TEXT),
            Property(name="category", data_type=DataType.TEXT),
            Property(name="link", data_type=DataType.TEXT),
            Property(name="description", data_type=DataType.TEXT),
            Property(name="brand", data_type=DataType.TEXT),
            # Not set by the import cell; written later by the generative feedback loop.
            Property(name="generated_description", data_type=DataType.TEXT),
        ],
    )

## Import Objects

In [17]:
# Raw (not HTML-rendered) URL of the JSON file holding the sample product records
url = 'https://raw.githubusercontent.com/bkauf/next-store/main/first_99_objects.json'

# Bound the wait and stop on HTTP errors, so a failed download is reported here
# instead of surfacing later as a JSON parsing error.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse the whole response body as JSON
data = response.json()

In [18]:
# Display the first downloaded record to see which fields each product carries.
data[0]

{'id': 'id_1',
 'product_id': 'GGOEGAYC135814',
 'title': 'Google Badge Tee',
 'category': 'Apparel  Accessories Tops  Tees Tshirts',
 'link': 'https://shop.googlemerchandisestore.com/store/20160512512/assets/items/images/GGOEGXXX1358.jpg',
 'description': 'A classic crew neck tee made from 100 cotton Its soft and comfortable and features a small Google logo on the chest',
 'color': "['Blue']",
 'gender': 'Unisex',
 'brand': 'Google'}

#### Upload to Weaviate

In [19]:
products = client.collections.get("Products")

# Import through the batch API instead of sending one request per object. Each UUID is
# derived from the record's content, so re-running this cell writes to the same UUIDs
# rather than adding a second copy of every product.
with products.batch.dynamic() as batch:
    for item in data:
        batch.add_object(
            properties={
                "product_id": item["product_id"],
                "title": item["title"],
                "category": item["category"],
                "link": item["link"],
                "description": item["description"],
                "brand": item["brand"],
            },
            uuid=generate_uuid5(item),
        )

# Batch imports collect per-object errors instead of raising them, so check explicitly.
failed_imports = products.batch.failed_objects
if failed_imports:
    raise RuntimeError(
        f"{len(failed_imports)} of {len(data)} products failed to import; "
        f"first failure: {failed_imports[0]}"
    )

In [20]:
# Read a single stored object back from the collection and display it.
products.query.fetch_objects(limit=1)

QueryReturn(objects=[Object(uuid=_WeaviateUUIDInt('0534a177-0bc1-4b00-90ed-8d8db4da0bb5'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None), properties={'description': 'The Android Unisex Track Jacket is made of 100 polyester and features a black and white color scheme The jacket has a full zip front two front pockets and a ribbed collar cuffs and hem The jacket is also machinewashable', 'generated_description': None, 'product_id': 'GGOEAAEB120517', 'category': 'Jackets  Coats', 'link': 'https://shop.googlemerchandisestore.com/store/20160512512/assets/items/images/GGOEAXXX1205.jpg', 'title': 'Android Track Jacket', 'brand': 'Android'}, references=None, vector={}, collection='Products')])

In [21]:
# Report the total number of objects stored in the "Products" collection.
products = client.collections.get("Products")
product_stats = products.aggregate.over_all(total_count=True)

print(product_stats.total_count)

99


## Semantic Search
Pure vector search across our database

In [14]:
products = client.collections.get("Products")

# Vector search for "travel mug": ask for at most three objects and
# return only their title, description and category.
mug_results = products.query.near_text(
    query="travel mug",
    return_properties=["title", "description", "category"],
    limit=3,
)

for match in mug_results.objects:
    print(json.dumps(match.properties, indent=2))

{
  "title": "Google Los Angeles Mug",
  "description": "This is a blue mug with a white logo of Google Los Angeles on it",
  "category": "Drinkware"
}
{
  "title": "Google Campus Bike Mug",
  "description": "The Google Campus Bike Corkbase Mug is a blue mug with a cork bottom It features a design of a yellow bicycle with a basket on the front The mug is perfect for coffee tea or any other hot beverage",
  "category": "Home  Garden Mugs"
}
{
  "title": "Google Cloud Certified Professional Security Engineer Tumbler",
  "description": "The YETI Rambler 20 oz Tumbler is made of stainless steel and has a doublewall vacuum insulation to keep drinks cold for up to 24 hours and hot for up to 12 hours It is also dishwashersafe for easy cleaning The tumbler is silver and has the Google Cloud Certified logo on it",
  "category": "Drinkware"
}


## Filters

Example of vector search where category=Drinkware.

Adding a filter to a query can help narrow down the results for an exact match.

In [None]:
products = client.collections.get("Products")

response = products.query.near_text(
    query="travel cup",
    # Restrict the vector search to objects whose category matches "Drinkware".
    filters=Filter.by_property("category").equal("Drinkware"),
    return_properties=["title", "description", "category"],
    limit=3,
)

for o in response.objects:
    print(json.dumps(o.properties, indent=2))

{
  "description": "The SPEAR Vacuum Insulated Tumbler is made of stainless steel and has a capacity of 16 ounces It is white and has a blue and purple SPEAR logo on it",
  "category": "Drinkware Tumblers",
  "title": "SPEAR Vacuum Insulated Tumbler"
}
{
  "category": "Drinkware",
  "description": "The YETI Rambler 20 oz Tumbler is made of stainless steel and has a doublewall vacuum insulation to keep drinks cold for up to 24 hours and hot for up to 12 hours It is also dishwashersafe for easy cleaning The tumbler is silver and has the Google Cloud Certified logo on it",
  "title": "Google Cloud Certified Professional Security Engineer Tumbler"
}
{
  "category": "Drinkware",
  "title": "Create Design Code Build Cork Bottom Tumbler",
  "description": "This black tumbler is made of stainless steel and has a cork bottom It is perfect for keeping your drinks hot or cold for hours The tumbler is also dishwasher safe for easy cleaning"
}


#### Retrieve the objects by the filter without a semantic or keyword search query

The `like` operator narrows down results based on partial matches.

The `*` wildcard stands for zero or more unknown characters. In the query below, searching for `*bag*` therefore matches any category that contains `bag`, such as `bag`, `bags`, or `baggage`.

In [None]:
products = client.collections.get("Products")

# No search query here: objects are selected purely by the wildcard filter on `category`.
response = products.query.fetch_objects(
    filters=Filter.by_property("category").like("*bag*"),
    return_properties=["title", "description", "category"],
    limit=3,
)

for o in response.objects:
    print(json.dumps(o.properties, indent=2))

{
  "description": "The Timbuk2 Spire Messenger Bag is a stylish and functional bag perfect for carrying your laptop and other essentials It features a spacious main compartment and a front pocket for smaller items The bag is made from highquality materials and is durable and waterresistant It is also comfortable to carry with a padded shoulder strap and a trolley sleeve",
  "title": "Google Cloud Certified Hybrid MultiCloud Professional Messenger Bag",
  "category": "Luggage  Bags Briefcases"
}
{
  "title": "Google Art Tote Bag",
  "description": "The Google Art Tote Bag is made of 100 cotton canvas It is a natural white color with a multicolored design of mountains waves and the Google logo",
  "category": "Bags Tote Bags"
}
{
  "description": "The Timbuk2 Incognito Messenger Bag is a great way to carry your laptop and other essentials Its made from durable materials and features a comfortable shoulder strap The bag is also waterresistant so you can be sure your belongings will stay 

## Generative Feedback Loops


### Import libraries

In [None]:
from PIL import Image
from io import BytesIO
import google.generativeai as genai
from weaviate.util import generate_uuid5

### Connect and configure model

In [None]:
# Read the API key from the environment so the secret is never stored in the notebook.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export your Google Generative AI API key before running this cell."
    )
genai.configure(api_key=GOOGLE_API_KEY)

# Model used further down to describe each product image.
model = genai.GenerativeModel(model_name='gemini-pro-vision')

### Convert image links into PIL object

In [None]:
def url_to_pil(url, timeout=30):
    """Download the image at `url` and open it as a PIL image.

    Args:
        url: Address of the image file to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The image opened by `PIL.Image.open` from the downloaded bytes.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within `timeout` seconds.
    """
    response = requests.get(url, timeout=timeout)
    # Stop on HTTP errors here rather than handing an error page to PIL.
    response.raise_for_status()

    return Image.open(BytesIO(response.content))

### Generate a description for each object in the database and store it in the `generated_description` property

In [None]:
products = client.collections.get("Products")

DESCRIPTION_PROMPT = "Write a description about this product. Please include the company name and other important features."

# `iterator()` walks every object in the collection. A bare `fetch_objects()` returns a
# result wrapper holding a single default-sized page, so it neither iterates as a list of
# objects nor covers the whole collection.
for item in products.iterator():
    img_url = item.properties.get("link")
    if not img_url:
        # Without an image link there is nothing to describe.
        continue

    pil_image = url_to_pil(img_url)

    # Send the text prompt together with the product image to the model.
    generation = model.generate_content([DESCRIPTION_PROMPT, pil_image])
    generated_description = generation.text
    print(generated_description)

    # Store the generated text on the same object.
    products.data.update(
        uuid=item.uuid,
        properties={"generated_description": generated_description},
    )

 This is a shirt from Google's official merchandise store. It is a dark green shirt with the word "Seattle" written on it in white. The word "Seattle" is surrounded by a circle, and there is a picture of a mountain and a tree inside the circle. The words "Sea" and "Washington" are also written on the shirt. The shirt is made of 100% cotton and is available in sizes S-XL.
 This is a set of three patches from Google's official merchandise store. The patches are made of embroidered fabric and feature the Google logo, the MTV logo, and a dinosaur skull. The patches can be ironed onto any fabric and are a great way to show your love for Google and MTV.
 This is a Google t-shirt. It is made of 100% cotton and is available in a variety of colors. The shirt has a relaxed fit and features the Google logo on the front. The shirt is perfect for everyday wear or for showing your love of Google.
 This is a Google Cloud Certification swag kit that includes a leatherette folio and a laptop sleeve. Th

## Semantic Search on the `generated_description` property

In [None]:
# Bind the collection handle to `products`, as the other cells do; `data` is used
# elsewhere in this notebook for the downloaded JSON records.
products = client.collections.get("Products")

# Vector search for "gray crewneck", returning the generated description and title
# of up to five objects.
response = products.query.near_text(
    query="gray crewneck",
    return_properties=["generated_description", "title"],
    limit=5,
)

for o in response.objects:
    print(json.dumps(o.properties, indent=2))

{
  "title": "Android White Heather Crewneck Sweatshirt",
  "generated_description": " This is a men's sweatshirt from Google. It is made of 100% cotton and features a crew neck and a ribbed hem and cuffs. The sweatshirt is a heather gray color with a white Android logo on the chest."
}
{
  "title": "Unisex Waze Logo Hoodie",
  "generated_description": " This is a Waze branded hoodie. It is a heather gray color with a white Waze logo on the front. The hoodie is made of a soft and comfortable cotton blend fabric. It has a relaxed fit and is perfect for everyday wear."
}
{
  "generated_description": " This is a heather gray zip-up hoodie from Google. It has a black drawstring hood and black zipper. The front of the hoodie has a white logo with the words \"Mountain View, California\" in a circle. The left sleeve has a small black Google logo. The hoodie is made of 80% cotton and 20% polyester. It is machine-washable and tumble dry low.",
  "title": "Google Mountain View Campus Zip Hoodie"