In [1]:
from getpass import getpass # hide password
import weaviate # to communicate to the Weaviate instance
from weaviate.wcs import WCS

In [2]:
weaviate_url = 'https://demo-cluster-22.semi.network'
weaviate_url

'https://demo-cluster-22.semi.network'

In [3]:
client = weaviate.Client(weaviate_url)

In [4]:
client.is_ready()

True

In [5]:
import pandas as pd
df = pd.read_csv('zomato.csv')

In [6]:
df.head()

Unnamed: 0,head,URL,Cuisines,PhoneNumber,AverageCost
0,Sri Udupi Park,https://www.zomato.com/bangalore/sri-udupi-par...,"South Indian, North Indian, Chinese, Street Fo...",920000000000.0,450
1,Meghana Foods,https://www.zomato.com/bangalore/meghana-foods...,"Biryani, Andhra, North Indian, Seafood",918000000000.0,700
2,Donne Biriyani House,https://www.zomato.com/bangalore/donne-biriyan...,Biryani,919000000000.0,300
3,Domino's Pizza,https://www.zomato.com/bangalore/dominos-pizza...,"Pizza, Fast Food, Desserts",920000000000.0,400
4,KFC,https://www.zomato.com/bangalore/kfc-indiranagar,"Burger, Fast Food, Biryani, Desserts, Beverages",920000000000.0,400


In [7]:
import uuid
def generate_uuid(class_name: str, identifier: str,
                  test: str = 'teststrong') -> str:
    test = 'overwritten'
    return str(uuid.uuid5(uuid.NAMESPACE_DNS, class_name + identifier))

In [8]:
client.schema.delete_all()
schema = {
    "classes": [
        {
            "class": "Restaurant",
            "description": "A Restaurant which is reviewed",
            "properties": [
                {
                    "name": "head",
                    "description": "The name of the restaurant",
                    "dataType": ["text"]
                }, {
                    "name": "some",
                    "description": "Website URL on zomato app",
                    "dataType": ["text"]
                }, {
                    "name": "thing",
                    "description": "Famous for which Cuisine",
                    "dataType": ["string"]
                }, {
                    "name": "call",
                    "description": "Phone Number",
                    "dataType": ["phoneNumber"]
                }, {
                    "name": "money",
                    "description": "The Average cost",
                    "dataType": ["number"]
                }
                
            ]
      
        }
    ]
}

client.schema.create(schema)

In [9]:
import datetime
def log(i: str) -> str:
    now = datetime.datetime.utcnow()
    print(now, "| " + str(i))

In [10]:
import json
# save schema to file
with open('schema.json', 'w') as outfile: 
    json.dump(schema, outfile)
# remove current schema from Weaviate, removes all the data too
client.schema.delete_all()
# import schema using file path
client.schema.create('schema.json')
# print schema
print(json.dumps(client.schema.get(), indent=2))

{
  "classes": [
    {
      "class": "Restaurant",
      "description": "A Restaurant which is reviewed",
      "invertedIndexConfig": {
        "cleanupIntervalSeconds": 60
      },
      "moduleConfig": {
        "text2vec-contextionary": {
          "vectorizeClassName": true
        }
      },
      "properties": [
        {
          "dataType": [
            "text"
          ],
          "description": "The name of the restaurant",
          "moduleConfig": {
            "text2vec-contextionary": {
              "skip": false,
              "vectorizePropertyName": false
            }
          },
          "name": "head"
        },
        {
          "dataType": [
            "text"
          ],
          "description": "Website URL on zomato app",
          "moduleConfig": {
            "text2vec-contextionary": {
              "skip": false,
              "vectorizePropertyName": false
            }
          },
          "name": "some"
        },
        {
          "dataTy

In [11]:
def add_res(data, batch_size=512, debug_mode=False): 
    no_items_in_batch = 0

    for index, row in data.iterrows():
        res_object = {
            "head": row["head"] + '.',
            "link": row["URL"],
            "bestFood": row["Cuisines"],
            "mobileNo": row["PhoneNumber"],
            "price": row["AverageCost"]
        }

        res_uuid = generate_uuid('wine', row["head"]+row["URL"]+row['Cuisines']+str(row['PhoneNumber'])+str(row['AverageCost']))

        client.batch.add_data_object(res_object, "Restaurant", res_uuid)
        no_items_in_batch += 1

        if no_items_in_batch >= batch_size:
            results = client.batch.create_objects()
            
            if debug_mode:
                for result in results:
                    if result['result'] != {}:
                        log(result['result'])

                message = str(index) + ' / ' + str(data.shape[0]) +  ' items imported'
                log(message)

            no_items_in_batch = 0

    client.batch.create_objects()

In [12]:
add_res(df.head(2500), batch_size=500, debug_mode=True)

2022-03-28 03:58:44.025265 | 499 / 2500 items imported
2022-03-28 03:58:48.355587 | 999 / 2500 items imported
2022-03-28 03:58:52.042157 | 1499 / 2500 items imported
2022-03-28 03:58:56.199518 | 1999 / 2500 items imported
2022-03-28 03:58:58.995467 | 2499 / 2500 items imported


In [13]:
result = client.query.get(class_name='Restaurant', properties="head")\
    .do()

In [14]:
result

{'data': {'Get': {'Restaurant': [{'head': 'Amritam Super Foods.'},
    {'head': 'Call Me Chow.'},
    {'head': 'Desi Dhaba.'},
    {'head': 'Apsara Ice Creams.'},
    {'head': 'Shanmukha.'},
    {'head': 'Thalassery Restaurant.'},
    {'head': 'Falahaar & Kota Kachori.'},
    {'head': 'Sattvam.'},
    {'head': "Andhra Ruchi's Restaurant."},
    {'head': 'Kavuni Kitchen.'},
    {'head': 'Nellore Vaari Andhra Bhojanam And Biryanis.'},
    {'head': 'Momo Point.'},
    {'head': 'Burger Seigneur.'},
    {'head': 'MAA Products Restaurant.'},
    {'head': 'Sundae Everyday Ice Creams.'},
    {'head': 'Paratha Plaza.'},
    {'head': 'The Biryani Zone - By Kouzina.'},
    {'head': 'Ledikeni Sweets & Savouries.'},
    {'head': 'Third Wave Coffee.'},
    {'head': 'The Pizza Bakery - All Day Eatery.'},
    {'head': 'S.M.S.Evening Fast Food.'},
    {'head': 'EasyBites by Empire.'},
    {'head': 'Mainland China.'},
    {'head': 'Aubree.'},
    {'head': 'Fattoush.'},
    {'head': 'The Dessert Zone.'},

In [15]:
client.query.get(class_name='Restaurant', properties="bestFood")\
    .with_limit(10)\
    .with_near_text({'concepts': ['Pizza']})\
    .do()

{'data': {'Get': {'Restaurant': [{'bestFood': 'Pizza, Pasta, Italian, Continental, Fast Food, Salad, Desserts, Beverages'},
    {'bestFood': 'Pizza, Pasta, Italian, Continental, Fast Food, Salad, Desserts, Beverages'},
    {'bestFood': 'Pizza, Burger, Pasta, Fast Food'},
    {'bestFood': 'Pizza, Desserts'},
    {'bestFood': 'Pizza, Fast Food, Desserts, Beverages'},
    {'bestFood': 'Pizza, Pasta, Italian, Continental, Fast Food, Salad, Desserts, Beverages'},
    {'bestFood': 'Pizza, Fast Food, Desserts, Beverages'},
    {'bestFood': 'Pizza, Fast Food, Desserts, Beverages'},
    {'bestFood': 'Pizza, Fast Food, Desserts, Beverages'},
    {'bestFood': 'Pizza, Fast Food, Beverages, Desserts'}]}}}

In [17]:
client.query.get(class_name='Restaurant', properties="head")\
    .with_limit(5)\
    .with_near_text({'concepts': ['Burger']})\
    .do()

{'data': {'Get': {'Restaurant': [{'head': 'Burger King.'},
    {'head': 'Burger King.'},
    {'head': 'Burger King.'},
    {'head': 'Boss Burger.'},
    {'head': 'Meat and Eat.'}]}}}