In [5]:
import csv
import json
import random
import openai
import time
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
import edgedb

In [6]:
COLLECTION_NAME = 'reviews'  # Collection name
DIMENSION = 1536  # Embeddings size
MILVUS_HOST = 'localhost'  # Milvus server URI
MILVUS_PORT = '19530'
OPENAI_ENGINE = 'text-embedding-ada-002'  # Which engine to use
openai.api_key = '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'  # Use your own Open AI API Key here
QUERY_PARAM = {
    "metric_type": "L2",
    "params": {"ef": 64},
}

In [7]:
connections.connect(host=MILVUS_HOST, port=MILVUS_PORT)

if utility.has_collection(COLLECTION_NAME):
    utility.drop_collection(COLLECTION_NAME)

fields = [
    FieldSchema(name='id', dtype=DataType.VARCHAR, description='Ids', is_primary=True, auto_id=False,max_length=64000),
    FieldSchema(name='Business_name', dtype=DataType.VARCHAR, description='Business_name',max_length=64000),
    FieldSchema(name='zipcode', dtype=DataType.VARCHAR, description='Zipcode', max_length=64000),
    FieldSchema(name='business_community', dtype=DataType.VARCHAR, description='Business Community', max_length=64000),
    FieldSchema(name='rating', dtype=DataType.FLOAT, description='Rating'),
    FieldSchema(name='review', dtype=DataType.VARCHAR, description='Review',max_length=64000),
    FieldSchema(name='date_reviewed', dtype=DataType.VARCHAR,description='Date of the review' ,max_length=64000),
    FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR, description='Embedding vectors', dim=DIMENSION)
]
schema = CollectionSchema(fields=fields, description='Review collection')
collection = Collection(name=COLLECTION_NAME, schema=schema)

index_params = {
    'index_type': 'IVF_FLAT',
    'metric_type': 'L2',
    'params': {'nlist': 1024}
}
collection.create_index(field_name="embedding", index_params=index_params)
collection.load()

In [8]:
client = edgedb.create_client()
def fetch_reviews():
    query = """
        SELECT Review {
            id,
            rating,
            review,
            review_date,
            reviews_business: {
            ID,
            name,
            in_communityArea:{
              name
            },
              has_address: {
                in_zipcode: {
                    digits
                },
            },

            },  
        };
    """
    result = client.query(query)
    return result

In [9]:
result=fetch_reviews()

In [97]:
result[0]

Object{id := UUID('aac48a54-876c-11ee-aa6d-7f5b84f7eff7'), rating := 5.0, review := "The Staff is GREAT!\nThe Food is outstanding!\nI'd recommend a visit for a Lunch and Try there daily Homemade Soups! Yummmy!", review_date := '2022-10-06', reviews_business := Object{ID := 'jkcd702I7p0V41mz8sVVDA', name := 'Links Grille', in_communityArea := Object{name := 'Chicago'}, has_address := Object{in_zipcode := Object{digits := '60544'}}}}

In [18]:
# Fetch embeddings for each Review to insert into Milvus
from openai import OpenAI
openAiClient = OpenAI(api_key='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')
def embed(text_to_embed):
    # Embed a line of text
    response = openAiClient.embeddings.create(input=text_to_embed, model=OPENAI_ENGINE)

    # Return the list of embeddings
    return response.data[0].embedding


In [19]:
from tqdm import tqdm
data = [[],[],[],[],[],[],[],[]]
BATCH_SIZE=100
for i in tqdm(range(0,len(result))):
    if  result[i]!=None and result[i].reviews_business!=None and result[i].review!='' and result[i].reviews_business.has_address!=None and result[i].reviews_business.in_communityArea!=None and result[i].reviews_business.has_address.in_zipcode!=None:
        data[0].append(result[i].reviews_business.ID)
        data[1].append(result[i].reviews_business.name or '')
        data[2].append(result[i].reviews_business.has_address.in_zipcode.digits or '')
        data[3].append(result[i].reviews_business.in_communityArea.name or '')
        data[4].append(result[i].rating)
        data[5].append(result[i].review or '')
        data[6].append(result[i].review_date or '')
        data[7].append(embed(data[5]))
    if len(data[0])%BATCH_SIZE==0:
#         data[6].append(embed(data[4]))
        collection.insert(data)
        data = [[],[],[],[],[],[],[],[]]
    
if len(data[0])!=0:
#        data[6].append(embed(data[4]))
       collection.insert(data)
       data = [[],[],[],[],[],[],[],[]]


100%|███████████████████████████████████████| 5720/5720 [53:01<00:00,  1.80it/s]


In [90]:
# Filtered Search Function
def query(query, top_k = 5):
    text, expr= query
    res = collection.search([embed(text)], anns_field='embedding', expr=expr, param=QUERY_PARAM, limit = top_k, output_fields=['id', 'zipcode', 'business_community', 'rating', 'review', 'date_reviewed',"Business_name"])
    for i, hit in enumerate(res):
        print(f'\nDisplaying Top {top_k} Results for query "{text}"')
        for ii, hits in enumerate(hit):
            print('\t' + 'Rank:', ii + 1, '| Score:', hits.score)
            print('\t\t' + '  Review:', hits.entity.get('review'))
            print('\t\t' + '  Business_name:', hits.entity.get('Business_name'))
            print('\t\t' + '  Community Area:', hits.entity.get('business_community'))
            print('\t\t' + '  Rating:', hits.entity.get('rating'))
            print('\t\t' + '  Date of review:', hits.entity.get('date_reviewed'))
            print("\n")
            
            
    
            


In [91]:
# “Chicago Style Hotdogs” reviews for the date between 10/29/2023 and 11/5/2023.

query(('Chicago Style Hotdogs','date_reviewed<"2023-11-05" and date_reviewed>"2023-10-29" '), top_k=5)


Displaying Top 5 Results for query "Chicago Style Hotdogs"
	Rank: 1 | Score: 0.3564627766609192
		  Review: DoorDash and customer service disaster. 

DO NOT ORDER DOORDASH HERE!!! 

I experienced the most horrific lunch here. I placed a DoorDash group order with...
		  Business_name: Cafecito
		  Community Area: Chicago
		  Rating: 1.0
		  Date of review: 2023-11-01


	Rank: 2 | Score: 0.3567294180393219
		  Review: I came here for brunch and ordered the Milk Bread French Toast and cold brew. The cold brew was very sour but after a packet of sugar and some cream it was...
		  Business_name: Cherry Circle Room
		  Community Area: Chicago
		  Rating: 5.0
		  Date of review: 2023-10-31


	Rank: 3 | Score: 0.3567294180393219
		  Review: Found this place on Yelp when I was headed to the airport. Perfect spot to pick up some treats before leaving Chi Town! I love a good Taiwanese bakery,...
		  Business_name: Sweet Bean
		  Community Area: Chicago
		  Rating: 5.0
		  Date of review: 2023-10

In [95]:
# Top businesses that offer Steak in Community Area The Loop.
query(('Steak', 'business_community like \"Chicago%\"'), top_k=5)


Displaying Top 5 Results for query "Steak"
	Rank: 1 | Score: 0.3896405100822449
		  Review: Papa Fresco's never fails to satisfy. It is our go to pizza place! Pizza is always delivered promptly, still hot and always tastes delicious. It is a great...
		  Business_name: Papa Fresco's
		  Community Area: Chicago
		  Rating: 5.0
		  Date of review: 2018-12-27


	Rank: 2 | Score: 0.3896405100822449
		  Review: This is the best family owned restaurant anywhere. The food is great. They make awesome homemade biscuits and serve them with homemade jam.

My wife loves...
		  Business_name: Harner's Bakery And  Restaurant
		  Community Area: Chicago
		  Rating: 5.0
		  Date of review: 2023-05-24


	Rank: 3 | Score: 0.3896405100822449
		  Review: The food tastes fabulous - that is...the food that IS included in your order. We drove almost 35 - 40 minutes because my husband wanted rib tips. We ate at...
		  Business_name: Sharko's BBQ
		  Community Area: Chicago
		  Rating: 3.0
		  Date of review:

In [96]:
#Top businesses that offer Hot Dogs in Community Area Lincoln Park
query(('Hot Dogs', 'business_community like \"Lincoln Park%\"'), top_k=5)


Displaying Top 5 Results for query "Hot Dogs"
	Rank: 1 | Score: 0.40317001938819885
		  Review: Sad to say that this stretch of road has been the scene of many traffic crashes over the past few years especially during the overnight hours whereas...
		  Business_name: Jean Baptiste Pointe DuSable
		  Community Area: Lincoln Park
		  Rating: 1.0
		  Date of review: 2023-07-19


	Rank: 2 | Score: 0.40322357416152954
		  Review: I stumbled across Kitchen Keto after starting my new keto diet, and might I say, it is one of my favorite kitchens in Chicago now. They make being on a keto...
		  Business_name: Kitchen Keto
		  Community Area: Lincoln Park
		  Rating: 5.0
		  Date of review: 2022-03-01


	Rank: 3 | Score: 0.40324392914772034
		  Review: Fresh pipping hot delicious wood fired pizza made with the uber fresh ingredients from the green city market. 

Beyond the more traditional varieties (black...
		  Business_name: Nomad Pizza
		  Community Area: Lincoln Park
		  Rating: 4.0
		  Da

In [94]:
# Top businesses that offer Chicago Hot Dogs in Zip-Code 60605 and have ratings greater than 3 stars.
query(('Hot Dogs', 'zipcode=="60605" and rating>3'), top_k=5)


Displaying Top 5 Results for query "Hot Dogs"
	Rank: 1 | Score: 0.4045543372631073
		  Review: This is part of South Loop Elementary and it is awesome for kids in the neighborhood on weekends. There is parking in the school parking lot, a big kid...
		  Business_name: Mary Richardson Jones Park
		  Community Area: Near South Side
		  Rating: 5.0
		  Date of review: 2017-04-09


	Rank: 2 | Score: 0.4045543372631073
		  Review: Gorgeous fountain. 
Honestly visited the fountain because of the show Married with Children, as I'm sure many have.  We had no idea the grandiose the...
		  Business_name: Buckingham Fountain
		  Community Area: Chicago
		  Rating: 5.0
		  Date of review: 2023-11-16


	Rank: 3 | Score: 0.40473973751068115
		  Review: Date of visit: July 23, 2023

We were staying a block away at the Hilton Chicago and needed a place to grab dinner before seeing Beyoncé over at Soldier...
		  Business_name: Burger Alley Chicago
		  Community Area: Chicago
		  Rating: 4.0
		  Date o