In [2]:
import weaviate

In [60]:
client = weaviate.connect_to_local()
print(client.is_connected())

True


In [65]:
client.collections.delete_all()
client.collections.list_all()

{}

In [66]:
import pandas as pd
amenities  = pd.read_json("amenities.json")
print(amenities.dtypes)
amenities =  amenities.to_dict(orient = 'records')


idAmenity       int64
description    object
nameAmenity    object
categories     object
price          object
dtype: object


In [67]:
from weaviate.classes.config import Property, DataType, Configure
from weaviate.util import get_valid_uuid


client.collections.create(
    name = 'amenities_collection',
    description='this collection stores amenities information like description, name, price, and category',
    vectorizer_config = Configure.Vectorizer.text2vec_ollama(model="llama3.2:latest", api_endpoint="http://host.docker.internal:11434"),
    properties = [
        # Property(name='idAmenity',data_type=DataType.TEXT),
        Property(name='nameAmenity',data_type=DataType.TEXT),
        # Property(name='description',data_type=DataType.TEXT),
        # Property(name='price',data_type=DataType.TEXT),
        Property(name='category',data_type=DataType.TEXT)
    ]

)

<weaviate.collections.collection.sync.Collection at 0x27fd18bfa30>

In [57]:
client.collections.list_all()   



{'Amenities_collection': _CollectionConfigSimple(name='Amenities_collection', description='this collection stores amenities information like description, name, price, and category', generative_config=None, properties=[_Property(name='nameAmenity', description=None, data_type=<DataType.TEXT: 'text'>, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=<Tokenization.WORD: 'word'>, vectorizer_config=_PropertyVectorizerConfig(skip=False, vectorize_property_name=True), vectorizer='text2vec-ollama'), _Property(name='description', description=None, data_type=<DataType.TEXT: 'text'>, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=<Tokenization.WORD: 'word'>, vectorizer_config=_PropertyVectorizerConfig(skip=False, vectorize_property_name=True), vectorizer='text2vec-ollama'), _Property(name='category', description=None, data_type=<DataType.TEXT: 'text'>, index_filterable=True,

In [68]:
from uuid import uuid4  

collection = client.collections.get('amenities_collection')

for amenity in amenities:

    properties = {
                    # "idAmenity": str(amenity['idAmenity']).lower(),
                    "nameAmenity": amenity['nameAmenity'].lower(),
                    # "description":amenity['description'].lower() ,
                    # "price": amenity['price'].lower(),
                    "category":str(amenity['categories']).lower() 
                }
    id = get_valid_uuid(uuid4())
    row = collection.data.insert(uuid = id, properties = properties)
    print(properties)
    

{'nameAmenity': 'alarm clock', 'category': "['room amenities', 'convenience']"}
{'nameAmenity': 'cleaning studio', 'category': "['cleaning services', 'room maintenance']"}
{'nameAmenity': 'cleaning suite', 'category': "['cleaning services', 'room maintenance']"}
{'nameAmenity': 'scooter rental', 'category': "['transportation', 'leisure activities']"}
{'nameAmenity': 'wifi', 'category': "['internet services', 'business amenities']"}
{'nameAmenity': 'view', 'category': "['room amenities', 'scenic views']"}
{'nameAmenity': 'smoking', 'category': "['room amenities', 'health & safety']"}
{'nameAmenity': 'free coffee', 'category': "['dining services', 'convenience']"}
{'nameAmenity': 'tv 55', 'category': "['room amenities', 'entertainment']"}
{'nameAmenity': 'baby cot', 'category': "['family services', 'room amenities']"}
{'nameAmenity': 'breakfast', 'category': "['dining services', 'meal options']"}
{'nameAmenity': 'early check-in', 'category': "['check-in services', 'guest convenience']"}


In [37]:
from gramformer import Gramformer
corrector = Gramformer(models=1, use_gpu=True)
highlighter = Gramformer(models=2, use_gpu=True)



[Gramformer] Grammar error correct/highlight model loaded..
TO BE IMPLEMENTED!!!


In [49]:
s = 'i am goiing to store'
correct = corrector.correct(s)
print(str(correct))
highlight = highlighter.highlight(str(s), correct[0])
print(highlight)

{'i am going to store.'}


TypeError: 'set' object is not subscriptable

In [36]:
from spellchecker import SpellChecker

spell_checker = SpellChecker()
correction = spell_checker.correction("internet")
print(correction)

internet


In [71]:
from weaviate.classes.query import MetadataQuery



collection = client.collections.get('amenities_collection')
guest_note = 'I am looking for good wifi connection and coffee, with good view'

recommeded_amenities_bm25 = collection.query.bm25(guest_note.lower(), limit=10, query_properties=['nameAmenity'],return_metadata=MetadataQuery(distance=True))
recommeded_amenities_hybrid = collection.query.hybrid(guest_note.lower(), limit=5, query_properties=['nameAmenity'],return_metadata=MetadataQuery(distance=True))
recommeded_amenities_near_text = collection.query.near_text(guest_note.lower(), limit=5,return_metadata=MetadataQuery(distance=True))


recomendations_bm25 =[amenity.properties['nameAmenity'] for amenity in recommeded_amenities_bm25.objects]
recomendations_hybrid =[amenity.properties['nameAmenity'] for amenity in recommeded_amenities_hybrid.objects]
recomendations_near_text =[amenity.properties['nameAmenity'] for amenity in recommeded_amenities_near_text.objects]




print("--------------------------------------------------") 
for amenity in recommeded_amenities_bm25.objects:
    print("Name:", amenity.properties['nameAmenity'])
    print("Score:", amenity.metadata.distance)
print("--------------------------------------------------")
for amenity in recommeded_amenities_hybrid.objects:
    print("Name:", amenity.properties['nameAmenity'])
    print("Score:", amenity.metadata.distance)
print("--------------------------------------------------")
for amenity in recommeded_amenities_near_text.objects:
    print("Name:", amenity.properties['nameAmenity'])
    print("Score:", amenity.metadata.distance)


# for amenity in recomendations_bm25:
#     print("Name:", amenity)
# print("--------------------------------------------------")
# for amenity in recomendations_hybrid:
#     print("Name:", amenity)
# print("--------------------------------------------------") 
# for amenity in recomendations_near_text:
#     print("Name:", amenity)



 

--------------------------------------------------
Name: wifi
Score: None
Name: view
Score: None
Name: free coffee
Score: None
--------------------------------------------------
Name: wifi
Score: None
Name: scooter rental
Score: None
Name: private yacht tour
Score: None
Name: breakfast buffet
Score: None
Name: luxury car rental
Score: None
--------------------------------------------------
Name: scooter rental
Score: 0.4596822261810303
Name: private yacht tour
Score: 0.4600679874420166
Name: breakfast buffet
Score: 0.4620260000228882
Name: luxury car rental
Score: 0.4633164405822754
Name: in-room spa treatment
Score: 0.46758830547332764


In [None]:
collection = client.collections.get('amenities_collection')
guest_note = 'wifi'

recommendation = collection.generate.near_text(guest_note, limit=5,grouped_task='explain all details about {nameAmenity}')