# Import Custom Weaviate Client
```python
from src.weaviate.weaviate_client import WeaviateClient
weaviate_client = WeaviateClient()
```
The `WeaviateClient` class provides a convenient way to interact with Weaviate, including creating collections, adding objects, and querying data.

In [1]:
import os
import requests
from dotenv import load_dotenv
load_dotenv()
import sys
sys.path.append('../')
from src.weaviate.weaviate_client import WeaviateClient
from tqdm import tqdm
from weaviate.util import generate_uuid5



In [2]:
# Service endpoints are read from the environment (the first cell calls
# load_dotenv()); each is None when its variable is not set.
embed_url, rerank_url = map(os.environ.get, ("EMBEDDING_URL", "RERANK_URL"))
# Collection label and path of the intent definition file for this notebook.
collection_name, doc_url = "intent_RAG_1", './intent_RAG-update.txt'

# Clear and Create Collection

In [3]:
# Reset the target collection: build the client wrapper, delete "Intent_01",
# then create it again. The final call is left as the last expression so the
# notebook displays its return value.
# NOTE(review): the literal "Intent_01" is used here rather than the
# collection_name ("intent_RAG_1") assigned in the configuration cell —
# confirm which name is intended.
weaviate_client = WeaviateClient()
weaviate_client.delete_collection("Intent_01")
weaviate_client.create_collection("Intent_01")

2025-08-22 16:17:52,486 - src.weaviate.weaviate_client - INFO - ✅ Collection Intent_01 deleted
2025-08-22 16:17:52,657 - src.weaviate.weaviate_client - INFO - ✅ 

                    Collection Intent_01 created
 
                    Distance metric: cosine

                    Model: Qwen3-Embedding-0.6B

                    


True

In [4]:
# requests.post(f"{rerank_url}", json={
#     "query": query,
#     "documents":[o.properties.get("main_intent")+" "+o.properties.get("description_intent") for o in response.objects],
#     "top_n": 10,
#     "model": "Qwen/Qwen3-Embedding-0.6B",
#   }).json().get("results")

# Preprocessing Data

In [5]:
# Load the intent definitions: the file is split on blank lines into one block
# per intent, and each block becomes the list of its lines.
doc_url = './intent_RAG-update.txt'
# Decode explicitly as UTF-8: the file contains Vietnamese text and the
# platform default encoding used by open() is not UTF-8 on every system.
with open(doc_url, 'r', encoding='utf-8') as file:
    lines = [
        # Remove a newline that directly precedes '#', strip every ':' from
        # the block, then split the block into its individual lines.
        block.replace('\n#', '#').replace(":", "").split('\n')
        for block in file.read().split('\n\n')
    ]
# Show the first parsed block as a sanity check.
lines[0]

['# BOOKING SERVICE INTENT - Đặt lịch dọn nhà',
 'booking_service',
 '- đặt lịch dọn nhà',
 '- Đặt lịch dọn nhà cho tôi',
 '- lịch dọn nhà',
 '- muốn đặt lịch dọn nhà',
 '- cần đặt lịch dọn nhà',
 '- tôi muốn đặt lịch dọn nhà',
 '- anh muốn đặt lịch dọn nhà',
 '- em muốn đặt lịch dọn nhà',
 '- đặt lịch dọn nhà giúp tôi',
 '- đặt lịch dọn nhà hộ tôi',
 '- book lịch dọn nhà',
 '- đặt lịch dọn',
 '- lịch dọn']

In [6]:
# Handle to the "Intent_01" collection; the bulk-insert cell below writes
# through it.
intent_collection = weaviate_client.client.collections.get("Intent_01")

In [7]:
from weaviate.classes.data import DataObject

# Build the list of DataObject instances: one per line of every intent block.
# NOTE(review): every line of a block is stored as a body, including the
# '#' header line (index 0) and the intent-name line (index 1) — confirm
# that this is intended.
data_objects = []
for intent in lines:
    # A usable block has at least a header line and an intent-name line.
    # Skip shorter ones (e.g. a stray empty block) instead of failing with
    # IndexError on intent[1].
    if len(intent) < 2:
        continue
    main_intent = intent[1]
    # The title embedding depends only on the intent name, so compute it once
    # per block instead of once per line (the run log shows one embedding
    # request per vectorizer call).
    title_vector = weaviate_client._custom_vectorizer(str(main_intent))
    for raw_line in intent:
        # Drop every '-' (the list-bullet marker); surrounding whitespace is kept.
        description_intent = raw_line.replace("-", "")
        data_objects.append(DataObject(
            properties={
                "title": main_intent,
                "body": description_intent,
            },
            # Deterministic id derived from the title + body text.
            uuid=generate_uuid5(main_intent + description_intent),
            vector={
                "title_vector": title_vector,
                "body_vector": weaviate_client._custom_vectorizer(str(description_intent)),
            },
        ))

# Insert many objects and surface per-object failures, which insert_many
# reports on its return value.
insert_result = intent_collection.data.insert_many(data_objects)
if insert_result.has_errors:
    print(f"{len(insert_result.errors)} object(s) failed to insert: {insert_result.errors}")
print("Done")

2025-08-22 16:17:52,698 - src.weaviate.weaviate_client - INFO - ✅ Using default embed_url: http://192.168.88.165:3390/v1/embeddings
2025-08-22 16:17:52,759 - src.weaviate.weaviate_client - INFO - ✅ Using default embed_url: http://192.168.88.165:3390/v1/embeddings
2025-08-22 16:17:52,812 - src.weaviate.weaviate_client - INFO - ✅ Using default embed_url: http://192.168.88.165:3390/v1/embeddings
2025-08-22 16:17:52,850 - src.weaviate.weaviate_client - INFO - ✅ Using default embed_url: http://192.168.88.165:3390/v1/embeddings
2025-08-22 16:17:52,901 - src.weaviate.weaviate_client - INFO - ✅ Using default embed_url: http://192.168.88.165:3390/v1/embeddings
2025-08-22 16:17:52,952 - src.weaviate.weaviate_client - INFO - ✅ Using default embed_url: http://192.168.88.165:3390/v1/embeddings
2025-08-22 16:17:53,003 - src.weaviate.weaviate_client - INFO - ✅ Using default embed_url: http://192.168.88.165:3390/v1/embeddings
2025-08-22 16:17:53,055 - src.weaviate.weaviate_client - INFO - ✅ Using defa

Done


In [8]:
# Display the properties configured on the "Intent_01" collection.
weaviate_client.client.collections.list_all()['Intent_01'].properties

[_Property(name='title', description='The title of document', data_type=<DataType.TEXT: 'text'>, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=<Tokenization.LOWERCASE: 'lowercase'>, vectorizer_config=None, vectorizer=None, vectorizer_configs={}),
 _Property(name='body', description='The content of document', data_type=<DataType.TEXT: 'text'>, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=<Tokenization.WORD: 'word'>, vectorizer_config=None, vectorizer=None, vectorizer_configs={})]

In [9]:
from weaviate.classes.query import MetadataQuery

# Hybrid (keyword + vector) search for a sample utterance against the
# "body_vector" named vector of "Intent_01", keeping the top 10 hits.
query = "anh muốn đặt lịch dọn nhà"
intent_store = weaviate_client.client.collections.get("Intent_01")
# The query is embedded client-side and passed in explicitly.
query_embedding = weaviate_client._custom_vectorizer(query)
# Ask for the fused score, its explanation, and the distance on each hit.
requested_metadata = MetadataQuery(score=True, explain_score=True, distance=True)
response_1 = intent_store.query.hybrid(
    query=query,
    # Per Weaviate's hybrid-search docs, alpha below 0.5 weights the keyword
    # (BM25) side more heavily than the vector side.
    alpha=0.25,
    vector=query_embedding,
    target_vector="body_vector",
    limit=10,
    include_vector=False,
    return_metadata=requested_metadata,
)

2025-08-22 16:18:11,744 - src.weaviate.weaviate_client - INFO - ✅ Using default embed_url: http://192.168.88.165:3390/v1/embeddings


In [10]:
# Print each hit's stored properties followed by its hybrid score, one per line.
for hit in response_1.objects:
    print(hit.properties, hit.metadata.score, sep="\n")

{'body': ' anh muốn đặt lịch dọn nhà', 'title': 'booking_service'}
0.75
{'body': ' Anh muốn biết giá dọn dẹp nhà', 'title': 'other_intent'}
0.4981544315814972
{'body': ' muốn đặt lịch dọn nhà', 'title': 'booking_service'}
0.46052855253219604
{'body': ' em muốn đặt lịch dọn nhà', 'title': 'booking_service'}
0.40769487619400024
{'body': ' tôi muốn đặt lịch dọn nhà', 'title': 'booking_service'}
0.40769487619400024
{'body': ' chị muốn đặt lịch dọn nhà', 'title': 'booking_service'}
0.40769487619400024
{'body': ' muốn đặt lịch dọn dẹp', 'title': 'booking_service'}
0.3691539168357849
{'body': ' Tôi muốn đặt lịch dọn dẹp nhà', 'title': 'booking_service'}
0.364230751991272
{'body': ' đặt lịch dọn nhà', 'title': 'booking_service'}
0.3497169017791748
{'body': ' muốn đặt lịch cleaning', 'title': 'booking_service'}
0.33647620677948


In [11]:
# Display the raw result objects (uuid, metadata, properties) for inspection.
response_1.objects

[Object(uuid=_WeaviateUUIDInt('90330774-47b6-53c6-8dfb-c0f5f2e5af0d'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=None, certainty=None, score=0.75, explain_score='\nHybrid (Result Set keyword,bm25) Document 90330774-47b6-53c6-8dfb-c0f5f2e5af0d: original score 3.285489, normalized score: 0.75', is_consistent=None, rerank_score=None), properties={'body': ' anh muốn đặt lịch dọn nhà', 'title': 'booking_service'}, references=None, vector={}, collection='Intent_01'),
 Object(uuid=_WeaviateUUIDInt('b576c529-ca35-5e08-9597-712137cad37b'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=None, certainty=None, score=0.4981544315814972, explain_score='\nHybrid (Result Set keyword,bm25) Document b576c529-ca35-5e08-9597-712137cad37b: original score 2.2915533, normalized score: 0.49815443', is_consistent=None, rerank_score=None), properties={'body': ' Anh muốn biết giá dọn dẹp nhà', 'title': 'other_intent'}, references=None, vector={}, col