In [4]:

import os
import requests
import numpy as np
# Qdrant configuration.
# The endpoint and the collection name fall back to the demo values below, but
# the API key is a secret: it is read from the environment only and has no
# in-source default. The key that used to be hardcoded here must be treated as
# leaked and rotated in the Qdrant Cloud console.
QDRANT_API_URL = os.getenv(
    "QDRANT_API_URL",
    "https://fcbf96b5-0f95-47b1-b088-dd1eba2a2758.us-east4-0.gcp.cloud.qdrant.io:6333",
)
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")  # None when the variable is not set
QDRANT_COLLECTION = os.getenv("QDRANT_COLLECTION", "cmc_final_db")


def test_qdrant_search():
    """
    Gửi một truy vấn đơn giản tới Qdrant để kiểm tra kết nối và response.
    """
    url = f"{QDRANT_API_URL}/collections/{QDRANT_COLLECTION}/points/search"
    headers = {
        "Authorization": f"Bearer {QDRANT_API_KEY}",
        "Content-Type": "application/json",
    }

    # Tạo một vector ngẫu nhiên kích thước 768
    query_vector = list(np.random.rand(768))  # Hoặc vector thực tế từ pipeline của bạn
    payload = {
        "vector": query_vector,
        "limit": 5,  # Số lượng kết quả muốn lấy
    }

    print("Testing Qdrant search...")
    try:
        response = requests.post(url, json=payload, headers=headers)
        response.raise_for_status()
        print(f"Response Status: {response.status_code}")
        print("Response Data:", response.json())
    except requests.exceptions.HTTPError as e:
        print("HTTP Error:", e.response.status_code, e.response.text)
    except requests.exceptions.RequestException as e:
        print("Error occurred:", str(e))


# Entry point: run the connectivity check when this cell/script is executed directly.
if __name__ == "__main__":
    test_qdrant_search()


Testing Qdrant search...
Response Status: 200
Response Data: {'result': [{'id': '7c9b8dbf-e631-429d-985d-b443b04a9dad', 'version': 0, 'score': 0.056992024}, {'id': 'a3fc7706-2812-4495-838e-18cbcd5bba89', 'version': 0, 'score': 0.04741749}, {'id': '6a20096d-8342-4b6f-8860-6ac2bc0ad306', 'version': 1, 'score': 0.04589819}, {'id': 'e28045df-f779-462f-a4e7-2d5a7ca66166', 'version': 1, 'score': 0.045791358}, {'id': 'a5693358-ca3e-4d1d-9d4c-b383348a989d', 'version': 2, 'score': 0.04545533}], 'status': 'ok', 'time': 0.001576648}


## Thêm bước tạo embedding cho câu truy vấn

In [3]:
import os
import requests
from dotenv import load_dotenv

# Read settings from the .env file into the process environment.
load_dotenv()

# Qdrant settings (each one is None when its variable is not set).
QDRANT_API_URL = os.environ.get("QDRANT_API_URL")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")  # replace with your own API key
QDRANT_COLLECTION = os.environ.get("QDRANT_COLLECTION")

# OpenAI embedding settings (only needed when embedding with OpenAI).
OPENAI_API_URL = os.environ.get("OPENAI_API_URL")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")  # replace with your own OpenAI API key
MODEL_NAME = os.environ.get("MODEL_NAME")  # OpenAI embedding model


# Function to generate embedding using OpenAI
def generate_embedding(query_text, timeout=30.0):
    """
    Request an embedding for `query_text` from the configured embeddings endpoint.

    The model is taken from MODEL_NAME (loaded from the environment) and falls
    back to "text-embedding-ada-002" when that variable is unset or empty, so
    the model configured in .env is the one that is actually requested.

    Args:
        query_text: Text to embed.
        timeout: Seconds to wait for the endpoint before giving up.

    Returns:
        The value found at ["data"][0]["embedding"] in the JSON response.

    Raises:
        requests.exceptions.HTTPError: the endpoint answered with an error status.
        requests.exceptions.RequestException: any other request failure.
    """
    headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "input": query_text,
        "model": MODEL_NAME or "text-embedding-ada-002",
    }

    print("Generating embedding for query...")
    response = requests.post(
        OPENAI_API_URL, json=payload, headers=headers, timeout=timeout
    )
    response.raise_for_status()
    data = response.json()
    return data["data"][0]["embedding"]

# Function to search in Qdrant
def search_qdrant(embedding_vector, limit=5, timeout=10.0):
    """
    Run a nearest-neighbour search in the configured Qdrant collection.

    Args:
        embedding_vector: Query vector; its length must equal the collection's
            vector size, otherwise Qdrant answers 400 "Vector dimension error".
        limit: Maximum number of hits to request.
        timeout: Seconds to wait for Qdrant; without it the call can block
            indefinitely on an unresponsive host.

    Returns:
        The decoded JSON body of the search response.

    Raises:
        requests.exceptions.HTTPError: Qdrant answered with an error status.
        requests.exceptions.RequestException: any other request failure.
    """
    url = f"{QDRANT_API_URL}/collections/{QDRANT_COLLECTION}/points/search"
    headers = {
        "Authorization": f"Bearer {QDRANT_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "vector": embedding_vector,
        "limit": limit,  # number of results to fetch
    }

    print("Testing Qdrant search...")
    response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.json()

# Main flow
def main():
    """
    Embed a fixed sample question, search Qdrant with it and print the outcome.

    Request failures raised by either step are reported on stdout instead of
    propagating: HTTP errors with status code and body, other request errors
    with their message.
    """
    question = "Find information about deep learning and AI models."

    try:
        # Step 1: turn the question into a vector and show its first 5 values.
        vector = generate_embedding(question)
        preview = vector[:5]
        print(f"Generated embedding: {preview}...")

        # Step 2: look up the nearest points in Qdrant.
        hits = search_qdrant(vector)
    except requests.exceptions.HTTPError as http_err:
        failed = http_err.response
        print("HTTP Error:", failed.status_code, failed.text)
    except requests.exceptions.RequestException as req_err:
        print("Error occurred:", str(req_err))
    else:
        print("Search Results:", hits)

# Entry point: run the embed-then-search demo when this cell/script is executed directly.
if __name__ == "__main__":
    main()


Generating embedding for query...
Generated embedding: [-0.0054861484, -0.006567113, 0.010118369, -0.023354253, -0.010823199]...
Testing Qdrant search...
HTTP Error: 400 {"status":{"error":"Wrong input: Vector dimension error: expected dim: 768, got 1536"},"time":0.000635589}


```
Generating embedding for query...
Generated embedding: [-0.0054861484, -0.006567113, 0.010118369, -0.023354253, -0.010823199]...
Testing Qdrant search...
HTTP Error: 400 {"status":{"error":"Wrong input: Vector dimension error: expected dim: 768, got 1536"},"time":0.000876167}
```

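Đảm bảo rằng MODEL_NAME trong .env là model embedding sinh ra vector có cùng số chiều với collection trên Qdrant. Ví dụ:
- `text-embedding-ada-002`: 1536 chiều.
- `text-embedding-3-small`: mặc định 1536 chiều, có thể rút gọn (ví dụ xuống 768) bằng tham số `dimensions`.
- Lưu ý: `text-similarity-babbage-001` có 2048 chiều (không phải 768) và đã bị OpenAI ngừng cung cấp, nên không dùng được cho collection 768 chiều.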

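- Nếu muốn dùng OpenAI text-embedding-ada-002, thì phải tạo lại collection Qdrant với vector size 1536 và index lại toàn bộ dữ liệu bằng chính model đó.
- Nếu muốn giữ Qdrant vector size là 768, thì truy vấn bằng đúng model 768 chiều đã dùng để index dữ liệu (ví dụ một model BERT/sentence-transformers). Chỉ trùng số chiều là chưa đủ: vector sinh từ hai model khác nhau nằm trong hai không gian khác nhau nên điểm tương đồng sẽ không có ý nghĩa.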


#### **Khi nào chọn 768 hoặc 1536?**
| **Tiêu chí**          | **Vector 768**                                    | **Vector 1536**                                   |
|-----------------------|--------------------------------------------------|--------------------------------------------------|
| **Ứng dụng**          | Tìm kiếm cơ bản, dữ liệu nhỏ, bài toán đơn giản   | Tìm kiếm phức tạp, dữ liệu lớn, bài toán yêu cầu cao |
| **Tài nguyên hệ thống** | Hạn chế về bộ nhớ hoặc tài nguyên tính toán       | Tài nguyên đủ mạnh để xử lý kích thước lớn       |
| **Độ chính xác**       | Không yêu cầu cực cao, chấp nhận sai số nhỏ       | Yêu cầu chính xác cao và biểu diễn ngữ nghĩa sâu |
| **Chi phí**           | Tiết kiệm chi phí lưu trữ và xử lý               | Sẵn sàng chi phí cao hơn để đạt hiệu quả tốt hơn |

---

### **Tóm lại**:
- **768**: Lựa chọn tốt nếu bạn ưu tiên tốc độ, tài nguyên hạn chế, và bài toán không quá phức tạp.
- **1536**: Phù hợp khi bạn muốn tối ưu độ chính xác và có đủ tài nguyên để xử lý bài toán phức tạp.

## Sử dụng model BERT 768 chiều (thay vì OpenAI 1536 chiều)

In [1]:
import os
import requests
from dotenv import load_dotenv
from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings

# Load environment variables from .env file
load_dotenv()

# Hugging Face configuration
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
EMBEDDINGS_MODEL_NAME = os.getenv("EMBEDDINGS_MODEL_NAME")

# Qdrant configuration
QDRANT_API_URL = os.getenv("QDRANT_API_URL")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
QDRANT_COLLECTION = os.getenv("QDRANT_COLLECTION")

# Initialize the Hugging Face Inference API embedding client.
# Only the model name and API key are passed. Embeddings are computed remotely
# by the Inference API, so a local `model_kwargs={'device': ...}` setting does
# not apply and is not a field of HuggingFaceInferenceAPIEmbeddings.
embeddings = HuggingFaceInferenceAPIEmbeddings(
    model_name=EMBEDDINGS_MODEL_NAME,
    api_key=HUGGINGFACE_API_KEY,
)

# Function to generate embedding using Hugging Face
def generate_embedding(query_text):
    """
    Embed `query_text` with the module-level `embeddings` client.

    Prints a progress line, then returns whatever `embeddings.embed_query`
    produces for the text.
    """
    print("Generating embedding for query...")
    return embeddings.embed_query(query_text)

# Function to search in Qdrant
def search_qdrant(embedding_vector, limit=5, timeout=10.0):
    """
    Run a nearest-neighbour search in the configured Qdrant collection.

    Args:
        embedding_vector: Query vector to search with.
        limit: Maximum number of hits to request.
        timeout: Seconds to wait for Qdrant; without it the call can block
            indefinitely on an unresponsive host.

    Returns:
        The decoded JSON body of the search response.

    Raises:
        requests.exceptions.HTTPError: Qdrant answered with an error status.
        requests.exceptions.RequestException: any other request failure.
    """
    url = f"{QDRANT_API_URL}/collections/{QDRANT_COLLECTION}/points/search"
    headers = {
        "Authorization": f"Bearer {QDRANT_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "vector": embedding_vector,
        "limit": limit,  # Number of results to retrieve
    }

    print("Testing Qdrant search...")
    response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.json()

# Main flow
def main():
    """
    Embed a fixed sample question, search Qdrant with it and print the outcome.

    Request failures raised by either step are reported on stdout instead of
    propagating: HTTP errors with status code and body, other request errors
    with their message.
    """
    question = "Find information about deep learning and AI models."

    try:
        # Step 1: embed the question and show the first 5 components.
        vector = generate_embedding(question)
        head = vector[:5]
        print(f"Generated embedding: {head}...")

        # Step 2: query Qdrant with the embedding.
        found = search_qdrant(vector)
    except requests.exceptions.HTTPError as http_err:
        reply = http_err.response
        print("HTTP Error:", reply.status_code, reply.text)
    except requests.exceptions.RequestException as req_err:
        print("Error occurred:", str(req_err))
    else:
        print("Search Results:", found)

# Entry point: run the embed-then-search demo when this cell/script is executed directly.
if __name__ == "__main__":
    main()


Generating embedding for query...
Generated embedding: [-0.14006862044334412, 0.12466755509376526, 0.002916180994361639, 0.06585323065519333, 0.056434180587530136]...
Testing Qdrant search...
Search Results: {'result': [{'id': '6e28e1cf-83d2-4c59-9824-d3d21a83bfeb', 'version': 0, 'score': 0.15543127}, {'id': 'b01a0f6c-4f08-495f-9986-00d89df298cb', 'version': 1, 'score': 0.14606589}, {'id': 'ada60796-74d4-4e16-b081-5cf8ba70a787', 'version': 1, 'score': 0.13553998}, {'id': 'c28a734b-1530-47a5-81a3-2c969404106d', 'version': 0, 'score': 0.13342223}, {'id': 'bef4d151-b14e-43ae-826a-80aefc9c412f', 'version': 2, 'score': 0.13268083}], 'status': 'ok', 'time': 0.000981311}


# Pipeline phiên bản 1: tìm kiếm vector trên Qdrant Cloud

```python 

"""
title: Qdrant Cloud Search Pipeline
author: YourName
date: 2025-01-10
version: 1.0
license: MIT
description: A pipeline to interact with Qdrant Cloud for vector search.
requirements: requests
"""

import os
import requests
from typing import List, Union, Generator, Iterator
from pydantic import BaseModel


class Pipeline:
    """
    Pipeline that answers a user message with the nearest points of a Qdrant collection.

    Connection settings are held in `self.valves` and are read from environment
    variables when the pipeline is constructed.
    """

    class Valves(BaseModel):
        # Base URL of the Qdrant REST API.
        QDRANT_API_URL: str
        # API key sent as a Bearer token; an empty string means no key is sent.
        QDRANT_API_KEY: str
        # Name of the collection that is searched.
        QDRANT_COLLECTION: str

    def __init__(self):
        # The API key is a secret, so it has no in-source default: it comes from
        # the QDRANT_API_KEY environment variable (empty string when unset).
        # The key that used to be hardcoded here must be rotated.
        self.valves = self.Valves(
            **{
                "QDRANT_API_URL": os.getenv("QDRANT_API_URL", "https://fcbf96b5-0f95-47b1-b088-dd1eba2a2758.us-east4-0.gcp.cloud.qdrant.io:6333"),
                "QDRANT_API_KEY": os.getenv("QDRANT_API_KEY", ""),
                "QDRANT_COLLECTION": os.getenv("QDRANT_COLLECTION", "cmc_final_db"),
            }
        )

    async def on_startup(self):
        """Startup hook; only logs that the pipeline started."""
        print("Qdrant Cloud Pipeline started.")

    async def on_shutdown(self):
        """Shutdown hook; only logs that the pipeline stopped."""
        print("Qdrant Cloud Pipeline stopped.")

    def search_vectors(
        self, query_vector: List[float], top_k: int = 5, timeout: float = 10.0
    ) -> dict:
        """
        Search Qdrant collection for nearest neighbors to the query vector.

        Args:
            query_vector: Query embedding; its length must equal the vector size
                of the collection.
            top_k: Maximum number of hits to request.
            timeout: Seconds to wait for Qdrant; without it the request can
                block indefinitely on an unresponsive host.

        Returns:
            The decoded JSON response on success, or
            {"error": "Unable to query Qdrant Cloud"} when the request fails
            (the underlying error is printed, not raised).
        """
        # Tolerate a configured base URL that ends with "/".
        base_url = self.valves.QDRANT_API_URL.rstrip("/")
        url = f"{base_url}/collections/{self.valves.QDRANT_COLLECTION}/points/search"
        headers = {"Content-Type": "application/json"}
        if self.valves.QDRANT_API_KEY:
            # Only send credentials when a key is configured.
            headers["Authorization"] = f"Bearer {self.valves.QDRANT_API_KEY}"
        payload = {
            "vector": query_vector,
            "limit": top_k,
        }

        try:
            response = requests.post(url, json=payload, headers=headers, timeout=timeout)
            response.raise_for_status()
            return response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON.
            print(f"Error querying Qdrant: {e}")
            return {"error": "Unable to query Qdrant Cloud"}

    def pipe(
        self, user_message: str, model_id: str, messages: List[dict], body: dict
    ) -> Union[str, Generator, Iterator]:
        """
        Process user message and query Qdrant for vector search.

        `model_id`, `messages` and `body` are accepted but not used by this
        implementation.

        Returns:
            A text listing "- ID: ..., Score: ..." for each hit, a fixed message
            when there are no hits, or the error text from `search_vectors`.
        """
        print(f"User message: {user_message}")

        # TODO: replace this placeholder with a real embedding of `user_message`.
        # Its length must equal the vector size of the target collection,
        # otherwise Qdrant rejects the search with a dimension error.
        query_vector = [0.1, 0.2, 0.3, 0.4, 0.5]

        # Search in Qdrant
        qdrant_response = self.search_vectors(query_vector)

        # Surface the failure text produced by search_vectors.
        if "error" in qdrant_response:
            return qdrant_response["error"]

        # Format the results
        results = qdrant_response.get("result", [])
        if not results:
            return "No relevant data found in Qdrant Cloud."

        formatted_results = "\n".join(
            f"- ID: {item['id']}, Score: {item['score']}" for item in results
        )
        return f"Here are the top results from Qdrant Cloud:\n\n{formatted_results}"

```