# ABC — abstract base class for vector stores

In [1]:
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional

class VectorStore(ABC):
    """Abstract base class for vector store integrations.

    Subclasses must implement every method decorated with
    ``@abstractmethod``. Instantiating ``VectorStore`` directly, or a
    subclass that leaves any abstract method unimplemented, raises
    ``TypeError``.

    The constructor only records connection parameters on the instance;
    it does not open a connection.
    """

    def __init__(
        self,
        host: Optional[str] = None,
        port: Optional[int] = None,
        db_name: Optional[str] = None,
    ) -> None:
        """Store the connection parameters on the instance.

        Args:
            host: Host of the vector database, or ``None`` if not set.
            port: Port of the vector database, or ``None`` if not set.
            db_name: Name of the database, or ``None`` if not set.
        """
        self.host = host
        self.port = port
        self.db_name = db_name

    @abstractmethod
    def connect(self) -> None:
        """Connect to the vector database."""

    @abstractmethod
    def create_collection(self, name: str, **kwargs) -> None:
        """Create a collection or index.

        Args:
            name: Name of the collection or index to create.
            **kwargs: Backend-specific options; their meaning is defined
                by each concrete subclass.
        """

    @abstractmethod
    def drop_collection(self, name: str) -> None:
        """Drop a collection or index.

        Args:
            name: Name of the collection or index to drop.
        """

    @abstractmethod
    def insert_embeddings(
        self,
        embeddings: List[List[float]],
        metadata: List[Dict[str, Any]],
    ) -> None:
        """Insert embeddings and metadata into the collection.

        Args:
            embeddings: Embedding vectors to insert.
            metadata: Metadata dictionaries to store with the embeddings.
                Presumably one entry per embedding, in the same order;
                the base class does not enforce this.
        """

    @abstractmethod
    def delete_embeddings(self, name: str) -> None:
        """Delete embeddings associated with the collection.

        Args:
            name: Presumably the name of the collection whose embeddings
                are deleted; confirm against the concrete implementation.
        """

    @abstractmethod
    def query(self, vector: List[float], top_k: int = 5) -> List[Dict[str, Any]]:
        """Query the store to find similar vectors.

        Args:
            vector: Query vector.
            top_k: Maximum number of results requested (defaults to 5).

        Returns:
            A list of result dictionaries; their schema is defined by
            each concrete subclass.
        """

    @staticmethod
    @abstractmethod
    def embed_data(data: Any) -> List[float]:
        """Generate embeddings for the given data.

        Args:
            data: The input to embed.

        Returns:
            The embedding vector as a list of floats.
        """


# EXP

In [21]:
# One human-readable description per schema column, grouped by table.
docs = [
    # customers
    "Table: customers — Column: customer_id (INTEGER) — Unique ID for each customer.",
    "Table: customers — Column: name (VARCHAR) — Full name of the customer.",
    # orders
    "Table: orders — Column: order_id (INTEGER) — Unique ID for each order.",
    "Table: orders — Column: customer_id (INTEGER) — References customers.customer_id.",
    "Table: orders — Column: order_date (DATE) — The date the order was placed.",
    # payments
    "Table: payments — Column: payment_id (INTEGER) — Unique ID for each payment.",
    "Table: payments — Column: order_id (INTEGER) — References orders.order_id.",
    "Table: payments — Column: amount (FLOAT) — Payment amount in USD.",
]


In [22]:
# Structured metadata for each schema column, grouped by table.
metadatas = [
    # customers
    {
        "table_name": "customers",
        "column_name": "customer_id",
        "type": "INTEGER",
        "description": "Unique ID for each customer",
    },
    {
        "table_name": "customers",
        "column_name": "name",
        "type": "VARCHAR",
        "description": "Full name of the customer",
    },
    # orders
    {
        "table_name": "orders",
        "column_name": "order_id",
        "type": "INTEGER",
        "description": "Unique ID for each order",
    },
    {
        "table_name": "orders",
        "column_name": "customer_id",
        "type": "INTEGER",
        "description": "References customers.customer_id",
    },
    {
        "table_name": "orders",
        "column_name": "order_date",
        "type": "DATE",
        "description": "The date the order was placed",
    },
    # payments
    {
        "table_name": "payments",
        "column_name": "payment_id",
        "type": "INTEGER",
        "description": "Unique ID for each payment",
    },
    {
        "table_name": "payments",
        "column_name": "order_id",
        "type": "INTEGER",
        "description": "References orders.order_id",
    },
    {
        "table_name": "payments",
        "column_name": "amount",
        "type": "FLOAT",
        "description": "Payment amount in USD",
    },
]


In [8]:
import os

# Elasticsearch connection settings plus the index mapping and settings.
# Each connection value is read from an environment variable and falls back
# to the literal default shown when the variable is unset.
ELASTIC_CONFIG: dict = {
    # NOTE(review): the default scheme is https; an SSL WRONG_VERSION_NUMBER
    # error is typical of an https client reaching a plain-HTTP endpoint, so
    # confirm the scheme matches how the server is actually exposed.
    "hosts": os.getenv("ELASTIC_HOSTS", "https://127.0.0.1:9200"),
    "api_key": os.getenv("ELASTIC_API_KEY"),
    "index_name": os.getenv("ELASTIC_INDEX", "sql_index"),
    "user_name": os.getenv("ELASTIC_USERNAME", "elastic"),
    # NOTE(review): a literal password fallback is committed here; prefer
    # supplying ELASTIC_PASSWORD through the environment and rotating this one.
    "user_password": os.getenv("ELASTIC_PASSWORD", "SMPLoWTp"),
    # Field mapping for the index.
    "properties": {
        "text": {"type": "text"},
        "question": {"type": "text"},
        "metadata": {"type": "object"},
        "embedding": {
            "type": "dense_vector",
            # Presumably the output size of the embedding model in use;
            # verify the two agree.
            "dims": 384,
            "index": True,
            "similarity": "cosine",
        },
    },
    # Index-level settings.
    "settings": {
        "number_of_shards": 2,
        "number_of_replicas": 0,
    },
}

In [6]:
from langchain_ollama import OllamaEmbeddings
# Embedding model name and the base URL of the Ollama server it is requested from.
model = "all-minilm:l6-v2"
base_url = "http://localhost:11434"

# Embedding client shared by the cells that follow.
embeddings = OllamaEmbeddings(
    base_url=base_url,
    model=model,
)

In [9]:
from langchain_elasticsearch import ElasticsearchStore, DenseVectorStrategy
config = ELASTIC_CONFIG

# Vector store over the configured index, authenticated with the configured
# user name and password. Retrieval uses the dense-vector strategy with the
# hybrid and RRF options both switched on.
elastic_client = ElasticsearchStore(
    index_name=config["index_name"],
    embedding=embeddings,
    es_url=config["hosts"],
    es_user=config["user_name"],
    es_password=config["user_password"],
    strategy=DenseVectorStrategy(hybrid=True, rrf=True),
)

TlsError: TLS error caused by: TlsError(TLS error caused by: SSLError([SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1010)))