In [1]:
# !pip  install -U docarray
# !pip  install pydantic
import functools

import torch
import torch.nn.functional as F
from docarray import BaseDoc, DocList
from docarray.typing import NdArray
from transformers import AutoTokenizer, AutoModel
from vectordb import HNSWVectorDB
# Demo catalogue: (service_name, service_description) tuples.
# When indexed via vector_db_push, the description is what gets embedded and
# the name is what gets stored as the document text (and printed on a match).
services = [
    ("Web Hosting", "Reliable and secure web hosting services with 99.9% uptime guarantee and 24/7 support."),
    ("Cloud Storage", "Scalable cloud storage solutions with advanced security features and easy access from anywhere."),
    ("SEO Optimization", "Comprehensive SEO services to boost your website's search engine ranking and drive more traffic."),
    ("Digital Marketing", "Targeted digital marketing campaigns to increase your brand visibility and reach more customers."),
    ("E-commerce Platform", "Robust e-commerce platform with integrated payment gateways, inventory management, and analytics."),
    ("Data Analytics", "Advanced data analytics services to help you make data-driven decisions and improve business outcomes."),
    ("Cybersecurity", "Comprehensive cybersecurity solutions to protect your business from threats and ensure data integrity."),
    ("Mobile App Development", "Custom mobile app development services for iOS and Android platforms with user-friendly interfaces."),
    ("Customer Support", "24/7 customer support services to help your customers with any queries or issues they may have."),
    ("Blockchain Solutions", "Innovative blockchain solutions to streamline your business processes and enhance security."),
]

# Free-text user requests used to exercise the search; each one is passed to
# vector_search further down in this cell.
queries = [
    "I need a reliable service to store my files and access them from anywhere.",
    "Looking for services to improve my website's search engine ranking.",
    "How can I protect my business from cyber threats?",
    "I'm interested in developing a mobile app for my business.",
    "I need help with my online marketing strategy to reach more customers.",
    "What are some solutions for hosting my website with high uptime?",
    "Can I get a service to analyze my business data for better decisions?",
    "Looking for an online platform to sell my products.",
    "How can I implement blockchain technology in my business?",
    "Need customer support solutions for my e-commerce website."
]


# Define constants
# Length of the embedding vectors stored in ServiceDoc. It has to agree with
# the size of the vectors produced by embedding_model.
vector_dimension = 768

# Define the ServiceDoc class
class ServiceDoc(BaseDoc):
    """Document schema shared by indexing (vector_db_push) and querying (vector_search)."""

    # Service name when indexing; the raw user query when searching.
    text: str = ''
    # Embedding vector, annotated with length `vector_dimension`.
    embedding: NdArray[vector_dimension]

# Mean Pooling - attention-mask-weighted average over the token axis
def mean_pooling(model_output, attention_mask):
    """Average token embeddings per sequence, ignoring masked-out positions.

    Args:
        model_output: Indexable model result whose first element holds the
            token embeddings (assumed shape: batch x tokens x hidden).
        attention_mask: Mask with one entry per token (assumed shape:
            batch x tokens); positions with value 0 do not contribute.

    Returns:
        Tensor with the token axis (dim 1) reduced away.
    """
    hidden_states = model_output[0]

    # Broadcast the mask across the hidden dimension so it can weight each value.
    weights = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()

    masked_total = (hidden_states * weights).sum(dim=1)
    # Clamp the divisor so a fully masked sequence does not divide by zero.
    token_counts = weights.sum(dim=1).clamp(min=1e-9)
    return masked_total / token_counts

# Embedding Model
@functools.lru_cache(maxsize=None)
def _load_embedding_model(model_name: str):
    """Load the tokenizer/model pair for ``model_name`` once and memoize it."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    return tokenizer, model


def embedding_model(doc: str):
    """Embed ``doc`` with ``sentence-transformers/all-mpnet-base-v2``.

    The tokenizer and model are loaded on the first call only and reused
    afterwards; previously both were re-instantiated on every call, which
    dominated the run time when embedding many services and queries.

    Args:
        doc: Text to embed. Over-long input is truncated by the tokenizer
            (``truncation=True``).

    Returns:
        A 1-D NumPy array: the mean-pooled, L2-normalized embedding of ``doc``.
    """
    tokenizer, model = _load_embedding_model('sentence-transformers/all-mpnet-base-v2')

    # Tokenize the document
    encoded_input = tokenizer(doc, padding=True, truncation=True, return_tensors='pt')

    # Compute token embeddings; no gradients are needed for inference
    with torch.no_grad():
        model_output = model(**encoded_input)

    # Perform pooling
    doc_embedding = mean_pooling(model_output, encoded_input['attention_mask'])

    # Normalize embeddings to unit L2 length
    doc_embedding = F.normalize(doc_embedding, p=2, dim=1)

    # The batch holds a single document, so return its row only
    return doc_embedding.numpy()[0]

# Push a single service into the vector database
def vector_db_push(service_name: str, service_description: str, workspace: str):
    """Index one service in the HNSW vector database stored at ``workspace``.

    The stored document carries ``service_name`` as its text, while its
    embedding is computed from ``service_description``.

    Args:
        service_name: Label saved as the document text.
        service_description: Text that is embedded for similarity search.
        workspace: Workspace location handed to ``HNSWVectorDB``.
    """
    store = HNSWVectorDB[ServiceDoc](workspace=workspace)

    description_vector = embedding_model(service_description)
    service_doc = ServiceDoc(text=service_name, embedding=description_vector)

    store.index(inputs=DocList[ServiceDoc]([service_doc]))


# Perform a Similarity Search Query
def vector_search(user_query: str, limit: int, workspace: str):
    """Search the vector database for ``user_query`` and print the matches.

    Args:
        user_query: Free-text query; it is embedded with ``embedding_model``,
            the same function used when indexing.
        limit: Value forwarded as ``limit`` to ``db.search`` (the number of
            matches requested).
        workspace: Workspace location handed to ``HNSWVectorDB``.

    Returns:
        None. The text of each match is printed, one per line, after a
        header line that echoes the query.
    """
    db = HNSWVectorDB[ServiceDoc](workspace=workspace)

    # Generate embedding for the query
    query_embedding = embedding_model(user_query)
    query_doc = ServiceDoc(text=user_query, embedding=query_embedding)

    # Perform a search query
    results = db.search(inputs=DocList[ServiceDoc]([query_doc]), limit=limit)

    # Print out the matches; a single query was sent, so only results[0] is read
    print(f"Search results for query: '{user_query}'")
    for match in results[0].matches:
        print(match.text)


# Directory handed to HNSWVectorDB as its workspace for both indexing and search.
workspace_path = "./vector_db_files"

from tqdm import tqdm

# Index every (service_name, service_description) pair with a progress bar.
# A plain loop is used because the calls are made only for their side effects;
# a list comprehension would build a throwaway list of None values.
# NOTE(review): re-running this cell presumably indexes the same services
# again into the existing workspace - confirm whether duplicates matter.
for service_name, service_description in tqdm(services, desc="Pushing to vector DB"):
    vector_db_push(
        service_name=service_name,
        service_description=service_description,
        workspace=workspace_path,
    )


print("Service descriptions have been pushed to the vector database.")

# Run each sample query and print its top 3 matches, separated by a rule.
for query in queries:
    vector_search(query, limit=3, workspace=workspace_path)
    print("\n" + "-"*50 + "\n")



  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(
Pushing to vector DB: 100%|██████████| 10/10 [00:17<00:00,  1.77s/it]


Service descriptions have been pushed to the vector database.
Search results for query: 'I need a reliable service to store my files and access them from anywhere.'
Cloud Storage
Web Hosting
Mobile App Development

--------------------------------------------------

Search results for query: 'Looking for services to improve my website's search engine ranking.'
SEO Optimization
Digital Marketing
Web Hosting

--------------------------------------------------

Search results for query: 'How can I protect my business from cyber threats?'
Cybersecurity
Digital Marketing
Blockchain Solutions

--------------------------------------------------

Search results for query: 'I'm interested in developing a mobile app for my business.'
Mobile App Development
Web Hosting
Data Analytics

--------------------------------------------------

Search results for query: 'I need help with my online marketing strategy to reach more customers.'
Digital Marketing
SEO Optimization
E-commerce Platform

--------

In [8]:
# Ad-hoc query unrelated to the IT services in the catalogue; the recorded
# output below shows "Coffee shop" as the first match.
vector_search(
    "'Looking for having a hot drink.",
    limit=3,
    workspace=workspace_path,
)

Search results for query: ''Looking for having a hot drink.'
Coffee shop
Customer Support
Web Hosting


In [7]:
# Add one extra, non-IT entry to the existing workspace. The cell counters
# (In [7] here, In [8] for the hot-drink query) show this ran before that query.
vector_db_push(
    service_name="Coffee shop",
    service_description="it's a coffe shop where it serve hot coffee and other drinks",
    workspace=workspace_path,
)