<a href="https://colab.research.google.com/github/Gireesheruvaram/LLM-Beginer/blob/vector-database-kata2-branch/Kata2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Install chromadb
!pip install chromadb

# Install openai
!pip install openai


Collecting chromadb
  Downloading chromadb-0.6.3-py3-none-any.whl.metadata (6.8 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting chroma-hnswlib==0.7.6 (from chromadb)
  Downloading chroma_hnswlib-0.7.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (252 bytes)
Collecting fastapi>=0.95.2 (from chromadb)
  Downloading fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.20.0-py2.py3-none-any.whl.metadata (2.9 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.21.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.31.0-py

In [6]:
import chromadb
import openai
import os
from google.colab import userdata

# Set up OpenAI API Key
os.environ['OPENAI_API_KEY'] = userdata.get('LLma')
openai.api_key = os.environ['OPENAI_API_KEY']


In [7]:
# Initialize ChromaDB client
client = chromadb.Client()

# Create a collection
collection = client.get_or_create_collection(
    name="hybrid_search",
    metadata={"description": "Hybrid search with metadata filtering"}
)

def create_embedding(text):
    response = openai.embeddings.create(
        input=text,
        model="text-embedding-ada-002"
    )
    return response.data[0].embedding

In [8]:

# Expanded list of documents with metadata
documents = [
    {
        "id": "doc1",
        "text": "This is a document about machine learning.",
        "metadata": {"author": "Alice", "date": "2023-01-01", "title": "Machine Learning Basics"}
    },
    {
        "id": "doc2",
        "text": "An in-depth guide to neural networks.",
        "metadata": {"author": "Bob", "date": "2023-02-15", "title": "Neural Networks"}
    },
    {
        "id": "doc3",
        "text": "Understanding deep learning and its applications.",
        "metadata": {"author": "Charlie", "date": "2023-03-10", "title": "Deep Learning Overview"}
    },
    {
        "id": "doc4",
        "text": "A beginner’s introduction to AI and machine learning.",
        "metadata": {"author": "Alice", "date": "2023-04-05", "title": "AI for Beginners"}
    },
    {
        "id": "doc5",
        "text": "The evolution of artificial intelligence and future trends.",
        "metadata": {"author": "Bob", "date": "2023-05-20", "title": "AI Evolution"}
    }
]


In [9]:

# Add documents to the collection
for doc in documents:
    embedding = create_embedding(doc["text"])
    collection.add(
        ids=[doc["id"]],
        documents=[doc["text"]],
        metadatas=[doc["metadata"]],
        embeddings=[embedding]
    )

# Function to perform hybrid search with metadata filtering
def hybrid_search(query, metadata_filter=None, top_k=5):
    query_embedding = create_embedding(query)
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=top_k,
        where=metadata_filter  # Metadata filter
    )
    return results




In [10]:





# Example 1: Search by text query only
query_text = "What is machine learning?"
results_text_search = hybrid_search(query_text)

# Example 2: Search by author
metadata_filter_author = {"author": "Alice"}
results_author_search = hybrid_search(query_text, metadata_filter_author)

# Example 3: Search by date
metadata_filter_date = {"date": "2023-04-05"}
results_date_search = hybrid_search(query_text, metadata_filter_date)

# Display function for results
def display_results(results, search_type):
    print(f"\nResults for {search_type}:")
    for i, (doc, metadata) in enumerate(zip(results['documents'], results['metadatas'])):
        print(f"Result {i + 1}:")
        print(f"Document: {doc}")
        print(f"Metadata: {metadata}")
        print("-" * 50)

# Display results
display_results(results_text_search, "Text Query")
display_results(results_author_search, "Author Filter")
display_results(results_date_search, "Date Filter")



Results for Text Query:
Result 1:
Document: ['This is a document about machine learning.', 'A beginner’s introduction to AI and machine learning.', 'Understanding deep learning and its applications.', 'The evolution of artificial intelligence and future trends.', 'An in-depth guide to neural networks.']
Metadata: [{'author': 'Alice', 'date': '2023-01-01', 'title': 'Machine Learning Basics'}, {'author': 'Alice', 'date': '2023-04-05', 'title': 'AI for Beginners'}, {'author': 'Charlie', 'date': '2023-03-10', 'title': 'Deep Learning Overview'}, {'author': 'Bob', 'date': '2023-05-20', 'title': 'AI Evolution'}, {'author': 'Bob', 'date': '2023-02-15', 'title': 'Neural Networks'}]
--------------------------------------------------

Results for Author Filter:
Result 1:
Document: ['This is a document about machine learning.', 'A beginner’s introduction to AI and machine learning.']
Metadata: [{'author': 'Alice', 'date': '2023-01-01', 'title': 'Machine Learning Basics'}, {'author': 'Alice', 'dat

In [4]:
!pip show openai


Name: openai
Version: 1.61.1
Summary: The official Python library for the openai API
Home-page: https://github.com/openai/openai-python
Author: 
Author-email: OpenAI <support@openai.com>
License: 
Location: /usr/local/lib/python3.11/dist-packages
Requires: anyio, distro, httpx, jiter, pydantic, sniffio, tqdm, typing-extensions
Required-by: 
