In [1]:
#%pip install -r requirements.txt

In [2]:
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML
import chromadb
from chromadb.config import DEFAULT_TENANT, DEFAULT_DATABASE, Settings
from langchain_chroma import Chroma
import os
import pandas as pd
import random
from dotenv import load_dotenv

# Pull the settings stored in the local .env file into the process environment
load_dotenv()

# Location of the Chroma database on disk (None when the variable is not set)
CHROMA_DB_PATH = os.environ.get('CHROMA_DB_PATH')

# Persistent chromadb client shared by every function in this notebook
client = chromadb.PersistentClient(
    database=DEFAULT_DATABASE,
    tenant=DEFAULT_TENANT,
    settings=Settings(),
    path=CHROMA_DB_PATH,
)

# Function to list collections
def list_collections():
    """Return the names of all collections known to the module-level ``client``.

    Returns:
        list[str]: One name per collection, in the order the client reports them.
    """
    # Depending on the installed chromadb release, list_collections() yields
    # either Collection objects or bare collection names (0.6.x returns names,
    # where accessing ``.name`` fails). Accept both shapes.
    return [
        str(entry) if isinstance(entry, str) else entry.name
        for entry in client.list_collections()
    ]

# Helper that renders the documents table shown by show_details
def _documents_table_html(collection_name, ids, metadatas):
    """Build the HTML table listing document IDs (as links) and their metadata.

    Args:
        collection_name: Name of the collection the documents belong to.
        ids: Document IDs to list.
        metadatas: Metadata entries, paired positionally with ``ids``.

    Returns:
        str: An HTML ``<table>`` with one row per (ID, metadata) pair.
    """
    from html import escape
    import json

    rows = []
    for doc_id, metadata in zip(ids, metadatas):
        # Python source to run in the kernel; repr() produces correctly
        # quoted literals even when a value contains quotes or backslashes.
        kernel_code = f"show_document_details({collection_name!r}, {doc_id!r})"
        # json.dumps turns that source into a valid JavaScript string literal.
        # NOTE: `IPython.notebook` is the classic Notebook front end's
        # JavaScript API; front ends that do not define it (e.g. JupyterLab)
        # will not run this click handler.
        onclick_js = f"IPython.notebook.kernel.execute({json.dumps(kernel_code)}); return false;"
        # Escape for the attribute/HTML context so embedded quotes or markup
        # cannot terminate the attribute or inject elements.
        rows.append(
            f"<tr><td><a href='#' onclick=\"{escape(onclick_js, quote=True)}\">"
            f"{escape(str(doc_id))}</a></td>"
            f"<td>{escape(str(metadata))}</td></tr>"
        )
    return "<table><tr><th>ID</th><th>Metadata</th></tr>" + "".join(rows) + "</table>"


# Function to display collection details
def show_details(collection_name):
    """Show a summary of one collection inside ``details_output``.

    Prints the collection name, its document count and its metadata, then
    fetches up to five documents, prints the raw result and renders a table
    whose ID cells are links that call ``show_document_details``.

    Any ``Exception`` raised along the way is caught and reported in
    ``details_output`` instead of propagating.

    Args:
        collection_name: Name of the collection to look up on ``client``.
    """
    try:
        collection = client.get_collection(collection_name)
        details_output.clear_output()
        with details_output:
            print(f"Collection Name: {collection_name}")
            print(f"Document Count: {collection.count()}")
            if collection.metadata:
                for key, value in collection.metadata.items():
                    print(f"{key}: {value}")
            else:
                print("No metadata available.")

            # Display details of the documents in the collection
            raw_data = collection.get(limit=5)
            print("Raw Document Data:", raw_data)  # Print out raw data for inspection

            # Extract and display relevant data
            ids = raw_data.get('ids', [])
            metadatas = raw_data.get('metadatas', [])

            if not ids or not metadatas:
                print("No documents available or missing data.")
                return

            # Create table with clickable IDs
            display(HTML(_documents_table_html(collection_name, ids, metadatas)))
    except Exception as e:
        details_output.clear_output()
        with details_output:
            print(f"Error while fetching details for collection '{collection_name}': {str(e)}")

# Function to display individual document details
def show_document_details(collection_name, doc_id):
    """Print the stored metadata of one document into ``document_details_output``.

    Looks up ``collection_name`` on the module-level ``client``, fetches the
    entry whose ID is ``doc_id`` and prints its metadata. Any ``Exception``
    is caught and reported in the same output widget instead of being raised.
    """
    try:
        target = client.get_collection(collection_name)
        document_details_output.clear_output()
        with document_details_output:
            print(f"Collection: {collection_name}, Document ID: {doc_id}")

            # Ask the collection for just this one ID
            fetched = target.get(ids=[doc_id])
            if fetched and fetched.get('metadatas'):
                # A single ID was requested, so the first entry is the match
                print("Metadata:", fetched['metadatas'][0])
            else:
                print(f"No details available for document ID: {doc_id}")
    except Exception as exc:
        document_details_output.clear_output()
        with document_details_output:
            print(f"Error while fetching details for document ID '{doc_id}': {exc!s}")


# Function to delete collection
def delete_collection(collection_name):
    """Delete a collection and rebuild the list of collection rows.

    Any ``Exception`` raised by the deletion or the refresh is caught and
    reported in ``error_output`` instead of propagating.

    Args:
        collection_name: Name of the collection to delete via ``client``.
    """
    # Clear on every attempt, not only on failure: otherwise the message from
    # an earlier failed deletion stays on screen after a later success.
    error_output.clear_output()
    try:
        client.delete_collection(collection_name)
        refresh_collections()
    except Exception as e:
        with error_output:
            print(f"Error while deleting collection '{collection_name}': {str(e)}")

# Function to refresh the list of collections
def refresh_collections():
    """Rebuild ``collection_buttons`` with one row per existing collection.

    Each row holds a label with the collection name, a "Delete" button wired
    to ``delete_collection`` and a "Details" button wired to ``show_details``.
    """
    rows = []
    for current_name in list_collections():
        label = widgets.Label(value=current_name, layout=widgets.Layout(width='200px'))
        remove_btn = widgets.Button(
            description="Delete",
            button_style='danger',
            layout=widgets.Layout(width='100px'),
        )
        inspect_btn = widgets.Button(
            description="Details",
            button_style='info',
            layout=widgets.Layout(width='100px'),
        )

        # Bind the label's value as a default argument so each handler keeps
        # its own collection name rather than the loop's last one
        remove_btn.on_click(lambda _btn, target=label.value: delete_collection(target))
        inspect_btn.on_click(lambda _btn, target=label.value: show_details(target))

        rows.append(widgets.HBox([label, remove_btn, inspect_btn]))

    collection_buttons.children = rows

# Output areas: collection summary, single-document view, deletion errors
details_output, document_details_output, error_output = (
    widgets.Output() for _ in range(3)
)

# Container that refresh_collections() fills with one row per collection
collection_buttons = widgets.VBox()

# Button that rebuilds the collection rows on demand
refresh_button = widgets.Button(button_style='primary', description="Refresh Collections")
refresh_button.on_click(lambda _clicked: refresh_collections())

# Render the whole UI in the notebook, top to bottom
display(
    refresh_button,
    collection_buttons,
    details_output,
    document_details_output,
    error_output,
)

# Populate the rows once so the list is not empty on first display
refresh_collections()

Button(button_style='primary', description='Refresh Collections', style=ButtonStyle())

VBox()

Output()

Output()

Output()