In [1]:
import google.generativeai as genai
import os

# Configure the google.generativeai SDK with the API key taken from the
# environment. os.environ[...] raises KeyError when GOOGLE_API_KEY is unset,
# so a missing key fails here rather than on the first embedding request.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import chromadb
import numpy as np
from scipy.spatial.distance import cosine

# Initialize Chroma client
# "chroma_db" is a relative path, so the directory that gets opened depends on
# the kernel's current working directory.
client = chromadb.PersistentClient(path="chroma_db")
# Fetch the "paper_summaries" collection, creating it if it does not exist yet.
# NOTE(review): this notebook never adds documents; it presumably relies on a
# separate ingestion step having populated the collection — confirm.
collection = client.get_or_create_collection(name="paper_summaries")

def get_closest_document(query):
    """Return the stored document whose embedding is most similar to ``query``.

    The query is embedded with ``models/text-embedding-004`` and compared, by
    cosine similarity, against every embedding held in the module-level
    ``collection``.

    Args:
        query: Text to embed and search for.

    Returns:
        A dict with keys ``"title"`` (the record's ``title`` metadata, or
        ``"No title available"`` when the record has no metadata or no title),
        ``"content"`` (the stored document) and ``"similarity_score"`` (cosine
        similarity as a float). If the collection holds no embeddings, returns
        ``{"error": "No documents found in the database."}`` instead.

    Raises:
        ValueError: If the query embedding and the stored embeddings have
            different dimensionality (raised by the matrix product).
        Any exception raised by ``genai.embed_content`` or ``collection.get``
        propagates unchanged.

    Notes:
        Embeddings with zero norm have no defined cosine similarity. They are
        ranked last (score ``-inf``) instead of producing NaN, because
        ``np.argmax`` treats NaN as the maximum and would otherwise select them.
    """
    # Step 1: Embed the query
    result = genai.embed_content(
        model="models/text-embedding-004",
        content=query,
    )
    query_embedding = np.asarray(result['embedding'], dtype=float)

    # Step 2: Load every stored embedding with its document and metadata.
    # This pulls the whole collection into memory, which is fine for a small
    # collection but scales linearly with its size.
    results = collection.get(include=["embeddings", "documents", "metadatas"])
    embeddings = results["embeddings"]
    documents = results["documents"]
    metadatas = results["metadatas"]

    if embeddings is None or len(embeddings) == 0:
        return {"error": "No documents found in the database."}

    # Step 3: Cosine similarity of the query against all rows at once.
    embedding_matrix = np.asarray(embeddings, dtype=float)
    norm_products = np.linalg.norm(embedding_matrix, axis=1) * np.linalg.norm(query_embedding)
    has_direction = norm_products > 0
    similarities = np.full(embedding_matrix.shape[0], -np.inf)
    # Clip to [-1, 1] to absorb floating-point rounding just outside the range.
    similarities[has_direction] = np.clip(
        embedding_matrix[has_direction] @ query_embedding / norm_products[has_direction],
        -1.0,
        1.0,
    )

    # Step 4: Pick the highest similarity score (first one wins on ties).
    closest_index = int(np.argmax(similarities))
    closest_similarity = float(similarities[closest_index])

    # Step 5: Extract the closest document's details
    # Chroma yields None for records stored without metadata, so fall back to
    # an empty mapping before looking up the title.
    closest_metadata = metadatas[closest_index] or {}
    closest_document = {
        "title": closest_metadata.get("title", "No title available"),
        "content": documents[closest_index],
        "similarity_score": closest_similarity
    }

    return closest_document

In [3]:
def format_and_display_result(closest_doc):
    """Print a lookup result as a plain-text report.

    Args:
        closest_doc: Mapping describing the lookup outcome. When it has an
            ``"error"`` key, only that message is printed. Otherwise the
            ``"title"``, ``"content"`` and ``"similarity_score"`` entries are
            printed, with placeholder defaults for any that are missing.

    Returns:
        None. All output goes to stdout.
    """
    # Error results carry only a message; print it and stop.
    if "error" in closest_doc:
        print(closest_doc["error"])
        return

    doc_title = closest_doc.get("title", "No title available")
    doc_body = closest_doc.get("content", "No content available")
    score = closest_doc.get("similarity_score", 0.0)

    # The report is framed by heavy rules, with light rules between sections.
    heavy_rule = "=" * 50
    light_rule = "-" * 50

    print("\nClosest Document:")
    print(heavy_rule)
    print("Title: {}".format(doc_title))
    print(light_rule)
    print("Similarity Score: {:.4f}".format(score))
    print(light_rule)
    print("Content:")
    print(doc_body)
    print(heavy_rule)

In [8]:
# Example lookup: embed a natural-language query, find the most similar stored
# document, and print it as a formatted report.
query = "I want to learn about attention transformer in deep learning"
result = get_closest_document(query)

format_and_display_result(result)


Closest Document:
Title: Attention Is All You Need
--------------------------------------------------
Similarity Score: 0.6909
--------------------------------------------------
Content:
## Summary of "Attention Is All You Need"

This research paper introduces the Transformer, a novel neural network architecture designed for sequence transduction tasks, such as machine translation. Unlike previous dominant models that rely on complex recurrent or convolutional neural networks, the Transformer is based solely on attention mechanisms, eliminating the need for recurrence and convolutions. The authors demonstrate that the Transformer achieves superior performance in machine translation tasks while offering greater parallelization and requiring significantly less training time.

**1. Research Problem:**

*   **Main Objective:** The primary objective of this study is to develop a new neural network architecture for sequence transduction that overcomes the limitations of recurrent and convol