# Core Search Types

> Core types and interfaces for building a personal search engine

This module provides the fundamental building blocks for creating a personal search engine:
- `Document`: Represents any searchable content
- `SearchResult`: Represents a matched document with relevance info
- `Query`: Encapsulates search parameters and preferences
- `Index`: Base class for different indexing strategies

In [None]:
#| default_exp core

#| export
from fastcore.basics import *
from fastcore.foundation import *
from fastcore.test import *
from datetime import datetime
from typing import Any, Optional

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
class Document:
    """A searchable document with content and metadata
    
    A Document represents any piece of content that can be indexed and searched. This could be:
    - A web page
    - A local file
    - An email
    - A note or document
    
    When omitted, `metadata` becomes a fresh empty dict and `created_at` becomes
    `datetime.now()` (a naive, local-time timestamp).
    """
    def __init__(self, 
                 id:str,                                # Unique identifier for the document
                 content:str,                           # The main content of the document
                 metadata:Optional[dict[str,Any]]=None, # Flexible metadata storage
                 created_at:Optional[datetime]=None,    # When the document was created
                 source:Optional[str]=None              # Where this document came from
                ):
        # Assign all constructor arguments to same-named attributes on self
        store_attr()
        # None is the "not provided" sentinel; build the defaults per instance so
        # no mutable dict is shared between documents
        if metadata is None: self.metadata = {}
        if created_at is None: self.created_at = datetime.now()
    
    def __repr__(self): return f"Document(id={self.id}, source={self.source})"
    
    @property
    def summary(self) -> str:
        "The content itself if it is at most 100 characters, otherwise its first 100 characters followed by '...'"
        return self.content[:100] + "..." if len(self.content) > 100 else self.content

In [None]:
# Sample documents: one email and one web page
doc1 = Document(
    id="email_1",
    content="Meeting scheduled for tomorrow at 2pm",
    metadata={"type": "email", "from": "alice@example.com"},
    source="outlook",
)
doc2 = Document(
    id="webpage_1",
    content="FastAI is a deep learning library",
    metadata={"url": "fast.ai", "type": "webpage"},
    source="web",
)

# Constructor arguments can be read back as attributes
test_eq(doc1.metadata["type"], "email")
test_eq(doc2.source, "web")

In [None]:
# Content longer than 100 characters is cut to 100 characters plus the "..." suffix
long_doc = Document(id="2", content="x" * 200)
test_eq(len(long_doc.summary), 100 + len("..."))

In [None]:
#| export
class SearchResult:
    """A single search hit: a document together with how well it matched
    
    Index implementations return SearchResult objects to represent matches.
    Each one carries:
    - The matched document
    - A relevance score (higher is better)
    - Optional highlighted snippets showing match context
    """
    def __init__(self,
                 document:Document,          # The document that matched
                 score:float,                # Relevance score
                 highlights:list[str]=None   # Highlighted snippets from the document
                ):
        store_attr()
        # A missing highlights argument becomes a fresh, per-instance empty list
        if highlights is None:
            self.highlights = []
    
    def __repr__(self):
        doc_id, score = self.document.id, self.score
        return f"SearchResult(doc={doc_id}, score={score:.3f})"
    
    @property
    def best_highlight(self) -> Optional[str]:
        "Returns the first highlight, or None when there are no highlights"
        if not self.highlights:
            return None
        return self.highlights[0]

In [None]:
# Wrap the sample email document in a SearchResult and read it back
result = SearchResult(document=doc1, score=0.85,
                      highlights=["Meeting scheduled for tomorrow", "2pm"])

test_eq(result.score, 0.85)
# best_highlight is the first entry of the highlights list
test_eq(result.best_highlight, "Meeting scheduled for tomorrow")

In [None]:
#| export
class Query:
    """A search request with filters and preferences
    
    Query objects encapsulate all parameters for a search request:
    - Search text (what to look for)
    - Filters (metadata constraints)
    - Boost factors (field importance weights)
    
    When omitted, `filters` and `boost` each become a fresh empty dict.
    """
    def __init__(self,
                 text:str,                             # The search text
                 filters:Optional[dict[str,Any]]=None, # Metadata filters to apply
                 boost:Optional[dict[str,float]]=None  # Boost factors for specific fields
                ):
        # Assign all constructor arguments to same-named attributes on self
        store_attr()
        # None is the "not provided" sentinel; build the defaults per instance so
        # no mutable dict is shared between queries
        if filters is None: self.filters = {}
        if boost is None: self.boost = {}
    
    def __repr__(self): return f"Query(text='{self.text}')"

In [None]:
# Build a query restricted to emails, weighting the subject above the content
query = Query(text="meeting tomorrow",
              filters={"type": "email"},
              boost={"subject": 2.0, "content": 1.0})

test_eq(query.text, "meeting tomorrow")
test_eq(query.filters["type"], "email")

In [None]:
#| export
class Index:
    """Common interface for all index implementations
    
    Every method here raises NotImplementedError; subclasses are expected to
    override them. Implementations might include:
    - Inverted index for text search
    - Vector index for semantic search
    - Graph index for relationship-based search
    """
    def __init__(self):
        # The base class keeps no state of its own
        pass
    
    def add(self, doc:Document):
        "Add a document to the index"
        raise NotImplementedError()
    
    def remove(self, doc_id:str):
        "Remove a document from the index"
        raise NotImplementedError()
    
    def search(self, query:Query) -> list[SearchResult]:
        "Search the index with the given query"
        raise NotImplementedError()
    
    def clear(self):
        "Clear all documents from the index"
        raise NotImplementedError()

# Concrete Index classes implemented in 01_indexes.ipynb

In [None]:
#| hide
# Export the cells marked `#| export` via nbdev
import nbdev
nbdev.nbdev_export()