<a href="https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/docs/examples/vector_stores/VespaIndexDemo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# Vespa Vector Store demo


If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙.


In [None]:
# Install the Vespa vector store integration for LlamaIndex. The %pip magic
# installs into the environment of the running kernel.
%pip install llama-index-vector-stores-vespa

In [None]:
!pip install llama-index pyvespa

#### Setting up API key


In [1]:
import getpass
import os
import openai

# Keep an API key that is already configured in the environment instead of
# overwriting it with a placeholder. Only when none is set, prompt for it
# without echoing, so the secret is never stored in the notebook source.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")
openai.api_key = os.environ["OPENAI_API_KEY"]

In [2]:
import logging
import sys

# Send INFO-and-above log records to stdout so they appear in the cell output.
# basicConfig already attaches one stdout handler to the root logger; adding a
# second StreamHandler on top of it would print every message twice.
# force=True replaces handlers left over from an earlier run, so re-executing
# this cell does not accumulate handlers.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

#### Load documents, build the VectorStoreIndex


In [3]:
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.vespa import VespaVectorStore
from IPython.display import Markdown, display

## Defining some sample data

Let's insert some documents.


In [5]:
from llama_index.core.schema import TextNode

# Sample corpus as (text, metadata) pairs. Each entry carries a "theme" and a
# "year"; the creator is stored under either "author" or "director".
# NOTE(review): "To Kill a Mockingbird" has theme "Mafia", same as
# "The Godfather" -- possibly a copy-paste; confirm whether this is intended.
_SAMPLE_WORKS = [
    (
        "The Shawshank Redemption",
        {"author": "Stephen King", "theme": "Friendship", "year": 1994},
    ),
    (
        "The Godfather",
        {"director": "Francis Ford Coppola", "theme": "Mafia", "year": 1972},
    ),
    (
        "Inception",
        {"director": "Christopher Nolan", "theme": "Fiction", "year": 2010},
    ),
    (
        "To Kill a Mockingbird",
        {"author": "Harper Lee", "theme": "Mafia", "year": 1960},
    ),
    (
        "1984",
        {"author": "George Orwell", "theme": "Totalitarianism", "year": 1949},
    ),
    (
        "The Great Gatsby",
        {
            "author": "F. Scott Fitzgerald",
            "theme": "The American Dream",
            "year": 1925,
        },
    ),
    (
        "Harry Potter and the Sorcerer's Stone",
        {"author": "J.K. Rowling", "theme": "Fiction", "year": 1997},
    ),
]

# One TextNode per entry, in the order listed above.
nodes = [
    TextNode(text=work_text, metadata=work_metadata)
    for work_text, work_metadata in _SAMPLE_WORKS
]

In [6]:
?VespaVectorStore

[1;31mInit signature:[0m
[0mVespaVectorStore[0m[1;33m([0m[1;33m
[0m    [0mapplication_package[0m[1;33m:[0m [0mvespa[0m[1;33m.[0m[0mpackage[0m[1;33m.[0m[0mApplicationPackage[0m [1;33m=[0m [0mApplicationPackage[0m[1;33m([0m[1;34m'hybridsearch'[0m[1;33m,[0m [1;33m[[0m[0mSchema[0m[1;33m([0m[1;34m'doc'[0m[1;33m,[0m [0mDocument[0m[1;33m([0m[1;33m[[0m[0mField[0m[1;33m([0m[1;34m'id'[0m[1;33m,[0m [1;34m'string'[0m[1;33m,[0m [1;33m[[0m[1;34m'summary'[0m[1;33m][0m[1;33m,[0m [1;32mNone[0m[1;33m,[0m [1;32mNone[0m[1;33m,[0m [1;32mNone[0m[1;33m,[0m [1;32mNone[0m[1;33m,[0m [1;32mNone[0m[1;33m,[0m [1;32mNone[0m[1;33m,[0m [1;32mNone[0m[1;33m,[0m [1;32mTrue[0m[1;33m,[0m [1;32mNone[0m[1;33m,[0m [1;32mNone[0m[1;33m,[0m [1;32mNone[0m[1;33m,[0m [1;33m[[0m[1;33m][0m[1;33m,[0m [1;32mNone[0m[1;33m)[0m[1;33m,[0m [0mField[0m[1;33m([0m[1;34m'metadata'[0m[1;33m,[0m [1;34m'string'[0m

### Initializing the VespaVectorStore


TODO:

- Explain template used
- Option to define your own application package
- Option to use own embedding model (also outside), but must then change template
- Link to pyvespa documentation


In [7]:
from llama_index.core import StorageContext

# Create the store with all defaults. The log output below reports that the
# default hybrid template is used and shows the client waiting for a Vespa
# configuration server and application on http://localhost:8080.
vector_store = VespaVectorStore()
# Storage context that uses the Vespa store as its vector store.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
# Build the index over the sample nodes using that storage context.
index = VectorStoreIndex(nodes, storage_context=storage_context)

INFO:llama_index.vector_stores.vespa.base:Using default hybrid template. Please make sure that the Vespa application is set up with the correct schema and rank profile.
Using default hybrid template. Please make sure that the Vespa application is set up with the correct schema and rank profile.
Waiting for configuration server, 0/300 seconds...
Waiting for configuration server, 5/300 seconds...
Waiting for configuration server, 10/300 seconds...
Using plain http against endpoint http://localhost:8080/ApplicationStatus
Waiting for application status, 0/300 seconds...
Using plain http against endpoint http://localhost:8080/ApplicationStatus
Waiting for application status, 5/300 seconds...
Using plain http against endpoint http://localhost:8080/ApplicationStatus
Waiting for application status, 10/300 seconds...
Using plain http against endpoint http://localhost:8080/ApplicationStatus
Waiting for application status, 15/300 seconds...
Using plain http against endpoint http://localhost:8080/

## As retriever


In [10]:
# "default" query mode: the logged Vespa query body below uses the 'bm25'
# ranking profile with a plain userQuery() match.
retriever = index.as_retriever(vector_store_query_mode="default")
# The last expression is shown as the cell output (a list of NodeWithScore).
retriever.retrieve("Who directed inception?")

INFO:llama_index.vector_stores.vespa.base:Query: VectorStoreQuery(query_embedding=None, similarity_top_k=2, doc_ids=None, node_ids=[], query_str='Who directed inception?', output_fields=None, embedding_field=None, mode=<VectorStoreQueryMode.DEFAULT: 'default'>, alpha=None, filters=None, mmr_threshold=None, sparse_top_k=None, hybrid_top_k=None)
Query: VectorStoreQuery(query_embedding=None, similarity_top_k=2, doc_ids=None, node_ids=[], query_str='Who directed inception?', output_fields=None, embedding_field=None, mode=<VectorStoreQueryMode.DEFAULT: 'default'>, alpha=None, filters=None, mmr_threshold=None, sparse_top_k=None, hybrid_top_k=None)
INFO:llama_index.vector_stores.vespa.base:Vespa Query body:
 {'hits': 2, 'ranking.profile': 'bm25', 'query': 'Who directed inception?', 'tracelevel': 9, 'yql': 'select * from sources * where userQuery()'}
Vespa Query body:
 {'hits': 2, 'ranking.profile': 'bm25', 'query': 'Who directed inception?', 'tracelevel': 9, 'yql': 'select * from sources * wh

[NodeWithScore(node=TextNode(id_='c9d6c88f-e4e1-4ea1-9ad7-1dd6754351a0', embedding=None, metadata={'director': 'Christopher Nolan', 'theme': 'Fiction', 'year': 2010}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=2.3017175961610485)]

In [9]:
# "semantic_hybrid" query mode: the logged Vespa query body below uses the
# 'fusion' ranking profile and adds a nearestNeighbor(embedding,q) clause.
# No embedding field is passed, so the log reports falling back to "embedding".
retriever = index.as_retriever(vector_store_query_mode="semantic_hybrid")
# The last expression is shown as the cell output (a list of NodeWithScore).
retriever.retrieve("Who directed inception?")

INFO:llama_index.vector_stores.vespa.base:Query: VectorStoreQuery(query_embedding=None, similarity_top_k=2, doc_ids=None, node_ids=[], query_str='Who directed inception?', output_fields=None, embedding_field=None, mode=<VectorStoreQueryMode.SEMANTIC_HYBRID: 'semantic_hybrid'>, alpha=None, filters=None, mmr_threshold=None, sparse_top_k=None, hybrid_top_k=None)
Query: VectorStoreQuery(query_embedding=None, similarity_top_k=2, doc_ids=None, node_ids=[], query_str='Who directed inception?', output_fields=None, embedding_field=None, mode=<VectorStoreQueryMode.SEMANTIC_HYBRID: 'semantic_hybrid'>, alpha=None, filters=None, mmr_threshold=None, sparse_top_k=None, hybrid_top_k=None)
Embedding field not provided. Using default embedding field embedding
INFO:llama_index.vector_stores.vespa.base:Vespa Query body:
 {'hits': 2, 'ranking.profile': 'fusion', 'query': 'Who directed inception?', 'tracelevel': 9, 'yql': 'select * from sources * where rank({targetHits:10}nearestNeighbor(embedding,q), userQ

[NodeWithScore(node=TextNode(id_='c9d6c88f-e4e1-4ea1-9ad7-1dd6754351a0', embedding=None, metadata={'director': 'Christopher Nolan', 'theme': 'Fiction', 'year': 2010}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.03278688524590164),
 NodeWithScore(node=TextNode(id_='830e48de-cef2-4d5c-a03a-dd59e2a0b418', embedding=None, metadata={'author': 'George Orwell', 'theme': 'Totalitarianism', 'year': 1949}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.03225806451612903)]

## As query engine


In [11]:
# Wrap the index in a query engine with default settings; the logged query
# below shows retrieval in 'default' mode with similarity_top_k=2.
query_engine = index.as_query_engine()
# The displayed Response holds the answer text plus the retrieved source_nodes.
query_engine.query("Who directed inception?")

INFO:llama_index.vector_stores.vespa.base:Query: VectorStoreQuery(query_embedding=None, similarity_top_k=2, doc_ids=None, node_ids=[], query_str='Who directed inception?', output_fields=None, embedding_field=None, mode=<VectorStoreQueryMode.DEFAULT: 'default'>, alpha=None, filters=None, mmr_threshold=None, sparse_top_k=None, hybrid_top_k=None)
Query: VectorStoreQuery(query_embedding=None, similarity_top_k=2, doc_ids=None, node_ids=[], query_str='Who directed inception?', output_fields=None, embedding_field=None, mode=<VectorStoreQueryMode.DEFAULT: 'default'>, alpha=None, filters=None, mmr_threshold=None, sparse_top_k=None, hybrid_top_k=None)
INFO:llama_index.vector_stores.vespa.base:Vespa Query body:
 {'hits': 2, 'ranking.profile': 'bm25', 'query': 'Who directed inception?', 'tracelevel': 9, 'yql': 'select * from sources * where userQuery()'}
Vespa Query body:
 {'hits': 2, 'ranking.profile': 'bm25', 'query': 'Who directed inception?', 'tracelevel': 9, 'yql': 'select * from sources * wh

Response(response='Christopher Nolan', source_nodes=[NodeWithScore(node=TextNode(id_='c9d6c88f-e4e1-4ea1-9ad7-1dd6754351a0', embedding=None, metadata={'director': 'Christopher Nolan', 'theme': 'Fiction', 'year': 2010}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=2.3017175961610485)], metadata={'c9d6c88f-e4e1-4ea1-9ad7-1dd6754351a0': {'director': 'Christopher Nolan', 'theme': 'Fiction', 'year': 2010}})

In [13]:
from llama_index.core.vector_stores import (
    FilterOperator,
    FilterCondition,
    MetadataFilter,
    MetadataFilters,
)

# Two individual conditions. The first relies on the default operator, which
# the logged query below shows as FilterOperator.EQ.
theme_is_fiction = MetadataFilter(key="theme", value="Fiction")
year_after_1997 = MetadataFilter(
    key="year", value=1997, operator=FilterOperator.GT
)

# A node passes if EITHER condition holds (OR), not both.
filters = MetadataFilters(
    filters=[theme_is_fiction, year_after_1997],
    condition=FilterCondition.OR,
)

# Retrieve with the combined filter applied to the query.
retriever = index.as_retriever(filters=filters)
retriever.retrieve("Harry Potter?")

INFO:llama_index.vector_stores.vespa.base:Query: VectorStoreQuery(query_embedding=None, similarity_top_k=2, doc_ids=None, node_ids=[], query_str='Harry Potter?', output_fields=None, embedding_field=None, mode=<VectorStoreQueryMode.DEFAULT: 'default'>, alpha=None, filters=MetadataFilters(filters=[MetadataFilter(key='theme', value='Fiction', operator=<FilterOperator.EQ: '=='>), MetadataFilter(key='year', value=1997, operator=<FilterOperator.GT: '>'>)], condition=<FilterCondition.OR: 'or'>), mmr_threshold=None, sparse_top_k=None, hybrid_top_k=None)
Query: VectorStoreQuery(query_embedding=None, similarity_top_k=2, doc_ids=None, node_ids=[], query_str='Harry Potter?', output_fields=None, embedding_field=None, mode=<VectorStoreQueryMode.DEFAULT: 'default'>, alpha=None, filters=MetadataFilters(filters=[MetadataFilter(key='theme', value='Fiction', operator=<FilterOperator.EQ: '=='>), MetadataFilter(key='year', value=1997, operator=<FilterOperator.GT: '>'>)], condition=<FilterCondition.OR: 'or'

[NodeWithScore(node=TextNode(id_='180bb151-3eb8-4bb5-92be-5a7753c23fb7', embedding=None, metadata={'author': 'J.K. Rowling', 'theme': 'Fiction', 'year': 1997}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=2.1663224434456927)]