<a href="https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/docs/examples/vector_stores/WeaviateIndex_metadata_filter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Weaviate Vector Store Metadata Filter

If you're opening this notebook on Colab, you will probably need to install LlamaIndex 🦙.

In [None]:
# Install the LlamaIndex integration package for the Weaviate vector store.
%pip install llama-index-vector-stores-weaviate

Collecting llama-index-vector-stores-weaviate
  Downloading llama_index_vector_stores_weaviate-1.1.3-py3-none-any.whl.metadata (717 bytes)
Collecting llama-index-core<0.12.0,>=0.11.0 (from llama-index-vector-stores-weaviate)
  Downloading llama_index_core-0.11.23-py3-none-any.whl.metadata (2.5 kB)
Collecting weaviate-client<5.0.0,>=4.5.7 (from llama-index-vector-stores-weaviate)
  Downloading weaviate_client-4.9.3-py3-none-any.whl.metadata (3.6 kB)
Collecting dataclasses-json (from llama-index-core<0.12.0,>=0.11.0->llama-index-vector-stores-weaviate)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting dirtyjson<2.0.0,>=1.0.8 (from llama-index-core<0.12.0,>=0.11.0->llama-index-vector-stores-weaviate)
  Downloading dirtyjson-1.0.8-py3-none-any.whl.metadata (11 kB)
Collecting filetype<2.0.0,>=1.2.0 (from llama-index-core<0.12.0,>=0.11.0->llama-index-vector-stores-weaviate)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting nltk>3.8

In [None]:
!pip install llama-index weaviate-client

#### Creating a Weaviate Client

In [None]:
import os
from getpass import getpass

import openai

# Keep a key that is already exported in the environment; only prompt when it
# is missing or empty, so a valid key is never overwritten with an empty
# string and no secret has to be typed into the notebook source.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")
openai.api_key = os.environ["OPENAI_API_KEY"]

In [None]:
import logging
import sys

# Send INFO-level logs to stdout through exactly one root handler.
# basicConfig() already installs a StreamHandler for the given stream, so
# adding a second stdout handler makes every record print twice.
# force=True replaces any handlers left over from a previous run of this
# cell, so re-running it does not stack additional handlers.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

In [None]:
import os

import weaviate

# cloud
# Read the cluster endpoint and API key from the environment instead of
# hardcoding them: credentials committed in a notebook are exposed to everyone
# who can read the file. Set WEAVIATE_URL and WEAVIATE_API_KEY before running
# this cell; a missing variable raises KeyError here rather than failing later
# with a less obvious connection error.
cluster_url = os.environ["WEAVIATE_URL"]
api_key = os.environ["WEAVIATE_API_KEY"]

client = weaviate.connect_to_wcs(
    cluster_url=cluster_url,
    auth_credentials=weaviate.auth.AuthApiKey(api_key),
)

# local
# client = weaviate.connect_to_local()

#### Load documents, build the VectorStoreIndex

In [None]:
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.weaviate import WeaviateVectorStore
from IPython.display import Markdown, display

## Metadata Filtering

Let's insert a few dummy nodes with metadata, and then apply filters so that only the matching nodes are returned.

In [None]:
from llama_index.core.schema import TextNode

# Sample corpus: one TextNode per title, each carrying the metadata that the
# filter examples query on. Every entry has "theme" and "year"; entries use
# either an "author" or a "director" key.
nodes = [
    TextNode(text=title, metadata=meta)
    for title, meta in (
        (
            "The Shawshank Redemption",
            {"author": "Stephen King", "theme": "Friendship", "year": 1994},
        ),
        (
            "The Godfather",
            {"director": "Francis Ford Coppola", "theme": "Mafia", "year": 1972},
        ),
        (
            "Inception",
            {"director": "Christopher Nolan", "theme": "Fiction", "year": 2010},
        ),
        (
            "To Kill a Mockingbird",
            {"author": "Harper Lee", "theme": "Mafia", "year": 1960},
        ),
        (
            "1984",
            {"author": "George Orwell", "theme": "Totalitarianism", "year": 1949},
        ),
        (
            "The Great Gatsby",
            {
                "author": "F. Scott Fitzgerald",
                "theme": "The American Dream",
                "year": 1925,
            },
        ),
        (
            "Harry Potter and the Sorcerer's Stone",
            {"author": "J.K. Rowling", "theme": "FictionChina", "year": 1997},
        ),
    )
]

In [None]:
from llama_index.core import StorageContext

# Back the index with the "LlamaIndex_filter" collection on the connected
# Weaviate client, then build the index from the sample nodes.
vector_store = WeaviateVectorStore(
    index_name="LlamaIndex_filter",
    weaviate_client=client,
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(nodes=nodes, storage_context=storage_context)

In [None]:
# Baseline: retrieve without any metadata filters for comparison with the
# filtered queries in the following cells.
retriever = index.as_retriever()
retriever.retrieve("What is inception?")

In [None]:
from weaviate.classes.query import Filter

# Query the Weaviate collection directly (bypassing LlamaIndex) with a
# wildcard `like` filter on the "theme" property.
LlamaIndex_filter = client.collections.get("LlamaIndex_filter")
theme_like_china = Filter.by_property("theme").like("*China*")
response = LlamaIndex_filter.query.fetch_objects(filters=theme_like_china, limit=1)

# Print the stored properties of each returned object.
for obj in response.objects:
    print(obj.properties)

{'text': "Harry Potter and the Sorcerer's Stone", 'year': 1997.0, '_node_type': 'TextNode', 'director': None, 'document_id': 'None', 'ref_doc_id': 'None', 'relationships': None, 'theme': 'FictionChina', 'author': 'J.K. Rowling', '_node_content': '{"id_": "434ecbe8-45b6-4a42-92c0-f3b484b53aa9", "embedding": null, "metadata": {"author": "J.K. Rowling", "theme": "FictionChina", "year": 1997}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {}, "text": "", "mimetype": "text/plain", "start_char_idx": null, "end_char_idx": null, "text_template": "{metadata_str}\\n\\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\\n", "class_name": "TextNode"}', 'doc_id': 'None', 'node_info': None}


In [None]:
from IPython.display import display
from llama_index.core.vector_stores import (
    MetadataFilter,
    MetadataFilters,
    FilterOperator,
)


# Try a substring-style match on "theme" through the LlamaIndex filter API.
filters = MetadataFilters(
    filters=[
        MetadataFilter(key="theme", operator=FilterOperator.CONTAINS, value="China"),
    ]
)

retriever = index.as_retriever(filters=filters)

# In the recorded run of this cell the retrieval raised
# "ValueError: Filter operator contains not supported". Catch and show that
# error instead of leaving an uncaught exception, so that "Run All" continues
# to the cells below.
try:
    contains_results = retriever.retrieve("What is inception about?")
except ValueError as err:
    print(f"ValueError: {err}")
else:
    display(contains_results)

ValueError: Filter operator contains not supported

In [None]:
from llama_index.core.vector_stores import (
    ExactMatchFilter,
    MetadataFilter,
    MetadataFilters,
)


# Two filters with no explicit operator or condition: match nodes on both
# theme "Mafia" and year 1972. MetadataFilter is imported here because it is
# the class this cell actually uses; previously it was only in scope if an
# earlier cell had been run.
filters = MetadataFilters(
    filters=[
        MetadataFilter(key="theme", value="Mafia"),
        MetadataFilter(key="year", value=1972),
    ]
)

retriever = index.as_retriever(filters=filters)
retriever.retrieve("What is inception?")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"
HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"
HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"


[NodeWithScore(node=TextNode(id_='34d778a1-b6bf-4a24-a1bf-ac659a9959ea', embedding=[-0.0017794573213905096, -0.023969227448105812, -0.01290263794362545, -0.035538844764232635, -0.00970841757953167, 0.02575497329235077, -0.0005831966991536319, 0.0009125220822170377, -0.02186909131705761, -0.0278173815459013, 0.023969227448105812, 0.018712596967816353, 0.028471317142248154, -0.0018627711106091738, 0.006259539630264044, 0.015468074008822441, 0.029024647548794746, -0.007985550910234451, 0.010418943129479885, -0.00027961216983385384, 0.010318337008357048, 0.006847452372312546, -0.029955245554447174, -0.0007384276250377297, 0.004885647911578417, -0.0011467438889667392, 0.004489514045417309, -0.026987388730049133, 0.021567273885011673, -0.017505332827568054, 0.012072643265128136, -0.024069832637906075, -0.006407303735613823, 0.0021127124782651663, 0.010173717513680458, -0.0029820057097822428, 0.005731361452490091, -0.010488108731806278, 0.0010052676079794765, 0.014700958505272865, 0.014021872

In [None]:
from llama_index.core.vector_stores import FilterCondition, FilterOperator


# Combine two filters with OR: theme "Fiction", or year greater than 1997.
filters = MetadataFilters(
    condition=FilterCondition.OR,
    filters=[
        MetadataFilter(key="theme", value="Fiction"),
        MetadataFilter(key="year", operator=FilterOperator.GT, value=1997),
    ],
)

retriever = index.as_retriever(filters=filters)
retriever.retrieve("Harry Potter?")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"
HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"
HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"


[NodeWithScore(node=TextNode(id_='b9a4dffd-b9f1-4d83-9c13-f4402d1036b8', embedding=[0.012515314854681492, -0.014948848634958267, -0.04071340337395668, -0.006991580594331026, -0.010674070566892624, 0.016596956178545952, -0.029305409640073776, -0.050885315984487534, -0.021270886063575745, -0.01666133478283882, 0.024966251105070114, 0.013841526582837105, 0.017202120274305344, 0.0007604792481288314, -0.010571063496172428, -0.000707366387359798, 0.022494090721011162, -0.01047449465841055, 0.01530937198549509, -0.014923096634447575, -0.016712838783860207, -0.009611813351511955, -0.008382171392440796, 0.010004526935517788, -0.010493808425962925, -0.0017655993578955531, 0.02235245518386364, -0.04220699891448021, 0.019970426335930824, 0.0035215418320149183, 0.00806027464568615, -0.0053756628185510635, -0.025931939482688904, -0.022506965324282646, -0.03512528911232948, 0.00804739911109209, -0.026833247393369675, -0.009341420605778694, 0.00688857352361083, -0.0037597448099404573, 0.03002645634114