<a href="https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/examples/vector_stores/pinecone_metadata_filter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Pinecone Vector Store - Metadata Filter

If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙.

In [34]:
!pip install llama-index pinecone-client



In [35]:
import logging
import sys
import os

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

Build a Pinecone Index and connect to it

In [36]:
from pinecone import Pinecone, ServerlessSpec

# You must set your Pinecone API key from https://app.pinecone.io
os.environ["PINECONE_API_KEY"] = ""

api_key = os.environ["PINECONE_API_KEY"]
pc = Pinecone(api_key=api_key)

In [37]:
# dimensions are for text-embedding-ada-002
pc.create_index(
    name='quickstart-index',
    dimension=1536,
    metric="euclidean",
    spec=ServerlessSpec(
       cloud='aws',
       region='us-west-2'
    )
)

In [38]:
pinecone_index = pc.Index("quickstart-index")

Build the PineconeVectorStore and VectorStoreIndex

In [39]:
from llama_index import VectorStoreIndex, StorageContext
from llama_index.vector_stores import PineconeVectorStore

In [40]:
from llama_index.schema import TextNode

nodes = [
    TextNode(
        text="The Shawshank Redemption",
        metadata={
            "author": "Stephen King",
            "theme": "Friendship",
            "year": 1994,
        },
    ),
    TextNode(
        text="The Godfather",
        metadata={
            "director": "Francis Ford Coppola",
            "theme": "Mafia",
            "year": 1972,
        },
    ),
    TextNode(
        text="Inception",
        metadata={
            "director": "Christopher Nolan",
            "theme": "Fiction",
            "year": 2010,
        },
    ),
    TextNode(
        text="To Kill a Mockingbird",
        metadata={
            "author": "Harper Lee",
            "theme": "Mafia",
            "year": 1960,
        },
    ),
    TextNode(
        text="1984",
        metadata={
            "author": "George Orwell",
            "theme": "Totalitarianism",
            "year": 1949,
        },
    ),
    TextNode(
        text="The Great Gatsby",
        metadata={
            "author": "F. Scott Fitzgerald",
            "theme": "The American Dream",
            "year": 1925,
        },
    ),
    TextNode(
        text="Harry Potter and the Sorcerer's Stone",
        metadata={
            "author": "J.K. Rowling",
            "theme": "Fiction",
            "year": 1997,
        },
    ),
]

In [41]:
import openai

openai.api_key = ""


#vector_store = PineconeVectorStore(
#    pinecone_index=pinecone_index, namespace="test_05_14"
#)

vector_store = PineconeVectorStore(
    pinecone_index=pinecone_index
)

storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(nodes, storage_context=storage_context)

Upserted vectors:   0%|          | 0/7 [00:00<?, ?it/s]

Define metadata filters

In [42]:
from llama_index.vector_stores.types import (
    MetadataFilter,
    MetadataFilters,
    FilterOperator,
)

filters = MetadataFilters(
    filters=[
        MetadataFilter(key="theme", operator=FilterOperator.EQ, value="Mafia"),
    ]
)

Retrieve from vector store with filters

In [47]:
retriever = index.as_retriever(filters=filters)
retriever.retrieve("What is inception about?")

[NodeWithScore(node=TextNode(id_='70821fde-b507-4d10-8764-077a241519ab', embedding=[0.00310940156, -0.0246712118, -0.0222742166, -0.0364649445, -0.00715911388, 0.0117236068, -0.0400604382, -0.0275654588, -0.0157589763, 0.00740136392, 0.0278459582, 0.029962454, 0.0187679715, -0.00440511806, 0.00412780605, 0.00442743069, 0.0276674572, -0.00760536361, -0.00303608901, -0.012947605, -0.00570242899, -0.01923972, 0.00208462193, 0.0092118606, 0.00205274695, -0.0128328558, 0.0136297289, -0.0292994548, 0.0149939768, -0.0193289705, 0.0219937172, -0.00944136083, -0.0221977159, -0.00928198546, -0.00940948538, -0.00429993076, -0.0119786067, -0.0272339582, 0.0158864763, -0.0080261128, 0.0143437283, 0.00407999381, -0.00648655277, -0.0170722231, 0.00394293154, -0.0149939768, -0.00831298716, -0.0212669671, -0.0228479654, 0.0105123585, 0.0167662241, 0.0527849197, -0.0421514362, -0.0262139607, -0.00156506011, -0.00443380559, -0.00724836392, 0.00597655354, 0.0354959443, -0.00842773728, -0.00259143347, -0.0

Multiple Metadata Filters with `AND` condition

In [48]:
from llama_index.vector_stores.types import (
    FilterOperator,
    FilterCondition,
)

filters = MetadataFilters(
    filters=[
        MetadataFilter(key="theme", value="Fiction"),
        MetadataFilter(key="year", value=1997, operator=FilterOperator.GT),
    ],
    condition=FilterCondition.AND,
)

retriever = index.as_retriever(filters=filters)
retriever.retrieve("Harry Potter?")

[NodeWithScore(node=TextNode(id_='70821fde-b507-4d10-8764-077a241519ab', embedding=[0.00310940156, -0.0246712118, -0.0222742166, -0.0364649445, -0.00715911388, 0.0117236068, -0.0400604382, -0.0275654588, -0.0157589763, 0.00740136392, 0.0278459582, 0.029962454, 0.0187679715, -0.00440511806, 0.00412780605, 0.00442743069, 0.0276674572, -0.00760536361, -0.00303608901, -0.012947605, -0.00570242899, -0.01923972, 0.00208462193, 0.0092118606, 0.00205274695, -0.0128328558, 0.0136297289, -0.0292994548, 0.0149939768, -0.0193289705, 0.0219937172, -0.00944136083, -0.0221977159, -0.00928198546, -0.00940948538, -0.00429993076, -0.0119786067, -0.0272339582, 0.0158864763, -0.0080261128, 0.0143437283, 0.00407999381, -0.00648655277, -0.0170722231, 0.00394293154, -0.0149939768, -0.00831298716, -0.0212669671, -0.0228479654, 0.0105123585, 0.0167662241, 0.0527849197, -0.0421514362, -0.0262139607, -0.00156506011, -0.00443380559, -0.00724836392, 0.00597655354, 0.0354959443, -0.00842773728, -0.00259143347, -0.0

Multiple Metadata Filters with `OR` condition

In [45]:
from llama_index.vector_stores.types import (
    FilterOperator,
    FilterCondition,
)


filters = MetadataFilters(
    filters=[
        MetadataFilter(key="theme", value="Fiction"),
        MetadataFilter(key="year", value=1997, operator=FilterOperator.GT),
    ],
    condition=FilterCondition.OR,
)

retriever = index.as_retriever(filters=filters)
retriever.retrieve("Harry Potter?")

[]

Use keyword arguments specific to pinecone

In [46]:
retriever = index.as_retriever(
    vector_store_kwargs={"filter": {"theme": "Mafia"}}
)
retriever.retrieve("What is inception about?")

[]