In [1]:
import pinecone
import os 

# Credentials are read from the environment so that no secret is stored in the
# notebook (or in its version-control history).
# NOTE(review): the key that used to be embedded here should be treated as
# leaked and rotated in the Pinecone console.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
if not PINECONE_API_KEY:
    raise RuntimeError(
        "Set the PINECONE_API_KEY environment variable before running this notebook."
    )

# The environment/region is not secret; keep the notebook's original value as
# the default but allow it to be overridden without editing the code.
PINECONE_ENV_KEY = os.environ.get("PINECONE_ENVIRONMENT", "asia-southeast1-gcp-free")

# Configure the pinecone client for every later call in this notebook.
pinecone.init(
    api_key=PINECONE_API_KEY,
    environment=PINECONE_ENV_KEY
)

  from tqdm.autonotebook import tqdm


In [2]:
# Name of the scratch index used by every cell below.
index_name = "pinecone-metadata-filtering"

# Drop an index of the same name if one already exists, so that the
# create call below always builds a fresh index.
already_exists = index_name in pinecone.list_indexes()
if already_exists:
    pinecone.delete_index(index_name)

# 2-dimensional vectors, "euclidean" metric, one shard.
pinecone.create_index(name=index_name, dimension=2, metric="euclidean", shards=1)

In [3]:
# Handle used by the later cells to upsert, fetch and query on the index created above.
index = pinecone.Index(index_name=index_name)

In [5]:
import pandas as pd 

# Four toy records: two "finance" items (F-*) and two "sport" items (S-*),
# each with a 2-d vector and a metadata dict holding a category and a
# publication year. The three columns are aligned by position.
df = pd.DataFrame(
    {
        "id": ["F-1", "F-2", "S-1", "S-2"],
        "vector": [[1., 1.], [2., 2.], [3., 3.], [4., 4.]],
        "metadata": [
            {"category": "finance", "published": 2015},
            {"category": "finance", "published": 2016},
            {"category": "sport", "published": 2017},
            {"category": "sport", "published": 2018},
        ],
    }
)
df

Unnamed: 0,id,vector,metadata
0,F-1,"[1.0, 1.0]","{'category': 'finance', 'published': 2015}"
1,F-2,"[2.0, 2.0]","{'category': 'finance', 'published': 2016}"
2,S-1,"[3.0, 3.0]","{'category': 'sport', 'published': 2017}"
3,S-2,"[4.0, 4.0]","{'category': 'sport', 'published': 2018}"


In [7]:
# Pair each row's id, vector and metadata into one (id, vector, metadata)
# tuple and send them all to the index in a single upsert call.
vector_records = zip(df["id"], df["vector"], df["metadata"])
index.upsert(vectors=vector_records)

# Display the index statistics after the upsert.
index.describe_index_stats()

{'dimension': 2,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 4}},
 'total_vector_count': 4}

In [8]:
# Look up a single record by id and display what the index returns for it.
index.fetch(ids=["F-1"])

{'namespace': '',
 'vectors': {'F-1': {'id': 'F-1',
                     'metadata': {'category': 'finance', 'published': 2015.0},
                     'values': [1.0, 1.0]}}}

In [10]:
# Unfiltered query: use F-1's own stored vector as the query and ask for the
# two closest matches, with no metadata filter applied.
query_results = index.query(
    queries=df.loc[df["id"] == "F-1", "vector"],
    top_k=2,
)
query_results

{'results': [{'matches': [{'id': 'F-1', 'score': 0.0, 'values': []},
                          {'id': 'F-2', 'score': 1.99999905, 'values': []}],
              'namespace': ''}]}

In [11]:
# Metadata filter passed to the query below: `category` equal to "finance"
# and `published` greater than 2015.
filter_condition = {"category": {"$eq": "finance"}, "published": {"$gt": 2015}}

# Same query vector (F-1) and top_k as the unfiltered query, now with the
# metadata filter passed along.
query_results = index.query(
    queries=df.loc[df["id"] == "F-1", "vector"],
    top_k=2,
    filter=filter_condition,
)
query_results

{'results': [{'matches': [{'id': 'F-2', 'score': 1.99999905, 'values': []}],
              'namespace': ''}]}

In [12]:
# Teardown: remove the scratch index created at the top of the notebook.
pinecone.delete_index(name=index_name)