<a href="https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/examples/vector_stores/pinecone_auto_retriever.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Pinecone Vector Store - Auto Retriever

If you're opening this notebook on Colab, you will probably need to install LlamaIndex 🦙.

In [None]:
!pip install llama-index

#### Creating a Pinecone Index

In [2]:
import logging
import sys
import os

# Send INFO-level log records to stdout so they appear in the cell output.
# `force=True` first removes any handlers already attached to the root logger,
# so exactly one stdout handler exists afterwards: log lines are not printed
# twice, and re-running this cell does not stack additional handlers.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

In [3]:
import os
from getpass import getpass

# Keep a key that is already exported in the environment; only prompt when it
# is missing or empty. Prompting (instead of assigning a literal) keeps the
# secret out of the saved notebook.
if not os.environ.get("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass("Pinecone API key: ")

In [None]:
import pinecone

# Initialise the Pinecone client with the key read from the environment,
# targeting the "gcp-starter" environment.
api_key = os.environ["PINECONE_API_KEY"]
pinecone.init(
    environment="gcp-starter",
    api_key=api_key,
)

In [None]:
# dimensions are for text-embedding-ada-002
# Create the index only when it does not exist yet. Checking explicitly
# (instead of catching every exception) means genuine failures -- bad API key,
# quota exceeded, wrong environment -- surface here rather than being printed
# and ignored, only to break the cells that follow.
index_name = "quickstart-index"
if index_name not in pinecone.list_indexes():
    pinecone.create_index(
        index_name, dimension=1536, metric="euclidean", pod_type="p1"
    )

In [6]:
# Handle to the "quickstart-index" index; passed to PineconeVectorStore below.
pinecone_index = pinecone.Index("quickstart-index")

In [7]:
# Optional: delete data in your pinecone index.
# Removes everything stored under the "test" namespace (the namespace the
# vector store in this notebook writes to), so a re-run starts from empty.
pinecone_index.delete(delete_all=True, namespace="test")

{}

#### Load documents, build the PineconeVectorStore and VectorStoreIndex

In [None]:
from llama_index import VectorStoreIndex, StorageContext
from llama_index.vector_stores import PineconeVectorStore

In [9]:
from llama_index.schema import TextNode

# Toy corpus of books and movies. Each node's text is just the title; the
# metadata (author or director, theme, year) is what the auto-retriever
# filters on later in the notebook.
nodes = [
    TextNode(
        text="The Shawshank Redemption",
        metadata={
            "author": "Stephen King",
            "theme": "Friendship",
            "year": 1994,
        },
    ),
    TextNode(
        text="The Godfather",
        metadata={
            "director": "Francis Ford Coppola",
            "theme": "Mafia",
            "year": 1972,
        },
    ),
    TextNode(
        text="Inception",
        metadata={
            "director": "Christopher Nolan",
            "theme": "Fiction",
            "year": 2010,
        },
    ),
    TextNode(
        text="To Kill a Mockingbird",
        metadata={
            "author": "Harper Lee",
            # Was "Mafia" (duplicated from The Godfather), which made theme
            # filters return this title incorrectly.
            "theme": "Racial injustice",
            "year": 1960,
        },
    ),
    TextNode(
        text="1984",
        metadata={
            "author": "George Orwell",
            "theme": "Totalitarianism",
            "year": 1949,
        },
    ),
    TextNode(
        text="The Great Gatsby",
        metadata={
            "author": "F. Scott Fitzgerald",
            "theme": "The American Dream",
            "year": 1925,
        },
    ),
    TextNode(
        text="Harry Potter and the Sorcerer's Stone",
        metadata={
            "author": "J.K. Rowling",
            "theme": "Fiction",
            "year": 1997,
        },
    ),
]

In [10]:
# Pinecone-backed vector store scoped to the "test" namespace.
vector_store = PineconeVectorStore(
    namespace="test",
    pinecone_index=pinecone_index,
    # this is a hack to allow for blank queries in pinecone:
    # an all-zero vector of the index dimension (1536) stands in for the query
    default_empty_query_vector=[0] * 1536,
)

# Storage context that routes index storage through the Pinecone vector store.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [None]:
# Build the vector index over `nodes`, stored via the Pinecone-backed
# storage context created above.
# NOTE(review): presumably this embeds each node and upserts it into Pinecone,
# which would require embedding-model credentials -- confirm.
index = VectorStoreIndex(nodes, storage_context=storage_context)

In [12]:
from llama_index.indices.vector_store.retrievers import (
    VectorIndexAutoRetriever,
)
from llama_index.vector_stores.types import MetadataInfo, VectorStoreInfo


# Describe the collection and its filterable metadata fields for the
# auto-retriever. Each `name` must match a metadata key used on the nodes
# exactly; the first entry was previously "directory", which matches no node
# metadata ("director" is the real key).
vector_store_info = VectorStoreInfo(
    content_info="famous books and movies",
    metadata_info=[
        MetadataInfo(
            name="director",
            type="str",
            description="Name of the director",
        ),
        MetadataInfo(
            name="theme",
            type="str",
            description="Theme of the book/movie",
        ),
        MetadataInfo(
            name="year",
            type="int",
            description="Year of the book/movie",
        ),
    ],
)

# Auto-retriever over the index, configured with the schema description above.
# `empty_query_top_k=10` is passed through as in the original configuration.
retriever = VectorIndexAutoRetriever(
    index,
    vector_store_info=vector_store_info,
    empty_query_top_k=10,
)

In [None]:
# Query with a year constraint expressed in natural language.
# NOTE: this rebinds `nodes` (previously the list of input TextNodes) to the
# retrieval results.
nodes = retriever.retrieve("Tell me about some books/movies after the year 2000")

In [16]:
# Print every retrieved node's content with metadata_mode="all", i.e. the
# metadata fields followed by the node text (see the output below).
for node in nodes:
    print(node.get_content(metadata_mode="all"))

director: Christopher Nolan
theme: Fiction
year: 2010

Inception


In [None]:
# Query with a theme constraint expressed in natural language.
# NOTE: `nodes` is rebound again to this query's retrieval results.
nodes = retriever.retrieve("Tell me about some books that are Fiction")

In [18]:
# For each hit, show its node id on one line, then its content with metadata.
for node in nodes:
    print(node.id_, node.get_content(metadata_mode="all"), sep="\n")

f7b1ad3d-00b9-45e2-951f-ed70f4256e3c
director: Christopher Nolan
theme: Fiction
year: 2010

Inception
d3b347d6-a4e3-4529-bf8a-0ab92f971044
author: J.K. Rowling
theme: Fiction
year: 1997

Harry Potter and the Sorcerer's Stone
