# Pinecone Vector Store - Auto Retriever

#### Creating a Pinecone Index

In [1]:
import logging
import sys
import os

# Send INFO-level log records to stdout so they appear in the cell output.
# Two stdout handlers end up on the root logger (one from basicConfig, one
# added explicitly), so each record is printed twice: once with the
# "LEVEL:logger:" prefix and once as the bare message.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
logging.getLogger().addHandler(
    logging.StreamHandler(stream=sys.stdout)
)

In [2]:
import openai
# Override the default OpenAI base URL so requests go through a proxy endpoint
# (presumably Helicone, given the "Helicone-Auth" header set on the LLM later
# in this notebook -- confirm).
openai.api_base = "https://oai.hconeai.com/v1"


INFO:numexpr.utils:Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.
NumExpr defaulting to 8 threads.


In [3]:
import pinecone

# Read the Pinecone credential from the environment (a KeyError is raised if
# PINECONE_API_KEY is not set) and initialise the client for this environment.
api_key = os.environ["PINECONE_API_KEY"]
pinecone.init(
    environment="eu-west1-gcp",
    api_key=api_key,
)

  from tqdm.autonotebook import tqdm


In [4]:
# Create the index only when it does not exist yet. Checking explicitly
# (rather than swallowing every exception from create_index) keeps the cell
# safe to re-run while still surfacing real failures such as bad credentials,
# exhausted quota, or invalid arguments.
# dimension=1536 is the embedding size of text-embedding-ada-002.
if "quickstart-index" not in pinecone.list_indexes():
    pinecone.create_index("quickstart-index", dimension=1536, metric="euclidean", pod_type="p1")

In [5]:
# Client handle for the "quickstart-index" index; it is passed to
# PineconeVectorStore further down.
pinecone_index = pinecone.Index("quickstart-index")

#### Load documents, build the PineconeVectorStore and GPTVectorStoreIndex

In [6]:
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.vector_stores import PineconeVectorStore

In [7]:
from llama_index.data_structs.node import Node

# Five short celebrity biographies. Each node carries "category" and "country"
# metadata in `extra_info`; these are the same two fields described to the
# auto-retriever later in the notebook.
nodes = [
    Node(
        "Michael Jordan is a retired professional basketball player, widely regarded as one of the greatest basketball players of all time.",
        extra_info={"category": "Sports", "country": "United States"},
    ),
    Node(
        "Angelina Jolie is an American actress, filmmaker, and humanitarian. She has received numerous awards for her acting and is known for her philanthropic work.",
        extra_info={"category": "Entertainment", "country": "United States"},
    ),
    Node(
        "Elon Musk is a business magnate, industrial designer, and engineer. He is the founder, CEO, and lead designer of SpaceX, Tesla, Inc., Neuralink, and The Boring Company.",
        extra_info={"category": "Business", "country": "United States"},
    ),
    Node(
        "Rihanna is a Barbadian singer, actress, and businesswoman. She has achieved significant success in the music industry and is known for her versatile musical style.",
        extra_info={"category": "Music", "country": "Barbados"},
    ),
    Node(
        "Cristiano Ronaldo is a Portuguese professional footballer who is considered one of the greatest football players of all time. He has won numerous awards and set multiple records during his career.",
        extra_info={"category": "Sports", "country": "Portugal"},
    ),
]

In [8]:
from langchain import OpenAI
from llama_index.indices.service_context import ServiceContext
from llama_index.llm_predictor.base import LLMPredictor


# Back the index with Pinecone, storing vectors under a dedicated namespace.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index, namespace='test_new_new')
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Read the Helicone credential from the environment instead of embedding it in
# the notebook (a KeyError is raised if HELICONE_API_KEY is not set). Secrets
# must never be committed in source or cell outputs.
helicone_api_key = os.environ["HELICONE_API_KEY"]

# The extra header is forwarded with each LLM request; langchain moves
# `headers` into model_kwargs, as the warning printed by this cell reports.
service_context = ServiceContext.from_defaults(
    llm_predictor=LLMPredictor(
        llm=OpenAI(
            headers={
                "Helicone-Auth": f"Bearer {helicone_api_key}"
            }
        )
    )
)

                    headers was transfered to model_kwargs.
                    Please confirm that headers is what you intended.
                    headers was transfered to model_kwargs.
                    Please confirm that headers is what you intended.


In [9]:
# Build the vector index over `nodes`, using the Pinecone-backed storage
# context and the service context configured above.
index = GPTVectorStoreIndex(
    nodes,
    storage_context=storage_context,
    service_context=service_context,
)

INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 211 tokens
> [build_index_from_nodes] Total embedding token usage: 211 tokens


In [10]:
from llama_index.indices.vector_store.auto_retriever.auto_retriever import VectorIndexAutoRetriever
from llama_index.indices.vector_store.auto_retriever.schema import MetadataInfo, VectorStoreInfo


# Describe the stored content and its metadata fields. This description is
# handed to the auto-retriever (presumably so the LLM can derive metadata
# filters from a natural-language query -- confirm against the library docs).
vector_store_info = VectorStoreInfo(
    content_info="brief biography of celebrities",
    metadata_info=[
        MetadataInfo(
            name="category",
            type="str",
            description="Category of the celebrity, one of [Sports, Entertainment, Business, Music]",
        ),
        MetadataInfo(
            name="country",
            type="str",
            description="Country of the celebrity, one of [United States, Barbados, Portugal]",
        ),
    ],
)
retriever = VectorIndexAutoRetriever(index, vector_store_info=vector_store_info)

In [11]:
# Run a natural-language query through the auto-retriever.
# NOTE(review): only five nodes were indexed above, three of them with country
# "United States", so far fewer than fifty results can come back -- confirm
# that "fifty" is the intended wording.
retriever.retrieve('Tell me about fifty celebrities from United States')