In [2]:
import json, os, time
import copy


import pprint

# OPENAI
import openai  # module import needed for the openai.<Error> classes used when validating the key
from openai import OpenAI

# LANGCHAIN
from langchain_core.documents import Document
from langchain.vectorstores import Pinecone
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings
from langchain.prompts.prompt import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores import Chroma
from langchain_core.messages import AIMessage, HumanMessage
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_pinecone import PineconeVectorStore

# PINECONE
import pinecone
# NOTE: this rebinds the name `Pinecone` imported from langchain.vectorstores above.
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec


# GENERAL
from dotenv import find_dotenv, load_dotenv
from rich.console import Console

In [3]:
console = Console()

# Load the .env file exactly once and branch on the result (the original
# called load_dotenv() twice, discarding the first return value).
if load_dotenv():
    print("Success: .env file found with some environment variables")
else:
    print(
        "Caution: No environment variables found. Please create .env file in the root directory or add environment variables in the .env file"
    )

# Use .get() for the OpenAI key so that a missing key reaches the friendly
# message at the bottom of this cell instead of raising KeyError here.
api_key = os.environ.get("OPENAI_API_KEY")

# The Pinecone settings are required: a missing one raises KeyError immediately.
PINECONE_API_KEY = os.environ["PINECONE_API_KEY"]
PINECONE_ENV = os.environ["PINECONE_ENV"]
PINCONE_INDEX = os.environ["PINECONE_INDEX"]  # misspelled name kept for backward compatibility
PINECONE_INDEX = PINCONE_INDEX  # correctly spelled alias

# Never echo credentials: cell outputs are saved together with the notebook.
print(f"PINECONE_API_KEY is set | {PINECONE_ENV} | {PINCONE_INDEX}")

# `client` stays None when no key is configured, so the name always exists.
client = None
if api_key:
    client = OpenAI()
    try:
        # Cheap authenticated request used only to confirm the key works.
        client.models.list()
        print("OPENAI_API_KEY is set and is valid")
    # Specific errors first: both are subclasses of openai.APIError, so they
    # would be unreachable if the base class were listed before them.
    except openai.APIConnectionError as e:
        print(f"Failed to connect to OpenAI API: {e}")
    except openai.RateLimitError as e:
        print(f"OpenAI API request exceeded rate limit: {e}")
    except openai.APIError as e:
        print(f"OpenAI API returned an API Error: {e}")
else:
    print("Please set your OpenAI API key as an environment variable OPENAI_API_KEY")

Success: .env file found with some environment variables
[REDACTED] | us-east-1 | test
OPENAI_API_KEY is set and is valid: [REDACTED]


In [4]:
# Deployment target for the serverless index. An unset or empty environment
# variable falls back to the default on the right-hand side of `or`.
cloud = os.getenv("PINECONE_CLOUD") or "aws"
region = os.getenv("PINECONE_REGION") or "us-east-1"

spec = ServerlessSpec(
    cloud=cloud,
    region=region,
)

In [5]:
# Name of the Pinecone index to connect to; None when the variable is unset.
index_name = os.getenv("PINECONE_INDEX")

In [7]:
# Create the Pinecone client from the credentials read in the config cell.
pc = Pinecone(
    api_key=PINECONE_API_KEY,
    environment=PINECONE_ENV,
)
# Handle to the index named by `index_name`.
index = pc.Index(index_name)
# Last expression of the cell, so the index statistics are displayed.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 872}},
 'total_vector_count': 872}

## Retrieval

---

In [12]:
# Module-level OpenAI client; get_embedding below reads this global.
# (The same constructor call also appears in the configuration cell.)
client = OpenAI()


def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding vector for ``text``, or ``None`` on failure.

    Uses the module-level ``client`` to call the embeddings endpoint.

    Args:
        text: The input to embed. ``None`` and the empty string are rejected
            locally without calling the API.
        model: Name of the embedding model passed to the endpoint.

    Returns:
        The ``embedding`` attribute of the first item in the response, or
        ``None`` if the input was empty or the request raised an exception.
    """
    # An empty input cannot be embedded; skip the network round trip.
    if text is None or text == "":
        print("Embedding skipped: empty input")
        return None

    try:
        response = client.embeddings.create(input=[text], model=model)
        return response.data[0].embedding
    except Exception as e:
        # Best-effort by design: report the failure and let the caller
        # check for None instead of propagating the exception.
        print(f"Embedding failed: {text} | {e}")
        return None

In [32]:


# Embed the question, then fetch the 5 closest vectors from the "" namespace.
query_embed = get_embedding("What is the introduction about?")

# get_embedding signals failure by returning None; stop here with a clear
# message rather than forwarding an invalid vector to the index.
if query_embed is None:
    raise RuntimeError("get_embedding returned None for the query; cannot search the index.")

res = index.query(
    namespace="",
    vector=query_embed,
    top_k=5,
    include_values=True,
    include_metadata=True,
)

In [34]:
# Print the "content" metadata field of every match returned by the query.
for hit in res["matches"]:
    hit_metadata = hit["metadata"]
    print(hit_metadata["content"])



1 Introduction
INTRODUCTION
Introduction
Introduction
Introduction
