In [3]:
import os
import time
from dotenv import load_dotenv
from pinecone import Pinecone, ServerlessSpec
from langchain_mistralai.embeddings import MistralAIEmbeddings

In [4]:
load_dotenv()

True

In [5]:
pinecone_api_key = os.getenv("PINECONE_API_KEY")
if not pinecone_api_key:
    raise ValueError("PINECONE_API_KEY is not set")

pc = Pinecone(api_key=pinecone_api_key)

In [6]:
index_name = "langchain-test-index" 

existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]

In [8]:
if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=1024,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

index = pc.Index(index_name)

PineconeApiException: (409)
Reason: Conflict
HTTP response headers: HTTPHeaderDict({'content-type': 'text/plain; charset=utf-8', 'access-control-allow-origin': '*', 'vary': 'origin,access-control-request-method,access-control-request-headers', 'access-control-expose-headers': '*', 'x-pinecone-api-version': '2024-07', 'X-Cloud-Trace-Context': 'fc666f0a847861df7d51a083ab281723', 'Date': 'Thu, 05 Sep 2024 07:19:01 GMT', 'Server': 'Google Frontend', 'Content-Length': '85', 'Via': '1.1 google', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000'})
HTTP response body: {"error":{"code":"ALREADY_EXISTS","message":"Resource  already exists"},"status":409}


In [16]:
index

<pinecone.data.index.Index at 0x10506bcb0>

In [17]:
mistral_api_key = os.getenv("MISTRAL_API_KEY")
if not mistral_api_key:
    raise ValueError("MISTRAL_API_KEY is not set")

embeddings = MistralAIEmbeddings(model="mistral-embed", api_key=mistral_api_key)




In [18]:
from langchain_pinecone import PineconeVectorStore
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [19]:
from uuid import uuid4

from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocalate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
)



In [20]:
documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]
uuids = [str(uuid4()) for _ in range(len(documents))]

In [21]:
vector_store.add_documents(documents=documents, ids=uuids)

['7a4f8cd5-3459-4ff1-82b7-d56ba30562c0',
 '06cce81b-e90f-4ef1-8a19-e889573aaf7a',
 'b43a2719-bb1d-443e-808c-f83390e2d3f8',
 'd56403f9-1aa1-474e-ab00-9edf95e0614d',
 '24839cea-3ab9-4407-a6d9-88bb254e93ad',
 'e9a38856-594b-4465-bb2f-aa58b81029a0',
 '5b39f3c3-03cf-4f37-966e-03cdf9b8d440',
 'bcb18732-fea1-4f50-a85a-39e4f647c8c8',
 '0339696f-2d59-4d74-bbb7-5ee8228bdadb',
 '2a910a32-367f-4b42-a736-5fa255857abc']

In [29]:
results = vector_store.similarity_search(
    query="What is stock market down today?",
)
results

[Document(metadata={'source': 'news'}, page_content='The stock market is down 500 points today due to fears of a recession.'),
 Document(metadata={'source': 'news'}, page_content='The stock market is down 500 points today due to fears of a recession.'),
 Document(metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.')]

[Document(metadata={'source': 'news'}, page_content='The stock market is down 500 points today due to fears of a recession.'),
 Document(metadata={'source': 'news'}, page_content='The stock market is down 500 points today due to fears of a recession.'),
 Document(metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.')]

In [2]:
def checker_function():
    """
    This is a checker function
    """
    return "Hello World"

checker_function.__doc__

'\n    This is a checker function\n    '

In [3]:
import uuid

id = uuid.uuid4()

In [6]:
str(id)

'a27649eb-97c1-41b9-a842-82e8627e7758'

In [5]:
id.__str__

<bound method UUID.__str__ of UUID('a27649eb-97c1-41b9-a842-82e8627e7758')>