In [1]:
%%capture --no-stderr
%pip install -q -U langchain-pinecone langchain-google-genai

In [2]:
import os
from google.colab import  userdata
from pinecone import Pinecone, ServerlessSpec

os.environ["PINECONE_API_KEY"] = userdata.get("PINECONE_API_KEY")

pinecone_api_key = os.environ.get("PINECONE_API_KEY")

pc = Pinecone(api_key=pinecone_api_key)

In [3]:
import time

index_name = "langchain-test-index"

existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]

if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

index = pc.Index(index_name)

In [4]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

os.environ["GOOGLE_API_KEY"] = userdata.get("GOOGLE_API_KEY")

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vector = embeddings.embed_query("Hello World")
vector[:7]

[0.04656680300831795,
 -0.0376756377518177,
 -0.0274836253374815,
 -0.02519204653799534,
 0.023942284286022186,
 -0.0026659469585865736,
 0.03314977511763573]

In [5]:
from langchain_pinecone import PineconeVectorStore

vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [6]:
from uuid import uuid4
from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocalate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]
uuids = [str(uuid4()) for _ in range(len(documents))]

vector_store.add_documents(documents=documents, ids=uuids)

['81185244-25b0-41e3-9eaf-1a1ad0f851b5',
 'c34dd7c5-2ce0-409a-9d80-3ed7f458c7c1',
 'dce2fd05-54aa-4c75-b19a-97811d68ff91',
 'dde093fc-1324-449f-8708-03a2a594c295',
 'c78c66c1-4688-4369-baa0-0e2194279277',
 '313d4794-8d1a-49db-a33c-f47b6c2eb844',
 'd75cf0c5-778b-465e-bf7a-96642d5bbf57',
 '99ed24ea-6cce-42ac-afcd-99497792b3e1',
 '4b4ec43a-d323-4122-a94a-e6279f226192',
 '01315018-37fa-4445-8bef-4214636ba5a5']

In [9]:
result = vector_store.similarity_search("What is LangChain ?")

for res in result:
  print(f"Content: {res.page_content}   Source: {res.metadata}")

Content: Building an exciting new project with LangChain - come check it out!   Source: {'source': 'tweet'}
Content: LangGraph is the best framework for building stateful, agentic applications!   Source: {'source': 'tweet'}
Content: I have a bad feeling I am going to get deleted :(   Source: {'source': 'tweet'}
Content: I had chocalate chip pancakes and scrambled eggs for breakfast this morning.   Source: {'source': 'tweet'}


In [12]:
result = vector_store.similarity_search_with_score("What is LangChain ?")

# result
for res, score in result:
  print(f"Score: {score}  Content: {res.page_content}   Source: {res.metadata}")

Score: 0.718181193  Content: Building an exciting new project with LangChain - come check it out!   Source: {'source': 'tweet'}
Score: 0.681360066  Content: LangGraph is the best framework for building stateful, agentic applications!   Source: {'source': 'tweet'}
Score: 0.546544492  Content: I have a bad feeling I am going to get deleted :(   Source: {'source': 'tweet'}
Score: 0.518182695  Content: I had chocalate chip pancakes and scrambled eggs for breakfast this morning.   Source: {'source': 'tweet'}
