In [25]:
from langchain_community.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [26]:
from dotenv import load_dotenv
load_dotenv()
import os
os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN')
os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')

In [27]:
len(embeddings.embed_query("Hello world"))

384

In [28]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")

In [29]:
len(embeddings.embed_query("Hello world"))

768

In [30]:
from pinecone import Pinecone

os.environ['PINECONE_API_KEY'] = os.getenv("PINECONE_API_KEY")

In [31]:
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

In [32]:
from pinecone import ServerlessSpec

#### Servlerless means that the server will be managed by the cloud Provider

In [33]:
index_name = "rhy-genai"

In [34]:
pc.has_index(index_name)

True

### Creating an Index

In [35]:
if not pc.has_index(index_name):
    pc.create_index(name=index_name, dimension=768, metric="cosine", spec=ServerlessSpec(cloud="aws",region="us-east-1"))

### Loading the index

In [36]:
index = pc.Index(index_name)

In [37]:
from langchain_pinecone import PineconeVectorStore

In [38]:
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [39]:
vector_store.similarity_search("What is Langchain?", k=3)

[Document(id='fcc31f4e-b144-4b0d-a435-b56721dcdcad', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='555aae72-fc9c-4d78-a35f-22527becdb7a', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!'),
 Document(id='0a91c53f-9c1d-47ee-900b-21bc6aecd26c', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :(')]

In [40]:
from uuid import uuid4
from langchain_core.documents import Document

document_1 = Document(page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.", metadata={"source": "tweet"})

document_2 = Document(page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.", metadata={"source": "news"})

document_3 = Document(page_content="Building an exciting new project with LangChain - come check it out!", metadata={"source": "tweet"})

document_4 = Document(page_content="Robbers broke into the city bank and stole $1M in cash", metadata={"source": "news"})

document_5 = Document(page_content="wow! That was an amazing movie. I can't wait to see it again.",metadata={"source": "tweet"})

document_6 = Document(page_content="Is the new iPhone worth the price? Read this review to find out.", metadata={"source":"website"})

document_7 = Document(page_content="The top 10 soccer players in the world right now.", metadata={"source": "website"})

document_8 = Document(page_content="LangGraph is the best framework for building stateful, agentic applications!", metadata={"source": "tweet"})

document_9 = Document(page_content="The stock market is down 500 points today due to fears of a recession.",metadata={"source": "news"})

document_10 = Document(page_content="I have a bad feeling I am going to get deleted :(", metadata={"source": "tweet"})

documents = [document_1, document_2, document_3, document_4, document_5, document_6, document_7, document_8, document_9, document_10]

In [41]:
uuids = [str(uuid4()) for _ in range(len(documents))]
vector_store.add_documents(documents, ids=uuids)

['14cbc4aa-ee95-442e-8e3f-8d0bfab2c1a3',
 '73f1e0b6-9533-40ba-a87f-e0732e52d1a9',
 '1fd7ead9-bea1-409c-bca6-0b3cd81a7b17',
 'c74fe61b-bf68-4520-b8e5-c166f9e0ddf0',
 'dbe0752f-ed27-4277-bed9-1a78f80364a8',
 'eedc0761-68ea-4124-9e19-b6d7a85202a7',
 '62c01a55-f453-485e-b51c-9c5ba70fe79d',
 '7f4b9f85-8bc7-4180-a961-404cee9a45c0',
 'ea52a3e9-e3b7-4f67-ac1e-af7ee98d8ef7',
 'db173d2d-916e-4e9a-a513-a01a200af247']

In [42]:
vector_store.similarity_search("What is Langchain?", k=3)

[Document(id='fcc31f4e-b144-4b0d-a435-b56721dcdcad', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='1fd7ead9-bea1-409c-bca6-0b3cd81a7b17', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='7f4b9f85-8bc7-4180-a961-404cee9a45c0', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!')]