In [32]:
import os
from dotenv import load_dotenv


In [33]:
# Populate os.environ from a local .env file (python-dotenv) so the os.getenv lookup can see the API key.
load_dotenv()

True

In [34]:
# Pinecone API key, read from the `pinecornAPI` environment variable (None if the variable is not set).
pinecorn_key = os.getenv("pinecornAPI")

In [35]:
from pinecone import Pinecone
from langchain_huggingface import HuggingFaceEmbeddings

In [36]:
# Pinecone client used for index management; authenticated with the key read from the environment.
pc = Pinecone(api_key= pinecorn_key)

In [50]:
# BAAI/bge-large-en produces 1024-dimensional embeddings (the 384-dimensional
# variant is bge-small-en), so the Pinecone index must use dimension = 1024.
embeddings = HuggingFaceEmbeddings(
    model = "BAAI/bge-large-en"
)

### Connect to the Pinecone vector database

In [None]:
from pinecone import ServerlessSpec

index_name = "rag"

# Create the serverless index only when it is missing, so re-running this cell
# reuses the existing index instead of failing on a duplicate name.
index_already_exists = pc.has_index(index_name)
if not index_already_exists:
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=index_name,
        dimension=1024,  # must equal the embedding model's output size
        metric="cosine",
        spec=serverless_spec,
    )

# Handle used for all reads/writes against the index.
index = pc.Index(index_name)
index  # vector database is ready


In [None]:
from langchain_pinecone import PineconeVectorStore

# LangChain vector store backed by the Pinecone index; texts are embedded with `embeddings`.
vector_store = PineconeVectorStore(
    index=index,
    embedding=embeddings,
)

vector_store  # display the repr to confirm the store was constructed

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x329288710>

In [40]:
from langchain_core.documents import Document

# Sample corpus as (page_content, source) pairs, in insertion order.
_sample_posts = [
    ("I had chocolate chip pancakes and scrambled eggs for breakfast this morning.", "tweet"),
    ("The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.", "news"),
    ("Building an exciting new project with LangChain - come check it out!", "tweet"),
    ("Robbers broke into the city bank and stole $1 million in cash.", "news"),
    ("Wow! That was an amazing movie. I can't wait to see it again.", "tweet"),
    ("Is the new iPhone worth the price? Read this review to find out.", "website"),
    ("The top 10 soccer players in the world right now.", "website"),
    ("LangGraph is the best framework for building stateful, agentic applications!", "tweet"),
    ("The stock market is down 500 points today due to fears of a recession.", "news"),
    ("I have a bad feeling I am going to get deleted :(", "tweet"),
]

# One Document per pair; each gets its own metadata dict so `source` can be used as a filter.
documents = [
    Document(page_content=text, metadata={"source": origin})
    for text, origin in _sample_posts
]

# Keep the individual names bound as well, for ad-hoc inspection in other cells.
(
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
) = documents

In [None]:
# Upsert the sample documents under deterministic IDs. Without explicit IDs,
# every re-run of this cell inserts the same ten documents again under new
# IDs, and the duplicates then crowd the top-k results of later searches.
# With stable IDs a re-run overwrites the existing vectors instead.
document_ids = [f"doc-{position}" for position in range(1, len(documents) + 1)]
vector_store.add_documents(documents=documents, ids=document_ids)

In [None]:
# Fetch the two closest matches to the query, restricted to documents whose
# metadata has source == "tweet".
result = vector_store.similarity_search(
    "LangChain provides abstraction to make working with LLMs easy",
    k=2,
    filter = {"source" : "tweet"}
)

for res in result :
    # The metadata belongs inside the f-string. Previously the string was
    # closed early, so `[{res.metadata}]` was evaluated as a subscript on the
    # string with a set containing a dict, which raises TypeError.
    print(f"{res.page_content} [{res.metadata}]")

In [None]:
# Retriever configured for threshold search: k = 1 with a score_threshold of 0.4.
threshold_search_kwargs = {"k": 1, "score_threshold": 0.4}
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs=threshold_search_kwargs,
)

# Query the retriever, passing a metadata filter on source == "news" with the call.
news_only_filter = {"source": "news"}
retriever.invoke("Stealing from the bank is a crime", filter=news_only_filter)
