In [1]:
# -q (Quiet Mode) :
# Useful when you don’t want to see a lot of logs, especially in scripts or notebooks.

# -U (Upgrade Mode) : 
# Ensures that the latest version of the package is installed. 
# If an older version is already installed, it will upgrade to the latest available version.

! pip install -qU langchain-pinecone langchain-google-genai

In [2]:

import os
from dotenv import load_dotenv  # helps to load env files


load_dotenv()

PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')

os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')
# print("GOOGLE_API_KEY : : : ", GOOGLE_API_KEY)


In [3]:
from pinecone import Pinecone , ServerlessSpec

# Create PineCone Object 
pc = Pinecone(api_key=PINECONE_API_KEY )

pc

  from tqdm.autonotebook import tqdm


<pinecone.control.pinecone.Pinecone at 0x1bdc45641a0>

### Creating PineCone Index 

In [4]:
import time

index_name = "langchain-test-index"  # change if desired

# First delete previous one then re-initialise index on pinecone where our data will goona save 
pc.delete_index(index_name)  # Deletes the existing index
time.sleep(10)  # Wait for the deletion to complete
print("Index Recreate Successfully")
print(f"Connected to existing index: {index_name}")

pc.create_index(
        name=index_name,
        dimension=768,
        metric="cosine",
        # ServerlessSpec allows to make a region to store data on a given index 
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
)


index = pc.Index(index_name)
index , pc.index_api

Index Recreate Successfully
Connected to existing index: langchain-test-index


(<pinecone.data.index.Index at 0x1bdc4566120>,
 <pinecone.core.openapi.control.api.manage_indexes_api.ManageIndexesApi at 0x1bdc4663770>)

### Create Embedding 
#### Use Google Embedding Technique 

In [5]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embedding  = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
embedding


GoogleGenerativeAIEmbeddings(client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x000001BDC7C97620>, model='models/embedding-001', task_type=None, google_api_key=SecretStr('**********'), credentials=None, client_options=None, transport=None, request_options=None)

In [6]:
vector = embedding.embed_query("We are building a rag system")

vector

[0.016110654920339584,
 -0.02008357085287571,
 -0.041943229734897614,
 -0.027132369577884674,
 -0.030067667365074158,
 0.03631262108683586,
 0.024447506293654442,
 -0.042094066739082336,
 0.010570426471531391,
 0.07912363857030869,
 0.018259946256875992,
 0.06809314340353012,
 -0.023410169407725334,
 -0.010770240798592567,
 0.0011473031481727958,
 0.004785176366567612,
 -0.005052696913480759,
 0.021912693977355957,
 0.023414108902215958,
 -0.034687433391809464,
 0.003807725617662072,
 0.022651413455605507,
 0.029435686767101288,
 0.01847793534398079,
 0.0032262131571769714,
 0.03378104418516159,
 0.02063453383743763,
 -0.04224745184183121,
 0.02168904058635235,
 -0.004437553230673075,
 -0.03348710387945175,
 0.005776851903647184,
 -0.08401726931333542,
 0.021970562636852264,
 -0.021756507456302643,
 -0.019567569717764854,
 0.022737422958016396,
 -0.008521494455635548,
 0.007043466437608004,
 0.027777494862675667,
 0.008334580808877945,
 -0.034330375492572784,
 -0.02418278343975544,
 -0

### Add PineCone vector store to store created embedding on a particular index 

In [7]:
from langchain_pinecone import PineconeVectorStore

vector_store = PineconeVectorStore(index=index , embedding=embedding)

vector_store

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x1bdc7cf9940>

### Data Save  with some dummy document

In [8]:
from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocalate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)


document_1

Document(metadata={'source': 'tweet'}, page_content='I had chocalate chip pancakes and scrambled eggs for breakfast this morning.')

In [9]:
# Library to generate unique ids
from uuid import uuid4

from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocalate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]

In [10]:
len(documents)

10

In [11]:
# Documents based embedding stores on unique indexes

uuids = [str(uuid4()) for _ in range(len(documents))]

uuids

vector_store.add_documents(documents=documents, ids=uuids)

['f1a938b4-092b-424d-bba9-2165c073b797',
 '10e841a0-c667-4ece-bead-2ce7c343e68c',
 'ab74b5c2-5e3e-47bc-bc47-a1e8299a9f81',
 '2c1f8ad0-29f7-4e87-aac1-62690cd56931',
 '46aaa96f-dc6e-47a5-8b0b-b55d79ea491a',
 'e2c6ec80-719b-4526-a2dc-64800048f423',
 '974444ad-50d3-458f-a126-4783851ce670',
 'f4bc0328-409b-4b2b-bf19-94c82c2e8048',
 '0b7e9e8b-a2cf-47db-96d3-f19d4a0b0a7b',
 '005de6f5-84e8-46dc-ade1-3bd8b438db4d']

### Data Reterival 

In [12]:
result = vector_store.similarity_search(
    "Langchain provides abstractions to make working with LLMs easy?",
    k = 2,
    filter={"source": "tweet"}
)

for res in result:
    print(f"* {res.page_content} [{res.metadata}]")


### Similarity search with score We can also search with score

In [13]:
results = vector_store.similarity_search_with_score(
    "Will it be hot tomorrow?"
)
for res, score in results:
    print(f"* [SIM={score:3f}] {res.page_content} [{res.metadata}]")

### Creating Model

In [14]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Initialize model
model = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

In [15]:
def answer_to_user(query: str):
    search_results = vector_store.similarity_search(query)
    
    retrieved_texts = "\n".join([doc.page_content for doc in search_results])

    prompt = f"Use the following information to answer the query:\n\n{retrieved_texts}\n\nQuery: {query}"

    final_answer = model.invoke(prompt)
    return final_answer

In [16]:
# Example usage
query = "Tell me about Langchain?"
response = answer_to_user(query)

print(response.content)

Okay, I need some information about Langchain to answer your query effectively. I don't have any pre-existing knowledge about it.

To give you a good overview of Langchain, please provide me with some text or a document describing it. For example, you could give me:

*   **A link to the Langchain website.**
*   **A description of Langchain from a blog post or article.**
*   **A summary of Langchain from its documentation.**

Once you provide me with some information, I will be happy to summarize it for you.
