# Project 2: LangChain RAG Project

In [4]:
%pip install -qU langchain-pinecone langchain-google-genai

In [5]:
from google.colab import userdata

from pinecone import Pinecone, ServerlessSpec

# Read the Pinecone API key from Colab's `userdata` store (entry 'PINECONE_API_KEY').
pinecone_api_key = userdata.get('PINECONE_API_KEY')

# Pinecone client handle; later cells use it to create and open the index.
pc = Pinecone(api_key=pinecone_api_key)

In [11]:
import time

index_name = "langchain-rag-project"

# Create the index only when it does not exist yet. `create_index` fails when
# the name is already taken, which would break this cell on every re-run
# (e.g. Restart Kernel -> Run All).
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        # Must equal the length of the vectors produced by the embedding model.
        # NOTE(review): 768 is assumed to match the model configured in this
        # notebook — confirm with len() of an embedded query.
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# Block until the index reports itself ready, so the cells that write to it
# do not race a still-initialising index.
while not pc.describe_index(index_name).status["ready"]:
    time.sleep(1)

index = pc.Index(index_name)

In [7]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import os

# Copy the Google key from Colab's `userdata` store into the GOOGLE_API_KEY
# environment variable (presumably where the langchain_google_genai clients
# look for it — confirm against the library docs).
os.environ["GOOGLE_API_KEY"] = userdata.get('GOOGLE_API_KEY')

# Embedding client; used directly for query embedding and handed to the vector store.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [8]:
# Embed a single sentence; the resulting vector is inspected in the next cell.
vector = embeddings.embed_query("We Are Building A Rag Text Based Output")

In [9]:
# Show only the first five components instead of the whole embedding.
vector[:5]

[0.02381775714457035,
 -0.014614936895668507,
 -0.05852246657013893,
 -0.012175202369689941,
 0.015380159951746464]

In [12]:
from langchain_pinecone import PineconeVectorStore

# LangChain vector store built from the Pinecone index handle and the embedding client.
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [13]:
from langchain_core.documents import Document

# A single example Document: the text goes in `page_content`, and the origin
# label in `metadata["source"]`. The same document is defined again, with the
# same content, in the cell that builds the full `documents` list.
document_1 = Document(
    page_content="I had chocalate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

In [14]:
# Display the Document's repr (metadata and page_content).
document_1

Document(metadata={'source': 'tweet'}, page_content='I had chocalate chip pancakes and scrambled eggs for breakfast this morning.')

In [15]:
from uuid import uuid4

from langchain_core.documents import Document

# Sample corpus as (text, source) pairs. The order of this table is the order
# of `documents`, and therefore the order in which IDs are paired with them later.
_corpus = [
    ("I had chocalate chip pancakes and scrambled eggs for breakfast this morning.", "tweet"),
    ("The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.", "news"),
    ("Building an exciting new project with LangChain - come check it out!", "tweet"),
    ("Robbers broke into the city bank and stole $1 million in cash.", "news"),
    ("Wow! That was an amazing movie. I can't wait to see it again.", "tweet"),
    ("Is the new iPhone worth the price? Read this review to find out.", "website"),
    ("The top 10 soccer players in the world right now.", "website"),
    ("LangGraph is the best framework for building stateful, agentic applications!", "tweet"),
    ("The stock market is down 500 points today due to fears of a recession.", "news"),
    ("I have a bad feeling I am going to get deleted :(", "tweet"),
]

# One Document per row, each with its own metadata dict.
documents = [
    Document(page_content=text, metadata={"source": source})
    for text, source in _corpus
]

# Keep the individual names bound as well, so any cell that refers to a single
# document by name continues to work.
(
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
) = documents

In [16]:
# Sanity check: number of documents about to be indexed.
len(documents)

10

In [17]:
from uuid import uuid4
# Show what a generated UUID looks like; the value is not stored.
uuid4()

UUID('88183d96-fe29-4fc0-bf87-fa9a2c9f9d4b')

In [18]:
# One freshly generated string ID per document, in the same order as `documents`.
uuids = [str(uuid4()) for _doc in documents]

# NOTE(review): the IDs are regenerated on every run, so re-running this cell
# presumably stores another copy of each document under new IDs — confirm
# against the index before re-executing.
vector_store.add_documents(documents=documents, ids=uuids)

['c07f9b53-df6f-4706-9ca8-2081befac1ed',
 'a3cfd5cc-34c3-47a4-899c-842d3dc0d215',
 '29099c26-1392-4f0b-ab71-1361c06f3ed7',
 '6db34eca-28cd-485c-8ad6-b8ff16be0119',
 '1fad759f-ef3f-46db-8afe-3209ec0b8b64',
 'cf0371dd-bf7a-41b2-917e-16012e346bb1',
 '952d291c-8aff-447d-93ee-f9cc49c514e6',
 '4abcbe39-34a6-40db-8aed-5a05c964225d',
 '199c37f8-7fc3-4d31-b86c-e1fb6fb56509',
 '98bac295-b186-461b-9609-628f87c35988']

In [19]:
# Ask for up to ten matches, restricted to documents whose source is "tweet".
tweet_query = "LangChain provides abstractions to make working with LLMs easy"
results = vector_store.similarity_search(tweet_query, k=10, filter={"source": "tweet"})

# One line per hit: the text followed by its metadata dict.
for doc in results:
    line = f"* {doc.page_content} [{doc.metadata}]"
    print(line)

In [20]:
# Retrieve the single closest "news" document together with its similarity score.
results = vector_store.similarity_search_with_score(
    "Will it be hot tomorrow?", k=1, filter={"source": "news"}
)
for res, score in results:
    # `.3f` means three decimal places. A bare `3f` is a minimum field width,
    # not a precision, and would still print six decimals.
    print(f"* [SIM={score:.3f}] {res.page_content} [{res.metadata}]")

In [21]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model that `answer_to_user` calls to produce the final answer.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash-exp",
    temperature=0,  # presumably chosen for repeatable answers — confirm
    max_tokens=None,
    timeout=None,
    max_retries=2,
    # other params...
)

In [22]:
def answer_to_user(query: str, k: int = 2):
    """Answer ``query`` with the LLM, using documents from the vector store as references.

    Args:
        query: The user's question; it is also the similarity-search text.
        k: Number of documents to retrieve as references (default 2).

    Returns:
        Whatever ``llm.invoke`` returns for the assembled prompt. Callers in
        this notebook read the reply text from its ``content`` attribute.
    """
    # Retrieval step: fetch the k documents closest to the query.
    vector_results = vector_store.similarity_search(query, k=k)
    print(len(vector_results))

    # Pass the model the document text and source label, not the Python repr
    # of the Document list, which only adds formatting noise to the prompt.
    references = "\n".join(
        f"- {doc.page_content} (source: {(doc.metadata or {}).get('source', 'unknown')})"
        for doc in vector_results
    )
    if not references:
        # Make the absence of context explicit instead of sending an empty section.
        references = "(no references found)"

    prompt = (
        f"ANSWER THIS USER QUERY: {query}\n\n"
        f"Here are some references to answer it:\n{references}"
    )
    return llm.invoke(prompt)

In [23]:
# Run the retrieve-then-generate function once with a sample question.
answer = answer_to_user("LangChain provides abstractions to make working with LLMs easy")

2


In [24]:
# Show the `content` attribute of the object returned by the LLM call.
answer.content

'Based on the provided documents, here\'s what we can say about LangChain:\n\n*   **LangChain is used for building applications.** The second tweet mentions "Building an exciting new project with LangChain," indicating its use in application development.\n*   **LangChain is a framework.** The first tweet refers to "LangGraph," which is described as a framework, and it\'s mentioned in the context of "agentic applications." This suggests that LangChain (or at least its related components like LangGraph) is a framework for building applications.\n*   **LangChain can be used for building stateful, agentic applications.** The first tweet specifically mentions "LangGraph" as the best framework for this type of application. This implies that LangChain, through components like LangGraph, supports the development of applications that have state and act like agents.\n\n**In summary, based on these limited references, LangChain is a framework used for building applications, including stateful, ag

## The End...