In [1]:
import os
from dotenv import load_dotenv
# Load settings (the API keys read below via os.getenv) from a local .env file;
# the recorded cell output shows the call returned True.
load_dotenv()

True

In [2]:
# Mirror the Hugging Face token into os.environ for libraries that read it
# from there. os.getenv() returns None when the variable is missing, and
# assigning None into os.environ raises "TypeError: str expected, not NoneType",
# so the value is only written back when it is actually set.
hf_token = os.getenv("HF_TOKEN")
if hf_token is not None:
    os.environ["HF_TOKEN"] = hf_token
else:
    print("HF_TOKEN is not set; continuing without a Hugging Face token.")

In [3]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model for the first demo; the length check further down reports
# 384-dimensional vectors for this checkpoint.
embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Embed a short sample sentence with the currently bound `embeddings` model.
embedded_query = embeddings.embed_query("Hello AI.")

In [5]:
# Number of dimensions in the embedding vector (the recorded output is 384).
len(embedded_query)

384

In [6]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Rebinds `embeddings`: from this cell on, the name refers to the Google
# embedding model instead of the Hugging Face one created earlier.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
)

In [7]:
# Embed the same sample sentence with the Google model; the cell output is the
# raw list of floats.
embeddings.embed_query("Hello AI.")

[0.00781067181378603,
 -0.04950970411300659,
 -0.0035673705860972404,
 -0.019869284704327583,
 0.017555052414536476,
 -0.02408524975180626,
 0.03718045353889465,
 0.011884482577443123,
 0.023935791105031967,
 0.0069458624348044395,
 0.05247059091925621,
 0.03164972364902496,
 -0.016280371695756912,
 -0.04358460009098053,
 -0.01278084609657526,
 -0.027691984549164772,
 -0.004162181634455919,
 0.0534166656434536,
 -0.022664489224553108,
 -0.021502653136849403,
 0.034393101930618286,
 -0.0015792304184287786,
 0.0007697034161537886,
 -0.028361313045024872,
 0.02985135279595852,
 0.0003370810009073466,
 0.030844228342175484,
 -0.043944284319877625,
 -0.04046711325645447,
 0.028150955215096474,
 -0.0448278933763504,
 0.0426693819463253,
 -0.04995980113744736,
 -0.006280964240431786,
 0.05793306976556778,
 -0.07464122027158737,
 0.04636314511299133,
 0.017015298828482628,
 0.010442662984132767,
 0.002596951089799404,
 0.003630775725468993,
 -0.07558967918157578,
 -0.01444965973496437,
 -0.013

In [8]:
# Vector length of the Google embedding (the recorded output is 768); the
# Pinecone index created later uses dimension = 768.
len(embeddings.embed_query("Hello AI."))

768

In [9]:
from pinecone import Pinecone
# Mirror the Pinecone API key into os.environ. os.getenv() returns None when
# the variable is missing, and assigning None into os.environ raises
# "TypeError: str expected, not NoneType", so only write the value back when it
# is set and otherwise say clearly what is missing.
pinecone_api_key = os.getenv("PINECONE_API_KEY")
if pinecone_api_key is not None:
    os.environ["PINECONE_API_KEY"] = pinecone_api_key
else:
    print("PINECONE_API_KEY is not set; add it to your .env file before creating the Pinecone client.")

In [10]:
# Pinecone client, authenticated with the API key taken from the environment.
pc = Pinecone(
    api_key=os.getenv("PINECONE_API_KEY"),
)

In [11]:
# Display the client object's repr to confirm it was constructed.
pc

<pinecone.pinecone.Pinecone at 0x319c312b0>

### Vector DB Comparison: http://superlinked.com/vector-db-comparison

# RAG Pipeline Diagram

```
PDF --extract--> Pages --chunking--> Document Objects (1. Page Content, 2. Metadata) 
                                            |
                                            |
                                        Indexing (HNSW / IVF / Flat)
                                            |
                                            |
                                           \ /
                                    --------------------
                                    |                   |
                                    |                   |<-Similarity Search("k" hyperparameter uses KNN)--embedding--Query
                                    |___________________|       (Cosine Similarity, Dot product, 
                                                                    L2, L1(Manhattan), Jaccard)

```
Suppose we are not using Flat indexing and instead use an HNSW or IVF index, i.e. a graph-based or a cluster-based approach, respectively. In that case the search is *Approximate Nearest Neighbour* (ANN), unlike the exact nearest-neighbour search (KNN) that Flat indexing performs.

--------------------------------------------------------------------------
### Multimodal Data => Text, Images, Audio, Video

An LLM that can handle multiple data formats is called a multimodal model; this capability is known as multimodality.

--------------------------------------------------------------------------

In [12]:
from pinecone import ServerlessSpec

# ServerlessSpec is a class used to describe the serverless specification for a Pinecone index: the servers are managed by the cloud provider.

In [17]:
# Name of the Pinecone index used by every cell below.
index_name: str = "agentic2-0"

In [18]:
# Check whether an index with this name already exists (the recorded run
# returned False, i.e. it had not been created yet).
pc.has_index(index_name)

False

In [19]:
# Create the index, but only if it does not exist yet, so the cell can be
# re-run safely.
index_exists = pc.has_index(index_name)

if not index_exists:
    pc.create_index(
        name=index_name,
        # Must equal the embedding length; the Google model above reported 768.
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [20]:
# Get a handle to the index by name; the vector store below is built on it.
index = pc.Index(index_name)

In [23]:
from langchain_pinecone import PineconeVectorStore

In [26]:
# Initialize the vector store on top of the Pinecone index, using the current
# `embeddings` model to embed documents and queries.
vector_store = PineconeVectorStore(
    index=index,
    embedding=embeddings,
)

In [30]:
# Run a similarity search for a sample query against the vector store.
results = vector_store.similarity_search(
    "What is the capital of France?",
)

# No output is expected here because the vector store is still empty.
results

[]

In [31]:
from uuid import uuid4
from langchain_core.documents import Document

# Sample corpus as (page_content, source) pairs. The source is stored under the
# "source" metadata key, which a later search uses as a filter.
_sample_posts = [
    ("I had chocolate chip pancakes and scrambled eggs for breakfast this morning.", "tweet"),
    ("The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.", "news"),
    ("Building an exciting new project with LangChain - come check it out!", "tweet"),
    ("Robbers broke into the city bank and stole $1 million in cash.", "news"),
    ("Wow! That was an amazing movie. I can't wait to see it again.", "tweet"),
    ("Is the new iPhone worth the price? Read this review to find out.", "website"),
    ("The top 10 soccer players in the world right now.", "website"),
    ("LangGraph is the best framework for building stateful, agentic applications!", "tweet"),
    ("The stock market is down 500 points today due to fears of a recession.", "news"),
    ("I have a bad feeling I am going to get deleted :(", "tweet"),
]

# Build one Document per pair, in the order listed.
documents = [
    Document(page_content=text, metadata={"source": source})
    for text, source in _sample_posts
]

# Also expose each document under its own document_N name.
(
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
) = documents

In [32]:
# Display the list of Document objects built above.
documents

[Document(metadata={'source': 'tweet'}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.'),
 Document(metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(metadata={'source': 'tweet'}, page_content="Wow! That was an amazing movie. I can't wait to see it again."),
 Document(metadata={'source': 'website'}, page_content='Is the new iPhone worth the price? Read this review to find out.'),
 Document(metadata={'source': 'website'}, page_content='The top 10 soccer players in the world right now.'),
 Document(metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic application

In [37]:
# One random UUID4 string per document, used as the vector id in the store.
# NOTE(review): the ids are random, so re-running this cell and the insert cell
# presumably stores the same documents again under new ids - confirm whether
# that is intended.
uuids = [str(uuid4()) for _document in documents]

# Display the generated ids.
uuids

['cc37874f-1f4a-4cc7-99a5-c5ee6b29a2f9',
 '8fed28fc-1fb5-463f-b900-37c3bcd24ea5',
 '648f8705-11eb-4428-9968-9f50a8ebe7b1',
 '23c2823c-bee7-4e3d-980e-9bd0c9d75174',
 '1b867046-fa76-4b43-b7f6-55374aeefaec',
 '3b898657-4aab-42ad-b6e8-bc4f1d43772a',
 'bed7ffa5-9cc1-418d-a431-b6d1624afda7',
 '24bc4bc1-57f3-422d-858d-4bf2683c19f6',
 '8f5a186d-95e9-4623-8dff-3a377fc03ef6',
 '9afbba1a-6f14-447d-bc06-8d6e386ee49e']

In [38]:
# Insert the documents under the generated ids; the recorded output is the
# list of ids that was passed in.
vector_store.add_documents(
    documents=documents,
    ids=uuids,
)

['cc37874f-1f4a-4cc7-99a5-c5ee6b29a2f9',
 '8fed28fc-1fb5-463f-b900-37c3bcd24ea5',
 '648f8705-11eb-4428-9968-9f50a8ebe7b1',
 '23c2823c-bee7-4e3d-980e-9bd0c9d75174',
 '1b867046-fa76-4b43-b7f6-55374aeefaec',
 '3b898657-4aab-42ad-b6e8-bc4f1d43772a',
 'bed7ffa5-9cc1-418d-a431-b6d1624afda7',
 '24bc4bc1-57f3-422d-858d-4bf2683c19f6',
 '8f5a186d-95e9-4623-8dff-3a377fc03ef6',
 '9afbba1a-6f14-447d-bc06-8d6e386ee49e']

In [43]:
# Ask for up to six matches, restricted to documents whose "source" metadata
# equals "tweet". The recorded run returned five results.
results = vector_store.similarity_search(
    "What is Langchain?",
    k=6,
    filter={"source": {"$eq": "tweet"}},
)

In [44]:
# Display the documents returned by the filtered search.
results

[Document(id='648f8705-11eb-4428-9968-9f50a8ebe7b1', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='24bc4bc1-57f3-422d-858d-4bf2683c19f6', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!'),
 Document(id='9afbba1a-6f14-447d-bc06-8d6e386ee49e', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :('),
 Document(id='1b867046-fa76-4b43-b7f6-55374aeefaec', metadata={'source': 'tweet'}, page_content="Wow! That was an amazing movie. I can't wait to see it again."),
 Document(id='cc37874f-1f4a-4cc7-99a5-c5ee6b29a2f9', metadata={'source': 'tweet'}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.')]

In [52]:
# Wrap the vector store as a retriever that applies a relevance-score
# threshold of 0.8 and is configured with k = 5.
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 5, "score_threshold": 0.8},
)

In [53]:
# Try the retriever on a short query; the recorded run returned the two
# LangChain / LangGraph tweets.
retriever.invoke("langchain?")

[Document(id='648f8705-11eb-4428-9968-9f50a8ebe7b1', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='24bc4bc1-57f3-422d-858d-4bf2683c19f6', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!')]

### Now building a RAG Pipeline

In [54]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model that generates the final answer in the RAG chain.
# temperature=0 is presumably chosen to keep the answers as repeatable as possible.
model = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0,
)

In [55]:
from langchain import hub
# Pull the "rlm/rag-prompt" prompt from the hub; it is displayed a few cells
# below as a ChatPromptTemplate taking 'context' and 'question'.
prompt = hub.pull("rlm/rag-prompt")

In [56]:
import pprint
# Pretty-print the message templates that make up the pulled prompt.
pprint.pprint(prompt.messages)

[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]


[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]

In [62]:
# Custom Prompt Template

from langchain_core.prompts import PromptTemplate

# Bind the template to a name so it can be reused (for example, swapped in for
# the hub prompt). A bare constructor call is only displayed and then discarded.
custom_prompt = PromptTemplate(
    template = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:""",
    input_variables=['context', 'question']
)

# Last expression of the cell: still show the template as the cell output.
custom_prompt

PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:")

In [63]:
# Display the prompt pulled from the hub (a ChatPromptTemplate, per the output).
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])

In [65]:
# Fill the hub prompt with a hand-written question and context to see the
# exact message that would be sent to the model.
prompt.invoke(
    {
        "question": "What is a Langchain",
        "context": "Langchain is very super framework for lLM.",
    }
)

ChatPromptValue(messages=[HumanMessage(content="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: What is a Langchain \nContext: Langchain is very super framework for lLM. \nAnswer:", additional_kwargs={}, response_metadata={})])

In [57]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [59]:
def format_docs(docs):
    """Join the page_content of each document into one string.

    Args:
        docs: iterable of objects exposing a ``page_content`` string.

    Returns:
        The contents in input order, separated by a blank line; an empty
        string when ``docs`` is empty.
    """
    contents = [item.page_content for item in docs]
    return "\n\n".join(contents)

# Assemble the RAG chain step by step:
#   1. "context"  <- retriever output piped through format_docs
#      "question" <- the chain input, via RunnablePassthrough
#   2. the prompt, 3. the chat model, 4. StrOutputParser on the model reply
context_from_retriever = retriever | format_docs
prompt_inputs = {
    "context": context_from_retriever,
    "question": RunnablePassthrough(),
}
rag_chain = prompt_inputs | prompt | model | StrOutputParser()

In [66]:
# Ask about a topic that is not in the indexed documents. In the recorded run
# the retriever found nothing above the 0.8 threshold (see the warning in the
# output), yet the model still produced an answer, so that answer is not
# backed by any retrieved context.
rag_chain.invoke("What is LLAMA Model?")

No relevant docs were retrieved using the relevance score threshold 0.8


"LLAMA (Large Language Model Meta AI) is a large language model developed by Meta AI.  It's designed for generating text, translating languages, and answering questions.  Its architecture allows for efficient training and deployment."

# ASSIGNMENT 

## First, complete the remaining part of this notebook (create a proper RAG)

## Second, the assignment is: take multiple PDFs containing text, images, and tables
1. Fetch the data from the PDFs.
2. There should be at least 200 pages.
3. If chunking is required, do the chunking (use the semantic chunking technique) and then create the embeddings.
4. Store them inside a vector database (use any one of: 1. MongoDB 2. AstraDB 3. OpenSearch 4. Milvus). ## I have not discussed these, so you need to explore them.
5. Create an index with each of the three indexing mechanisms (Flat, HNSW, IVF). ## I have not discussed this, so you need to explore it.
6. Create a retriever pipeline.
7. Check the retrieval time (which one is fastest).
8. Print the accuracy score of every similarity search.
9. Perform reranking using either BM25 or MMR. ## I have not discussed this, so you need to explore it.
10. Then write a prompt template.
11. Generate an output through the LLM.
12. Render that output to a DOCX file. ## I have not discussed this, so you need to explore it.

As an additional tip: you can follow the RAG playlist on my YouTube channel.

After completing it, push it to your GitHub and share the link to my email ID:
snshrivas3365@gmail.com

Also share the assignment in your community chat, tagging Krish and Sunny.

Deadline: by Friday, 9 PM.
   