In [3]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [5]:
os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")

In [6]:
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name = "all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
embeddings.embed_query("Hello AI")

[-0.03338826447725296,
 0.03453978896141052,
 0.059474579989910126,
 0.05928613990545273,
 -0.06353534758090973,
 -0.06819580495357513,
 0.08823321759700775,
 0.03444082289934158,
 -0.03278519585728645,
 -0.015814965590834618,
 0.0209816787391901,
 -0.018340284004807472,
 -0.03983219712972641,
 -0.08047071844339371,
 -0.014469241723418236,
 0.03326484188437462,
 0.014259262941777706,
 -0.03404995799064636,
 -0.1429157704114914,
 -0.023083403706550598,
 -0.021380173042416573,
 0.002633568597957492,
 -0.047292713075876236,
 -0.010752708651125431,
 -0.0686679407954216,
 0.031125016510486603,
 0.07594595849514008,
 0.0011282487539574504,
 0.011631973087787628,
 -0.036039188504219055,
 0.04483756795525551,
 0.018390754237771034,
 0.12672804296016693,
 -0.001359762973152101,
 0.008206652477383614,
 0.06909966468811035,
 -0.08076358586549759,
 -0.05841311812400818,
 0.0537545382976532,
 0.026227526366710663,
 -0.006828584708273411,
 -0.05635837838053703,
 0.0032929701264947653,
 -0.0725018307

## Metric Evaluation

### 1. Cosine Similarity:

Cosine similarity is a metric used to measure the similarity between two non-zero vectors in an inner product space, by calculating the cosine of the angle between them. It essentially quantifies how closely the vectors point in the same direction, regardless of their magnitude. A value of 1 indicates perfect similarity (vectors are identical), 0 indicates no similarity (vectors are orthogonal), and -1 indicates opposite directions. 

In simpler terms: Imagine two arrows in a multi-dimensional space. Cosine similarity tells you how much those arrows are pointing in the same direction, without considering how long they are. 

### 2. Euclidean Distances

Euclidean distance is the straight-line distance between two points in Euclidean space, which can be calculated using the Pythagorean theorem. It's the most common way to measure distance and is also known as the Pythagorean distance. 



In [8]:
from sklearn.metrics.pairwise import cosine_similarity

In [9]:
documents = ["What is a capital of USA?",
             "Who is a president of USA?",
             "Who is the prime minister of India?"]

In [10]:
my_query = "Narendra Modi is the Prime Minister of India"

In [12]:
doc_embeddings = embeddings.embed_documents(documents)
doc_embeddings

[[0.11998696625232697,
  -0.021302683278918266,
  -0.04288090392947197,
  0.06645581871271133,
  -0.0643523707985878,
  -0.04424864798784256,
  0.02240850031375885,
  -0.04987308382987976,
  -0.02343761920928955,
  -0.03397199138998985,
  -0.014047965407371521,
  -0.06065930798649788,
  -0.003906782250851393,
  -0.017782114446163177,
  -0.04797101020812988,
  -0.06668157875537872,
  0.0041031623259186745,
  -0.013092738576233387,
  0.04439769685268402,
  0.022350646555423737,
  0.009459533728659153,
  -0.020564502105116844,
  -0.0003355864610057324,
  -0.0056858183816075325,
  0.05558688938617706,
  0.025123219937086105,
  -0.0028171888552606106,
  0.008758985437452793,
  0.0032552804332226515,
  -0.015963463112711906,
  0.014263669960200787,
  -0.11220843344926834,
  0.08968563377857208,
  -0.031083697453141212,
  -0.024223871529102325,
  0.006152125541120768,
  0.08058718591928482,
  0.018249982967972755,
  0.05568317696452141,
  0.01670263148844242,
  0.015895981341600418,
  0.00034

In [14]:
query_embeddings = embeddings.embed_query(my_query)
query_embeddings

[-0.024652237072587013,
 0.06448185443878174,
 -0.015243325382471085,
 -0.008853206411004066,
 0.0497870035469532,
 -0.04778725281357765,
 0.07992971688508987,
 -0.011070616543293,
 -0.026301443576812744,
 0.0025293477810919285,
 0.015217958949506283,
 -0.049890536814928055,
 -0.0014494945062324405,
 0.03467871621251106,
 0.048125896602869034,
 0.01783384010195732,
 -0.0016865321667864919,
 0.004727161023765802,
 -0.03536824882030487,
 -0.08200477808713913,
 0.05238308385014534,
 0.09946004301309586,
 0.015678854659199715,
 -0.018165597692131996,
 0.010221647098660469,
 -0.02443786710500717,
 -0.03731079399585724,
 -0.038322821259498596,
 -0.013971858657896519,
 -0.008656066842377186,
 0.04228417947888374,
 0.017439648509025574,
 -0.07628332078456879,
 -0.027769681066274643,
 -0.02359374426305294,
 0.01221583504229784,
 -0.09990189969539642,
 0.06312437355518341,
 0.09850537776947021,
 -0.06997863203287125,
 0.054955340921878815,
 -0.04872512072324753,
 0.004754524677991867,
 -0.067817

In [16]:
cosine_similarity([query_embeddings], doc_embeddings)

array([[0.03208762, 0.28085592, 0.73996878]])

In [17]:
from sklearn.metrics.pairwise import euclidean_distances

In [18]:
euclidean_distances([query_embeddings], doc_embeddings)

array([[1.39133918, 1.19928651, 0.72115357]])

| Metric            | Similarity Score Range | Behavior                              |
| ----------------- | ---------------------- | ------------------------------------- |
| Cosine Similarity | \[-1, 1]               | Focuses on angle only |
| L2 Distance       | \[0, ∞)                | Focuses on **magnitude + direction**  |


### FAISS (Facebook AI Similarity Search)

Faiss, short for Facebook AI Similarity Search, is an open-source library developed by Meta AI (formerly Facebook AI Research) for efficient similarity search and clustering of dense vectors. It allows users to build indices and perform searches on large datasets of vectors with high speed and memory efficiency. Faiss is particularly useful for applications involving large-scale machine learning and deep learning models, such as image retrieval, recommendation systems, and natural language processing. 

In [20]:
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore

In [22]:
index = faiss.IndexFlatL2(384)


- IndexFlatL2: A FAISS index type that uses L2 (Euclidean) distance for similarity
- 384: The dimensionality of vectors (each vector has 384 dimensions)
- Why 384 dimensions?
This is from an embedding model (like all-MiniLM-L6-v2)
- Each document/text gets converted to a 384-dimensional vector
- The index stores these vectors for fast similarity search

In [23]:
index

<faiss.swigfaiss.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x16a0d8cf0> >

In [107]:
vector_store = FAISS(
    embedding_function = embeddings,
    index = index,
    docstore = InMemoryDocstore(),
    index_to_docstore_id = {},
)

'\n# FAISS Vector Store Initialization Explained\n\n## Overview\n\nFAISS (Facebook AI Similarity Search) is a library for efficient similarity search and clustering of dense vectors. This document explains how to initialize a FAISS vector store in LangChain.\n\n## Code Structure\n\n```python\nvector_store = FAISS(\n    embedding_function = embeddings,\n    index = index,\n    docstore = InMemoryDocstore(),\n    index_to_docstore_id = {},\n)\n```\n\n## Components Breakdown\n\n### 1. `embedding_function = embeddings`\n\n**What it is:**\n- **Embedding model** that converts text to vectors\n- Usually a HuggingFace or OpenAI embedding model\n\n**Example:**\n```python\nfrom langchain_huggingface import HuggingFaceEmbeddings\n\nembeddings = HuggingFaceEmbeddings(\n    model_name="sentence-transformers/all-MiniLM-L6-v2"\n)\n```\n\n**Purpose:**\n- Converts your documents/texts into 384-dimensional vectors\n- Used for similarity search\n\n### 2. `index = index`\n\n**What it is:**\n- **FAISS inde

##### FAISS Vector Store Initialization Explained

##### Overview

FAISS (Facebook AI Similarity Search) is a library for efficient similarity search and clustering of dense vectors. This document explains how to initialize a FAISS vector store in LangChain.

##### Code Structure

```python
vector_store = FAISS(
    embedding_function = embeddings,
    index = index,
    docstore = InMemoryDocstore(),
    index_to_docstore_id = {},
)
```

##### Components Breakdown

##### 1. `embedding_function = embeddings`

**What it is:**
- **Embedding model** that converts text to vectors
- Usually a HuggingFace or OpenAI embedding model

**Example:**
```python
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
```

**Purpose:**
- Converts your documents/texts into 384-dimensional vectors
- Used for similarity search

##### 2. `index = index`

**What it is:**
- **FAISS index object** that stores and searches vectors
- The actual vector database

**Example:**
```python
import faiss
index = faiss.IndexFlatL2(384)  # 384 dimensions
```

**Purpose:**
- Stores all your document vectors
- Performs fast similarity search
- Handles the mathematical operations

##### 3. `docstore = InMemoryDocstore()`

**What it is:**
- **Document storage** that keeps your original texts
- **In-memory** means stored in RAM (not on disk)

**Purpose:**
- Maps vector IDs back to original documents
- When you find similar vectors, you can retrieve the actual text
- Temporary storage (lost when program ends)

**Alternative:**
```python
# For persistent storage
from langchain_community.docstore.document import Document
docstore = PersistentDocstore()  # Saves to disk
```

##### 4. `index_to_docstore_id = {}`

**What it is:**
- **Mapping dictionary** between vector indices and document IDs
- **Empty initially** - gets populated as you add documents

**How it works:**
```python
# Initially empty
index_to_docstore_id = {}

# After adding documents, it becomes:
index_to_docstore_id = {
    0: "doc_1",    # Vector at index 0 → Document "doc_1"
    1: "doc_2",    # Vector at index 1 → Document "doc_2"
    2: "doc_3",    # Vector at index 2 → Document "doc_3"
}
```

**Purpose:**
- Links FAISS vector positions to actual documents
- Enables retrieval of original text after similarity search

##### Complete Workflow Example

```python
# 1. Setup components
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
index = faiss.IndexFlatL2(384)
docstore = InMemoryDocstore()
index_to_docstore_id = {}

# 2. Create vector store
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=docstore,
    index_to_docstore_id=index_to_docstore_id
)

# 3. Add documents
texts = ["AI is amazing", "Machine learning is powerful", "Deep learning rocks"]
vector_store.add_texts(texts)

# 4. Search
results = vector_store.similarity_search("artificial intelligence", k=2)
```

###### What Happens When You Add Documents

```python
# Before adding documents
index_to_docstore_id = {}

# After adding 3 documents
index_to_docstore_id = {
    0: "doc_0",  # First document
    1: "doc_1",  # Second document  
    2: "doc_2"   # Third document
}

# FAISS index now contains 3 vectors
# Docstore contains 3 original texts
```

##### Why This Architecture?

##### Separation of Concerns:
- **FAISS index**: Handles vector math and similarity search
- **Docstore**: Stores original documents
- **Mapping**: Links vectors to documents

##### Benefits:
- **Fast search**: FAISS optimizes vector operations
- **Memory efficient**: Only store vectors in FAISS
- **Flexible**: Can use different storage backends

##### Alternative (Simpler) Creation

```python
# LangChain provides a simpler way
vector_store = FAISS.from_texts(
    texts=["doc1", "doc2", "doc3"],
    embedding=embeddings
)
# This does all the setup automatically!
```

##### Using the Vector Store

##### Basic Search
```python
# Simple similarity search
results = vector_store.similarity_search("query", k=5)
```

##### As Retriever
```python
# Convert to retriever for use in chains
retriever = vector_store.as_retriever(search_kwargs={"k": 5})

# Use in RAG pipeline
chain = (
    {"context": retriever, "question": RunnablePassthrough()} 
    | prompt 
    | llm
)
```

##### Save and Load
```python
# Save vector store
vector_store.save_local("my_vectorstore")

# Load vector store
loaded_store = FAISS.load_local("my_vectorstore", embeddings)
```

##### Index Types Comparison

| Index Type | Speed | Accuracy | Memory | Best For |
|------------|-------|----------|---------|----------|
| **Flat** | Slow | 100% | Low | Small datasets |
| **HNSW** | Fast | 95-99% | Medium | Large datasets |
| **IVF** | Medium | 90-95% | Low | Medium datasets |

##### Summary

This initialization creates a **complete vector database** with:
- **Vector storage** (FAISS index)
- **Document storage** (InMemoryDocstore)
- **Mapping system** (index_to_docstore_id)
- **Embedding function** (for text→vector conversion)

It's the foundation for building RAG applications with similarity search!

In [29]:
vector_store.add_texts(["AI is Future", "AI is powerful", "Dogs are cute"])

# wrt each sentence, we get an UUID

['6866f0f9-5961-41ef-9fa5-0ee427f203a8',
 'ba936691-62f2-4d96-87ec-8508fb1ef155',
 'd118dfa6-7af8-46ed-a0c3-5c00b018b6bc']

In [28]:
vector_store.index_to_docstore_id

{0: '267afab9-c241-4f70-ac2e-f62a4980795d',
 1: '4f000165-25ad-405a-a9da-737895ba89e6',
 2: 'd4fc0810-cc44-49b6-9d8c-63069c170871'}

In [36]:
results = vector_store.similarity_search("Tell me about AI", k = 3)

In [37]:
results

[Document(id='4f000165-25ad-405a-a9da-737895ba89e6', metadata={}, page_content='AI is powerful'),
 Document(id='ba936691-62f2-4d96-87ec-8508fb1ef155', metadata={}, page_content='AI is powerful'),
 Document(id='267afab9-c241-4f70-ac2e-f62a4980795d', metadata={}, page_content='AI is Future')]

# RAG Pipeline Diagram

## What is a Vector Database?

We know about SQL, NoSQL, now Vector DB is a set of numbers stored in array.

We can use Similarity search and implement in our RAG pipeline.

```
Data -> chunking -> Embedding -> Vector DB <--Similarity Search--- user
                                    |                               ^
                                    |                               |
                                    |                               |
                                Retrieval                       Output
                                    |                               |
                                    |                               |
                                    v                               |
                                Relevant Data -------------------> LLM
```

## Creating Vectore Database

own server: 
 1. In Memory Database
 2. On Disk Database 

cloud server: \
 3. Cloud Database (AWS, CLoud GCP, Pinecone, Qdrant)

### AGENDA:

In memory and On Disk Vectore Store

Few important features in Vector databases:
1. Index - way of referring the data
2. Similarity Search

```
PDF --extract--> Pages --chunking--> Document Objects (1. Page Content, 2. Metadata) 
                                            |
                                            |
                                        Indexing (HNSW / IVF / Flat)
                                            |
                                            |
                                           \ /
                                    --------------------
                                    |                   |
                                    |                   | <--------- Similarity Search ----------- Query
                                    |___________________|       (Cosine Similarity, Dot product, 
                                                                    L2, L1(Manhattan), Jacquered)

```

1. **FLAT INDEXING (IndexFlatL2)**: We navigate each and every indexes 

How it works:

- Exhaustive search: Compares query vector with ALL stored vectors
- 100% accuracy: Always finds the exact nearest neighbors
- Slow: O(n) complexity - gets slower as you add more vectors
- Use case: Small datasets (< 1M vectors)

2. **HNSW (Hierarchical Navigable Small World)**: 

How it works:

- Graph-based: Creates a network of connected vectors
- Multi-layer structure: Fast navigation through layers
- High speed: O(log n) complexity
- Good accuracy: ~95-99% accuracy
- Use case: Large datasets, real-time search

3. **IVF (Inverted File Index)**: 

How it works:

- Clustering: Groups similar vectors into clusters
- Coarse search: First finds relevant clusters
- Fine search: Searches within those clusters only
- Balanced: Good speed/accuracy trade-off
- Use case: Medium to large datasets


## Comparison Table:

| Index Type | Speed | Accuracy | Memory | Best For |
|------------|-------|----------|---------|----------|
| **Flat** | Slow | 100% | Low | Small datasets |
| **HNSW** | Fast | 95-99% | Medium | Large datasets |
| **IVF** | Medium | 90-95% | Low | Medium datasets |

\

| Dataset Size              | Recommended Index                 |
| ------------------------- | --------------------------------- |
| UPTO 1L                     | `IndexFlatL2` or `IndexFlatIP`    |
| UPTO 1M                  | `IndexIVFFlat` or `IndexHNSWFlat` |
| > 1M                      | `IndexIVFPQ` or `IndexHNSWFlat`   |


In [45]:
from uuid import uuid4
from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]

In [48]:
index = faiss.IndexFlatIP(384) # FlatIP for Cosine Similarity

vector_store = FAISS(
    embedding_function = embeddings,
    index = index,
    docstore = InMemoryDocstore(),
    index_to_docstore_id = {},
)

In [49]:
vector_store.add_documents(documents=documents)

['1ece7626-146b-49d6-a51c-591b8321092e',
 '87460740-66b5-4c5b-bde3-f655737139f9',
 '78256921-3c53-406e-a718-a981b5742d93',
 'fe2cc0d0-1360-46f0-bc09-86d40a01fd9c',
 '50a11abb-f039-4640-99a1-b8a9b64bff59',
 '9a578c07-1853-4f10-b7c6-1b82df9d76ba',
 '60745d82-bac1-4e43-bcba-2c8937b3523b',
 'c5eb4f62-9472-4d85-ab84-0fdfd25d89c6',
 'c0129860-3cea-4d13-973a-18b7d07746f1',
 'db8ce266-7a33-45f4-ab5f-33fb1057faa5']

In [52]:
vector_store.similarity_search("LangChain provides abstractions to make working with LLMs easy", k=2)

[Document(id='78256921-3c53-406e-a718-a981b5742d93', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='c5eb4f62-9472-4d85-ab84-0fdfd25d89c6', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!')]

In [62]:
result = vector_store.similarity_search("Langchain provides abstractions to make working with LLMs easier", 
                                k=5, #hyperparameter
                                score_threshold=0.8, #hyperparameter
                                filter={"source": {"$eq":"tweet"}}
                                )

In [63]:
result[0].metadata

{'source': 'tweet'}

In [66]:
result[0].page_content

'Building an exciting new project with LangChain - come check it out!'

In [None]:
retriever=vector_store.as_retriever(search_kwargs={"k":5})

"""

1. as_retriever() Method
What is a Retriever?
A retriever is a LangChain component that:
- Fetches relevant documents from a data source
- Returns documents in a standardized format
- Can be chained with other LangChain components (LLMs, prompts, etc.)

This converts your vector store into a retriever object that:
- Can be used in LangChain chains
- Has standardized methods like get_relevant_documents()
- Integrates seamlessly with RAG pipelines


2. search_kwargs Parameter
What are kwargs?
- kwargs = "keyword arguments"
- Dictionary of parameters passed to the search function
- Configurable options for how the retriever behaves

"""


In [70]:
retriever.invoke("Langchain provides abstractions to make working with LLMs easier")

[Document(id='78256921-3c53-406e-a718-a981b5742d93', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='c5eb4f62-9472-4d85-ab84-0fdfd25d89c6', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!'),
 Document(id='db8ce266-7a33-45f4-ab5f-33fb1057faa5', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :('),
 Document(id='9a578c07-1853-4f10-b7c6-1b82df9d76ba', metadata={'source': 'website'}, page_content='Is the new iPhone worth the price? Read this review to find out.'),
 Document(id='fe2cc0d0-1360-46f0-bc09-86d40a01fd9c', metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]

inmemory(server) \
ondisk(server) \
cloud(yet to discuss)

In [72]:
vector_store.save_local("today's class faiss index") #on disk 

In [75]:
new_vector_store = FAISS.load_local(
    "today's class faiss index", embeddings, allow_dangerous_deserialization=True
)

In [77]:
new_vector_store.similarity_search("Langchain")

[Document(id='78256921-3c53-406e-a718-a981b5742d93', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='c5eb4f62-9472-4d85-ab84-0fdfd25d89c6', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!'),
 Document(id='50a11abb-f039-4640-99a1-b8a9b64bff59', metadata={'source': 'tweet'}, page_content="Wow! That was an amazing movie. I can't wait to see it again."),
 Document(id='60745d82-bac1-4e43-bcba-2c8937b3523b', metadata={'source': 'website'}, page_content='The top 10 soccer players in the world right now.')]

# Creating the RAG Pipeline

In [82]:
# Loading the PDF document

from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("/Users/sanyuktatuti/Documents/AGENTIC_AI_Krish_Naik/2-Langchain/data/llama2.pdf")

pages =loader.load()

In [83]:
len(pages)

77

In [81]:
# Chunking the PDF document

from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=50)

In [85]:
split_docs = text_splitter.split_documents(pages)

split_docs

[Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': '/Users/sanyuktatuti/Documents/AGENTIC_AI_Krish_Naik/2-Langchain/data/llama2.pdf', 'total_pages': 77, 'page': 0, 'page_label': '1'}, page_content='Llama 2: Open Foundation and Fine-Tuned Chat Models\nHugo Touvron∗ Louis Martin† Kevin Stone†'),
 Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': '/Users/sanyuktatuti/D

In [86]:
len(split_docs)

4291

In [88]:
# Now Storing the data in the Vector Database

# starting with indexing

index =faiss.IndexFlatIP(4291)

vectore_store = FAISS(
    embedding_function = embeddings,
    index =index,
    docstore = InMemoryDocstore(),
    index_to_docstore_id ={},
)


In [91]:
vector_store.add_documents(split_docs)

['ee9736f5-2374-45b5-b908-4bb1abaf2b95',
 'a3cb32f0-7d09-44af-a54b-596add8e7a0e',
 'ae3483fd-4c59-4d79-a4dc-f6d505ff9f64',
 'ce6a86bf-304e-48f8-af13-ea34116e0d91',
 '0f626b12-a5fc-4160-9e37-65d4449add77',
 '94abdf97-c9a3-42b1-ae54-e2c4876c9ab5',
 '1ae0d330-9056-4a85-a652-2002dd506598',
 '70697082-48bb-47e7-befb-075d4e489e00',
 '4071fb96-cf82-4354-a883-1c4e985810aa',
 '8b3ca5df-e02f-41b0-bcba-39f32c5a535e',
 'd5d39ba5-a2ec-460f-ad41-6d3c9c2074b7',
 'adb58cc8-4f20-4c2e-8f93-12e19c08fed7',
 'fb024f4c-8526-4d8c-97f5-208088dc5682',
 'e7c98f96-6bf2-415a-8a50-34b5c128c1a1',
 'e1b34d10-70be-4f0f-9529-40fb74a5766b',
 '19bd2002-98d4-489d-9f7c-59fe6af10991',
 '833868db-62a5-46fb-bfdf-f6069164d576',
 '2d25034a-0aa6-4041-9f7d-4213ccd09113',
 'e3378b3c-32e0-4af1-b12a-b4b57e62be44',
 '019a4d8f-56e8-4e2a-9dea-f2d3f76c2a99',
 '9fa799f5-8a9e-49af-8f1a-006a56d5db44',
 'f200796a-58ad-4495-8dc5-37349fa58947',
 '08449cba-2f4c-4343-be85-921c91cd12a5',
 '5162d948-25e2-4b51-b877-560b054abd98',
 '8e852af2-8584-

In [93]:
retriever = vector_store.as_retriever(
    search_kwargs={"k":5}
)

In [94]:
retriever.invoke("What is a LLAMA Model")

[Document(id='a02a4b37-3003-4e38-8427-51dc495a2ba4', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': '/Users/sanyuktatuti/Documents/AGENTIC_AI_Krish_Naik/2-Langchain/data/llama2.pdf', 'total_pages': 77, 'page': 1, 'page_label': '2'}, page_content='2.3 Llama 2Pretrained Model Evaluation . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .'),
 Document(id='21443ae8-4a13-4f61-9812-57413f5c9955', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea versio

--------------------------------------------------------------------------------------------------------------
### RAG PIPELINE
```
PDF ----> Load ----> Pages ----> Chunking ----> Embeddings 
                                                    |
                                                    |
                                                    |
                                                   \ /
                                                ---------          {2. Retriever}
                                                |       |<- as_retriever <- embeddings <- Query
                         {1. DATA INGESTION}    | FAISS |
                                                |       | ------> k=N ------> LLM ------> Output
                                                ---------           {3. Generation}
                                                {Document Object:
                                                page_content 
                                                and metadata} can be filtered

```
1. Why use Vector database?

- To create a retriever pipeline

-------------------------------------------------------------------------------------------------------

In [96]:
# LLM for Generation

from langchain_openai import ChatOpenAI
model = ChatOpenAI(model="gpt-4o-mini", temperature=0)

In [101]:
# need to chain: context.prompt.model.parser

# context from retriever
# prompt from hub
# model from openai
# parser from langchain

In [99]:
# using the readily available prompt

from langchain import hub
prompt = hub.pull("rlm/rag-prompt")

In [100]:
import pprint 

pprint.pprint(prompt.messages)

[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]


In [108]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# RunnablePassthrough is a utility component in LangChain that simply passes data through without any modification. It's like a "pass-through pipe" in your chain.

In [104]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [105]:
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [106]:
rag_chain.invoke("what is the llama2?")

'Llama 2 is a language model developed by Meta, designed for various natural language processing tasks. It is an improvement over its predecessor, Llama 1, offering enhanced performance and capabilities. Specific details about its features or applications are not provided in the context.'