In [15]:
from dotenv import load_dotenv

# Read variables from the .env file located one level above the working directory.
# NOTE(review): presumably this supplies the API credentials used by the embedding
# client later in the notebook — confirm.
# The call stays as the cell's last expression so its boolean result is displayed.
load_dotenv(dotenv_path="../.env")

True

## Documents and Document Loaders

In [None]:
from langchain_core.documents import Document

# Two hand-written sample documents; each gets its own metadata dict with the
# same "source" value.
documents = [
    Document(page_content=text, metadata={"source": "mammal-pets-doc"})
    for text in (
        "Dogs are great companions, known for their loyalty and friendliness.",
        "Cats are independent pets that often enjoy their own space.",
    )
]

## Loading documents

In [11]:
import os

from langchain_community.document_loaders import PyPDFLoader

# Location of the sample PDF. The original notebook hard-coded an absolute path
# inside one user's home directory, which only works on that machine. Set the
# README_PDF_PATH environment variable (for example in the .env file loaded at
# the top of the notebook) to point at your own copy; when it is unset, the
# original path is used so existing runs behave exactly as before.
file_path = os.environ.get(
    "README_PDF_PATH",
    "C:/Users/tiago/OneDrive/Documentos/Github/ai-agents/Tutorials/data/readme.pdf",
)
loader = PyPDFLoader(file_path)

# Load the PDF into a list of Document objects (the recorded run produced 2,
# matching the file's 'total_pages': 2 metadata).
docs = loader.load()

print(len(docs))

2


In [12]:
# Preview the first loaded document: the first 200 characters of its text,
# then a blank line, then its metadata dictionary.
print(f"{docs[0].page_content[:200]}\n")
print(docs[0].metadata)

readme.md 2024-04-04
1 / 2
Simple Flask CRUD API
This project is a simple CRUD (Create, Read, Update, Delete) API built with Flask. It demonstrates basic
operations to manage items in an in-memory lis

{'producer': 'Skia/PDF m80', 'creator': 'Chromium', 'creationdate': '2024-04-04T07:49:17+00:00', 'moddate': '2024-04-04T07:49:17+00:00', 'source': 'C:/Users/tiago/OneDrive/Documentos/Github/ai-agents/Tutorials/data/readme.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}


## Splitting

In [13]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter configuration:
#   chunk_size / chunk_overlap -- size limit per chunk and the amount shared
#       between neighbouring chunks (characters with the default length function).
#   add_start_index -- records each chunk's offset within its source document
#       under the "start_index" metadata key.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)

# Break the loaded documents into smaller chunks.
all_splits = text_splitter.split_documents(docs)

# Last expression of the cell: display how many chunks were produced.
len(all_splits)

3

## Embeddings

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding client used by the later cells that reference `embeddings`.
# NOTE(review): presumably authenticates with credentials loaded from ../.env — confirm.
embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-exp-03-07")

# Embed a short string as a quick check that the client works; `vector` is not
# read again in the cells visible here.
vector = embeddings.embed_query("hello, world!")

In [17]:
# Embed the text of the first two chunks separately.
vector_1 = embeddings.embed_query(all_splits[0].page_content)
vector_2 = embeddings.embed_query(all_splits[1].page_content)

# Sanity check: both embeddings must have the same number of components.
assert len(vector_1) == len(vector_2)
print(f"Generated vectors of length {len(vector_1)}\n")
# Show only the first 10 components to keep the output short.
print(vector_1[:10])

Generated vectors of length 3072

[-0.006182866171002388, 0.00182988157030195, 0.025529654696583748, -0.0706188976764679, 0.0103186946362257, 0.008643293753266335, 0.00036104366881772876, -0.008506260812282562, -0.0002575563848949969, 0.01603693701326847]


## Vector stores

É possível armazenar em memória ou em bancos de dados vetoriais.

In [18]:
from langchain_core.vectorstores import InMemoryVectorStore

# In-memory store that uses the `embeddings` client created earlier to embed
# whatever is added to it or searched for.
vector_store = InMemoryVectorStore(embedding=embeddings)

In [19]:
# Add every chunk to the vector store; the value returned by add_documents
# (the identifiers of the stored documents) is kept in `ids`.
ids = vector_store.add_documents(documents=all_splits)

## Usage

Retornando documentos com base na similaridade com uma query de texto.

In [20]:
# Search the store with a plain-text question and show the first returned chunk.
results = vector_store.similarity_search("How to retrieve an item?")

print(results[0])

page_content='readme.md 2024-04-04
2 / 2
Retrieving All Items
To retrieve all items, send a GET request to /items:
curl http://127.0.0.1:5000/items 
Retrieving a Single Item
To retrieve a single item by its ID, send a GET request to /items/<item_id>:
curl http://127.0.0.1:5000/items/0 
Updating an Item
To update an existing item by its ID, send a PUT request to /items/<item_id> with a JSON body:
curl -X PUT http://127.0.0.1:5000/items/0 -H "Content-Type: 
application/json" -d "{\"name\":\"Updated Item\",\"description\":\"Updated 
description.\"}"
Deleting an Item
To delete an item by its ID, send a DELETE request to /items/<item_id>:
curl -X DELETE http://127.0.0.1:5000/items/0 
Contributing
Contributions to this project are welcome. Please fork the repository and submit a pull request with your
improvements.
License
This project is licensed under the MIT License - see the LICENSE file for details.' metadata={'producer': 'Skia/PDF m80', 'creator': 'Chromium', 'creationdate': '2024-04-0

Query assíncrona

In [None]:
# Asynchronous variant of the text similarity search. The top-level `await`
# relies on the notebook kernel supporting it; it is not valid in a plain script.
results = await vector_store.asimilarity_search("How to retrieve an item?")

print(results[0])

page_content='readme.md 2024-04-04
2 / 2
Retrieving All Items
To retrieve all items, send a GET request to /items:
curl http://127.0.0.1:5000/items 
Retrieving a Single Item
To retrieve a single item by its ID, send a GET request to /items/<item_id>:
curl http://127.0.0.1:5000/items/0 
Updating an Item
To update an existing item by its ID, send a PUT request to /items/<item_id> with a JSON body:
curl -X PUT http://127.0.0.1:5000/items/0 -H "Content-Type: 
application/json" -d "{\"name\":\"Updated Item\",\"description\":\"Updated 
description.\"}"
Deleting an Item
To delete an item by its ID, send a DELETE request to /items/<item_id>:
curl -X DELETE http://127.0.0.1:5000/items/0 
Contributing
Contributions to this project are welcome. Please fork the repository and submit a pull request with your
improvements.
License
This project is licensed under the MIT License - see the LICENSE file for details.' metadata={'producer': 'Skia/PDF m80', 'creator': 'Chromium', 'creationdate': '2024-04-0

Retornando também a pontuação de similaridade de cada documento

In [None]:
# Same question, but each result is a (document, score) pair.
results = vector_store.similarity_search_with_score("How to retrieve an item?")

# Unpack the first pair.
# NOTE(review): for InMemoryVectorStore the score is presumably a cosine
# similarity (higher = closer) — confirm against the library documentation.
doc, score = results[0]

print(f"Score: {score}\n")
print(doc)

Score: 0.7464487078053971

page_content='readme.md 2024-04-04
2 / 2
Retrieving All Items
To retrieve all items, send a GET request to /items:
curl http://127.0.0.1:5000/items 
Retrieving a Single Item
To retrieve a single item by its ID, send a GET request to /items/<item_id>:
curl http://127.0.0.1:5000/items/0 
Updating an Item
To update an existing item by its ID, send a PUT request to /items/<item_id> with a JSON body:
curl -X PUT http://127.0.0.1:5000/items/0 -H "Content-Type: 
application/json" -d "{\"name\":\"Updated Item\",\"description\":\"Updated 
description.\"}"
Deleting an Item
To delete an item by its ID, send a DELETE request to /items/<item_id>:
curl -X DELETE http://127.0.0.1:5000/items/0 
Contributing
Contributions to this project are welcome. Please fork the repository and submit a pull request with your
improvements.
License
This project is licensed under the MIT License - see the LICENSE file for details.' metadata={'producer': 'Skia/PDF m80', 'creator': 'Chromium',

Retornando documentos com base na similaridade com o embedding (vetor) de uma consulta

In [24]:
# Embed the question explicitly first ...
embedding = embeddings.embed_query("How to retrieve an item?")

# ... then search the store with the resulting vector instead of the raw text.
results = vector_store.similarity_search_by_vector(embedding)

print(results[0])

page_content='readme.md 2024-04-04
2 / 2
Retrieving All Items
To retrieve all items, send a GET request to /items:
curl http://127.0.0.1:5000/items 
Retrieving a Single Item
To retrieve a single item by its ID, send a GET request to /items/<item_id>:
curl http://127.0.0.1:5000/items/0 
Updating an Item
To update an existing item by its ID, send a PUT request to /items/<item_id> with a JSON body:
curl -X PUT http://127.0.0.1:5000/items/0 -H "Content-Type: 
application/json" -d "{\"name\":\"Updated Item\",\"description\":\"Updated 
description.\"}"
Deleting an Item
To delete an item by its ID, send a DELETE request to /items/<item_id>:
curl -X DELETE http://127.0.0.1:5000/items/0 
Contributing
Contributions to this project are welcome. Please fork the repository and submit a pull request with your
improvements.
License
This project is licensed under the MIT License - see the LICENSE file for details.' metadata={'producer': 'Skia/PDF m80', 'creator': 'Chromium', 'creationdate': '2024-04-0

## Retrievers

In [25]:
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import chain


@chain
def retriever(query: str) -> List[Document]:
    """Return the single stored chunk most similar to ``query``.

    Wrapped with ``@chain`` so the function becomes a Runnable and can be
    called through ``.batch`` (as done below).
    """
    top_match = vector_store.similarity_search(query, k=1)
    return top_match


# Run the retriever over several questions at once; the result (one list of
# documents per question) is the cell's displayed output.
retriever.batch(["How to retrieve an item?", "How to send an item?"])

[[Document(id='4d58c374-2c10-406e-9f58-e96d2526fec0', metadata={'producer': 'Skia/PDF m80', 'creator': 'Chromium', 'creationdate': '2024-04-04T07:49:17+00:00', 'moddate': '2024-04-04T07:49:17+00:00', 'source': 'C:/Users/tiago/OneDrive/Documentos/Github/ai-agents/Tutorials/data/readme.pdf', 'total_pages': 2, 'page': 1, 'page_label': '2', 'start_index': 0}, page_content='readme.md 2024-04-04\n2 / 2\nRetrieving All Items\nTo retrieve all items, send a GET request to /items:\ncurl http://127.0.0.1:5000/items \nRetrieving a Single Item\nTo retrieve a single item by its ID, send a GET request to /items/<item_id>:\ncurl http://127.0.0.1:5000/items/0 \nUpdating an Item\nTo update an existing item by its ID, send a PUT request to /items/<item_id> with a JSON body:\ncurl -X PUT http://127.0.0.1:5000/items/0 -H "Content-Type: \napplication/json" -d "{\\"name\\":\\"Updated Item\\",\\"description\\":\\"Updated \ndescription.\\"}"\nDeleting an Item\nTo delete an item by its ID, send a DELETE request