### Indexing API

In [None]:
from dotenv import load_dotenv, find_dotenv

# Locate the nearest .env file first, then load its variables into the
# process environment.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

In [None]:
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
# PGVector lives in langchain_community (the package this notebook already
# uses for TextLoader); the old `langchain.vectorstores` path is deprecated.
from langchain_community.vectorstores.pgvector import PGVector
from langchain_community.document_loaders import TextLoader

# Embedding model used to vectorize every chunk written to the store.
embeddings = OpenAIEmbeddings()

# NOTE(review): these look like local development credentials -- confirm they
# are never reused for a shared or production database.
CONNECTION_STRING = "postgresql+psycopg2://admin:admin@127.0.0.1:5433/vectordb"
COLLECTION_NAME = "vectordb"


# Passing an empty document list yields a vector store handle for the
# collection without inserting anything; documents are written later through
# the indexing API.
vectorstore = PGVector.from_documents(
    [],
    embeddings,
    collection_name=COLLECTION_NAME,
    connection_string=CONNECTION_STRING,
)

Let's add documents and embeddings!

In [None]:
# Read the source text file and cut it into small, slightly overlapping
# chunks (150 characters target size, 20 characters overlap).
text_splitter = CharacterTextSplitter(chunk_size=150, chunk_overlap=20)
loader = TextLoader("./bella_vista.txt")
documents = loader.load()
docs = text_splitter.split_documents(documents)

# Show how many chunks the splitter produced.
print(len(docs))

In [None]:
from langchain.indexes import SQLRecordManager, index

In [None]:
# The record manager namespace combines the vector store type with the
# collection name. Its bookkeeping is kept in the same database that
# CONNECTION_STRING points at.
namespace = f"pgvector/{COLLECTION_NAME}"
record_manager = SQLRecordManager(namespace, db_url=CONNECTION_STRING)

In [None]:
# Create the schema for the record manager in the database configured above.
# This must run before the first call to index().
record_manager.create_schema()

Update the documents to see some changes (2nd run)

In [None]:
# Rebuild `docs` from the file with the same splitter settings as before,
# then print every chunk so later edits can be compared against it.
text_splitter = CharacterTextSplitter(chunk_size=150, chunk_overlap=20)
loader = TextLoader("./bella_vista.txt")
documents = loader.load()
docs = text_splitter.split_documents(documents)

for doc in docs:
    print(doc)

In [None]:
# First indexing run: write every chunk to the vector store and track it in
# the record manager. Per the LangChain indexing docs, cleanup=None never
# deletes existing content. The returned summary is shown as the cell output.
index(docs, record_manager, vectorstore, cleanup=None, source_id_key="source")

In [None]:
from langchain.schema import Document

# Simulate a changed corpus: edit one chunk, drop one chunk, and add a chunk
# that comes from a different source ("important").
docs[1].page_content = "updated"
del docs[6]
docs.append(
    Document(
        page_content="new content",
        metadata={"source": "important"},
    )
)

In [None]:
# Re-index the modified chunks, still with cleanup=None. Per the LangChain
# indexing docs, unchanged chunks are skipped and changed or new ones are
# added, while stale entries are left in the vector store.
index(docs, record_manager, vectorstore, cleanup=None, source_id_key="source")

In [None]:
# Second round of changes: edit chunk 1 again and remove three chunks.
docs[1].page_content = "updated again"

# Each deletion shifts the remaining items left, so deleting positions 2, 3
# and 4 in sequence removes what were elements 2, 4 and 6 of the list as it
# stood before this loop.
for shifted_index in (2, 3, 4):
    del docs[shifted_index]

docs.append(
    Document(
        page_content="more new content",
        metadata={"source": "important"},
    )
)

In [None]:
# Incremental cleanup: per the LangChain indexing docs, previous versions of
# content are deleted for the sources that appear in this batch, so stale
# chunks from those sources are removed.
index(docs, record_manager, vectorstore, cleanup="incremental", source_id_key="source")

In [None]:
# Incremental cleanup with an empty batch. No source ids are seen, so this is
# expected to delete nothing -- confirm against the returned summary.
index([], record_manager, vectorstore, cleanup="incremental", source_id_key="source")

In [None]:
# Full cleanup with an empty batch. Per the LangChain indexing docs, anything
# not passed in the current run is deleted, so this is expected to clear
# everything tracked under this namespace.
index(
    [],
    record_manager,
    vectorstore,
    cleanup="full",
    source_id_key="source",
)