### Add Documents the standard way

In [None]:
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.pgvector import PGVector
from langchain_community.document_loaders import DirectoryLoader
import os
from dotenv import load_dotenv

# Environment variables live in "<current working directory>/app/.env".
app_dir = os.path.join(os.getcwd(), "app")
dotenv_path = os.path.join(app_dir, ".env")
# Kept as the final bare expression so the notebook still shows its result.
load_dotenv(dotenv_path)

In [None]:
# Embedding model handed to the vector store in a later cell.
embeddings = OpenAIEmbeddings()

# Postgres connection settings; both constants are reused by later cells.
CONNECTION_STRING = "postgresql+psycopg2://admin:admin@127.0.0.1:5432/vectordb"
COLLECTION_NAME = "vectordb"

# Splitter configuration: chunks of at most 200 units with a 20-unit overlap,
# where length is measured by len() (i.e. characters); separators are treated
# as plain strings rather than regular expressions.
text_splitter = RecursiveCharacterTextSplitter(
    is_separator_regex=False,
    length_function=len,
    chunk_overlap=20,
    chunk_size=200,
)

# Load every *.txt file matched by the recursive glob under ./data.
loader = DirectoryLoader("./data", glob="**/*.txt")
docs = loader.load()
print(f"{len(docs)} documents loaded!")

# Split the loaded documents into the chunks that get embedded and indexed.
chunks = text_splitter.split_documents(docs)
print(f"{len(chunks)} chunks from {len(docs)} docs created!")

In [None]:
# Vector store handle bound to the collection named by COLLECTION_NAME,
# reached through CONNECTION_STRING and embedding with `embeddings`.
vectorstore = PGVector(
    collection_name=COLLECTION_NAME,
    connection_string=CONNECTION_STRING,
    embedding_function=embeddings,
)

In [None]:
# "Standard way": push every chunk straight into the vector store.
# NOTE(review): nothing here checks for existing rows, so re-running this cell
# presumably stores the same chunks again — confirm against the PGVector docs.
vectorstore.add_documents(chunks)

In [None]:
from contextlib import closing

import psycopg2

TABLE_NAME = "langchain_pg_embedding"
CONN_STRING = "dbname='vectordb' user='admin' host='127.0.0.1' password='admin'"

# Sanity check: count the rows currently stored in the embedding table.
# TABLE_NAME is a constant defined in this cell (not user input), so
# interpolating it into the SQL text is acceptable here.
query = f"SELECT COUNT(*) FROM {TABLE_NAME};"

# closing() guarantees the cursor and the connection are released even when
# execute()/fetchone() raises. A plain `with connection:` is deliberately not
# used: in psycopg2 it only ends the transaction and leaves the connection open.
with closing(psycopg2.connect(CONN_STRING)) as conn:
    with closing(conn.cursor()) as cur:
        cur.execute(query)
        row_count = cur.fetchone()[0]

print(f"Total rows in '{TABLE_NAME}': {row_count}")

In [None]:
from contextlib import closing

# DESTRUCTIVE: removes every row from TABLE_NAME so the indexing-API cells
# below start from an empty table. TABLE_NAME and CONN_STRING come from the
# row-count cell above.
delete_query = f"DELETE FROM {TABLE_NAME};"

# closing() releases the cursor and the connection even if the DELETE fails.
# The commit only runs after a successful execute(); on an error the
# connection is closed without committing, so the delete is not persisted.
with closing(psycopg2.connect(CONN_STRING)) as conn:
    with closing(conn.cursor()) as cur:
        cur.execute(delete_query)
    conn.commit()

print(f"All rows from '{TABLE_NAME}' have been deleted.")

### Indexing API

In [None]:
from langchain.indexes import SQLRecordManager, index

In [None]:
# The record manager is keyed by a namespace derived from the collection name
# and uses the same database URL as the vector store.
namespace = f"pgvector/{COLLECTION_NAME}"
record_manager = SQLRecordManager(
    namespace=namespace,
    db_url=CONNECTION_STRING,
)

In [None]:
# Set up the record manager's storage before the first index() call.
# NOTE(review): presumably creates the bookkeeping table(s) in the database
# behind CONNECTION_STRING if they are missing — confirm in the LangChain docs.
record_manager.create_schema()

Update the documents to see some changes (2nd run)

In [None]:
# First indexing pass over `chunks` through the record manager, with no
# cleanup mode requested (cleanup=None). "source" is passed as the source id
# key (presumably the metadata field naming each chunk's origin — confirm).
# The call is the cell's last expression, so the notebook displays its result.
index(
    chunks,
    record_manager,
    vectorstore,
    cleanup=None,
    source_id_key="source",
)

In [None]:
from langchain.schema import Document

# Simulate changed input for the next index() run:
# one chunk edited in place, one removed, one brand-new chunk appended.
chunks[1].page_content = "updated"
chunks.pop(6)
new_chunk = Document(page_content="new content", metadata={"source": "important"})
chunks.append(new_chunk)

In [None]:
# Second indexing pass, run after `chunks` was modified in the cell above
# (one edit, one deletion, one addition); still no cleanup mode (cleanup=None).
# NOTE(review): with cleanup=None, records for the edited/removed chunks are
# presumably left in the store — confirm against the LangChain indexing docs.
index(
    chunks,
    record_manager,
    vectorstore,
    cleanup=None,
    source_id_key="source",
)

In [None]:
# Second round of input changes before the incremental-cleanup run.
chunks[1].page_content = "updated again"
# Each deletion shifts the remaining items left, so deleting at positions
# 2, 3 and 4 in sequence removes what were originally items 2, 4 and 6.
for position in (2, 3, 4):
    del chunks[position]
extra_chunk = Document(page_content="more new content", metadata={"source": "important"})
chunks.append(extra_chunk)

In [None]:
# Indexing pass over the modified `chunks`, this time requesting the
# "incremental" cleanup mode.
# NOTE(review): incremental cleanup is expected to remove stale records only
# for source ids present in this batch — confirm against the LangChain docs.
index(
    chunks,
    record_manager,
    vectorstore,
    cleanup="incremental",
    source_id_key="source",
)

In [None]:
# Same "incremental" cleanup mode, but with an empty document list.
# NOTE(review): with no documents there are no source ids in the batch, so
# this is presumably a no-op — confirm against the LangChain indexing docs.
index(
    [],
    record_manager,
    vectorstore,
    cleanup="incremental",
    source_id_key="source",
)

In [None]:
# Empty document list combined with the "full" cleanup mode.
# NOTE(review): "full" cleanup with no input presumably deletes every record
# tracked in this namespace — confirm before running against real data.
index(
    [],
    record_manager,
    vectorstore,
    cleanup="full",
    source_id_key="source",
)