### Add Documents the standard way

In [None]:
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.pgvector import PGVector
from langchain_community.document_loaders import DirectoryLoader
import os
from dotenv import load_dotenv

# Read environment variables from the .env file inside ./app (relative to the
# current working directory). Presumably this supplies the OpenAI API key used
# by the embeddings below -- confirm against the .env contents.
app_dir = os.path.join(os.getcwd(), 'app')
env_path = os.path.join(app_dir, '.env')
load_dotenv(env_path)

In [None]:
# Embedding model handed to the vector store. It is built with no arguments,
# so it presumably picks up its API key from the environment -- confirm.
embeddings = OpenAIEmbeddings()

# SQLAlchemy-style URL of the Postgres database, and the name of the collection
# the vectors are stored under.
CONNECTION_STRING = "postgresql+psycopg2://admin:admin@127.0.0.1:5432/vectordb"
COLLECTION_NAME = "vectordb"

# Load every file under ./data that matches the **/*.txt glob.
loader = DirectoryLoader('./data', glob="**/*.txt")
docs = loader.load()
print(f"{len(docs)} documents loaded!")

# Split the loaded documents into chunks targeting 200 characters (measured
# with len) with a 20-character overlap between neighbouring chunks.
splitter_options = dict(
    chunk_size=200,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
chunks = text_splitter.split_documents(docs)
print(f"{len(chunks)} chunks from {len(docs)} docs created!")

In [None]:
# Vector store bound to the COLLECTION_NAME collection in the Postgres database
# at CONNECTION_STRING, using `embeddings` as its embedding function.
vectorstore = PGVector(
    collection_name=COLLECTION_NAME,
    connection_string=CONNECTION_STRING,
    embedding_function=embeddings,
)

In [35]:
# "Standard way": add all chunks to the vector store directly. The call's
# return value (a list of ID strings, one per stored chunk) is echoed as the
# cell output. NOTE(review): re-running this cell presumably inserts the same
# chunks again -- confirm; the Indexing API section is meant to avoid that.
vectorstore.add_documents(chunks)

['ff632663-e47a-11ee-bc36-e848b8c82000',
 'ff632664-e47a-11ee-a5a7-e848b8c82000',
 'ff632665-e47a-11ee-a294-e848b8c82000',
 'ff632666-e47a-11ee-b904-e848b8c82000',
 'ff632667-e47a-11ee-80a4-e848b8c82000',
 'ff632668-e47a-11ee-bc6e-e848b8c82000',
 'ff632669-e47a-11ee-9117-e848b8c82000',
 'ff63266a-e47a-11ee-8da0-e848b8c82000',
 'ff63266b-e47a-11ee-90e1-e848b8c82000',
 'ff63266c-e47a-11ee-8348-e848b8c82000',
 'ff63266d-e47a-11ee-a767-e848b8c82000',
 'ff63266e-e47a-11ee-abb3-e848b8c82000',
 'ff63266f-e47a-11ee-9e1f-e848b8c82000',
 'ff632670-e47a-11ee-bcf6-e848b8c82000',
 'ff632671-e47a-11ee-a409-e848b8c82000',
 'ff632672-e47a-11ee-ae88-e848b8c82000',
 'ff632673-e47a-11ee-bfaf-e848b8c82000',
 'ff632674-e47a-11ee-b6d5-e848b8c82000',
 'ff632675-e47a-11ee-b513-e848b8c82000',
 'ff632676-e47a-11ee-95d0-e848b8c82000',
 'ff632677-e47a-11ee-8f99-e848b8c82000',
 'ff632678-e47a-11ee-8e4d-e848b8c82000',
 'ff632679-e47a-11ee-bb17-e848b8c82000',
 'ff63267a-e47a-11ee-809b-e848b8c82000',
 'ff63267b-e47a-

In [40]:
import psycopg2

# Count the rows currently stored in the embedding table using a raw psycopg2
# connection.
TABLE_NAME = "langchain_pg_embedding"
# NOTE: this rebinds CONNECTION_STRING from the SQLAlchemy URL defined earlier
# to a libpq-style DSN, which is what the psycopg2.connect() calls in this
# notebook use. Code that needs a SQLAlchemy URL must not rely on this name
# after this cell has run.
CONNECTION_STRING = "dbname='vectordb' user='admin' host='127.0.0.1' password='admin'"

query = f"SELECT COUNT(*) FROM {TABLE_NAME};"

conn = psycopg2.connect(CONNECTION_STRING)
try:
    # The cursor context manager closes the cursor even if the query fails.
    with conn.cursor() as cur:
        cur.execute(query)
        row_count = cur.fetchone()[0]
finally:
    # Always release the connection, whether or not the query succeeded.
    conn.close()

print(f"Total rows in '{TABLE_NAME}': {row_count}")

Total rows in 'langchain_pg_embedding': 0


In [39]:
# DESTRUCTIVE: remove every row from the embedding table so the following
# sections start from an empty store. Relies on TABLE_NAME and the libpq-style
# CONNECTION_STRING defined in the row-count cell.
delete_query = f"DELETE FROM {TABLE_NAME};"

conn = psycopg2.connect(CONNECTION_STRING)
try:
    # The cursor context manager closes the cursor even if the DELETE fails.
    with conn.cursor() as cur:
        cur.execute(delete_query)
    # Commit only after the DELETE succeeded; on an error the connection is
    # closed without committing, so the delete is not persisted.
    conn.commit()
finally:
    conn.close()

print(f"All rows from '{TABLE_NAME}' have been deleted.")

All rows from 'langchain_pg_embedding' have been deleted.


### Indexing API

In [None]:
from langchain.indexes import SQLRecordManager, index

In [None]:
# SQLRecordManager takes a SQLAlchemy database URL. By this point in the
# notebook CONNECTION_STRING has been rebound to a libpq DSN
# ("dbname='vectordb' user=...") for the raw psycopg2 cells, which is not a
# valid SQLAlchemy URL, so the URL is spelled out explicitly here instead of
# reusing that name.
RECORD_MANAGER_DB_URL = "postgresql+psycopg2://admin:admin@127.0.0.1:5432/vectordb"

# Namespace under which the record manager tracks what has been indexed for
# this collection.
namespace = f"pgvector/{COLLECTION_NAME}"
record_manager = SQLRecordManager(
    namespace, db_url=RECORD_MANAGER_DB_URL
)

In [None]:
# Set up the record manager's schema in the database before the first index()
# call. NOTE(review): presumably safe to call again when the schema already
# exists -- confirm.
record_manager.create_schema()

Index the chunks first (1st run), then update the documents to see some changes (2nd run)

In [None]:
# First indexing pass over the split chunks. cleanup=None requests no cleanup
# of previously indexed content; source_id_key="source" names the metadata
# field that identifies each document's source.
index(chunks, record_manager, vectorstore, cleanup=None, source_id_key="source")

In [None]:
from langchain.schema import Document

# Simulate changes to the corpus by editing `docs` in place:
#   - change the content of the document at index 1,
#   - drop the document at index 6 (raises IndexError if fewer than 7 docs),
#   - append a brand-new document whose source is "important".
# NOTE: this mutates `docs` in place, so re-running the cell removes a further
# document and appends a second copy of the new one.
docs[1].page_content = "updated"
del docs[6]
docs.append(Document(page_content="new content", metadata={"source": "important"}))

In [None]:
# Second indexing pass, this time over the edited whole documents in `docs`
# (not `chunks`). cleanup=None is still passed, so no cleanup of earlier
# records is requested.
index(docs, record_manager, vectorstore, cleanup=None, source_id_key="source")

In [None]:
# Second round of in-place edits to `docs`.
docs[1].page_content = "updated again"
# Each `del` shifts the later elements left by one, so these three statements
# remove the items that were at positions 2, 4 and 6 before this cell ran
# (not 2, 3 and 4). NOTE(review): confirm this is the intended selection.
del docs[2]
del docs[3]
del docs[4]
# Add another new document under the same "important" source used earlier.
docs.append(Document(page_content="more new content", metadata={"source": "important"}))

In [None]:
# Third indexing pass over the edited `docs`, now with cleanup="incremental".
# NOTE(review): incremental mode is expected to remove stale records for the
# sources present in this batch -- confirm against the returned counts.
index(docs, record_manager, vectorstore, cleanup="incremental", source_id_key="source")

In [None]:
# Index an empty batch with cleanup="incremental".
# NOTE(review): with no documents (and therefore no sources) supplied, this is
# presumably a no-op -- confirm against the returned counts.
index([], record_manager, vectorstore, cleanup="incremental", source_id_key="source")

In [None]:
# Index an empty batch with cleanup="full".
# NOTE(review): full mode is expected to delete everything previously indexed
# that is absent from the current batch, i.e. clear the index here -- confirm
# against the returned counts.
index(
    [],
    record_manager,
    vectorstore,
    cleanup="full",
    source_id_key="source",
)