### Indexing API

In [None]:
from dotenv import load_dotenv, find_dotenv

# Locate the application's .env file, then load its variables into the
# process environment. The cell displays load_dotenv's return value.
env_file = find_dotenv("../application/.env")
load_dotenv(env_file)

Let's add documents and embeddings!

In [None]:
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter

# Splitter configuration: chunk_size=150 with chunk_overlap=20 between
# neighbouring chunks.
text_splitter = CharacterTextSplitter(
    chunk_size=150,
    chunk_overlap=20,
)

# Load every *.txt file found (recursively) under ./restaurant as plain text.
loader = DirectoryLoader(
    "./restaurant",
    glob="**/*.txt",
    loader_cls=TextLoader,
)
data = loader.load()

# Split the loaded files into chunks and report how many were produced.
docs = text_splitter.split_documents(data)
print(len(docs))

In [None]:
import os
from urllib.parse import quote_plus


def build_connection_string(user, password, host, database, port=5432):
    """Return a SQLAlchemy URL for PostgreSQL using the psycopg2 driver.

    The user name and password are percent-encoded so that characters such
    as ``@``, ``:`` or ``/`` inside the credentials cannot corrupt the URL.
    A missing (``None``) user or password is treated as an empty string.

    Args:
        user: Database user name, or None.
        password: Database password, or None.
        host: Database host name.
        database: Database name.
        port: TCP port of the server; defaults to 5432.

    Returns:
        The connection URL as a string.
    """
    credentials = f"{quote_plus(user or '')}:{quote_plus(password or '')}"
    return f"postgresql+psycopg2://{credentials}@{host}:{port}/{database}"


# Connection settings come from the environment (loaded from .env earlier).
host = os.getenv("PG_VECTOR_HOST")
user = os.getenv("PG_VECTOR_USER")
password = os.getenv("PG_VECTOR_PASSWORD")
database = os.getenv("PGDATABASE")
COLLECTION_NAME = "langchain_collection"

CONNECTION_STRING = build_connection_string(user, password, host, database)

# Display the URL as a sanity check, but with the password masked so the
# secret is never written into the saved notebook output.
f"postgresql+psycopg2://{user}:***@{host}:5432/{database}"

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.pgvector import PGVector

# Embedding model used to vectorise each chunk.
embeddings = OpenAIEmbeddings()

# PGVector store bound to the configured database and collection.
vector_store = PGVector(
    connection_string=CONNECTION_STRING,
    collection_name=COLLECTION_NAME,
    embedding_function=embeddings,
)

In [None]:
from langchain.indexes import SQLRecordManager, index

In [None]:
# The record manager tracks what has been indexed under this namespace;
# it is stored in the same database as the vector store.
namespace = "pgvector/" + COLLECTION_NAME
record_manager = SQLRecordManager(namespace, db_url=CONNECTION_STRING)

In [None]:
# Set up the record manager's schema in the database before the first
# index() call (presumably creates its bookkeeping tables — see the
# LangChain indexing docs).
record_manager.create_schema()

Index the documents, then update them and index again to see the changes (2nd run)

In [None]:
# Index the chunks with cleanup=None: per the LangChain indexing docs this
# mode never deletes existing entries from the vector store.
index(docs, record_manager, vector_store, cleanup=None, source_id_key="source")

In [None]:
from langchain.schema import Document

# Simulate upstream changes: edit one chunk, drop one, and add a new one.
# NOTE: this mutates `docs` in place, so re-running the cell changes it again.
docs[1].page_content = "updated"
docs.pop(6)  # same effect as `del docs[6]`; the removed chunk is discarded
new_doc = Document(page_content="new content", metadata={"source": "important"})
docs.append(new_doc)

In [None]:
# Index the current contents of `docs` with cleanup=None (per the LangChain
# indexing docs, nothing already stored is deleted in this mode).
index(docs, record_manager, vector_store, cleanup=None, source_id_key="source")

In [None]:
# Change the documents again: edit one chunk, remove three, add one.
# NOTE: this mutates `docs` in place, so re-running the cell changes it again.
docs[1].page_content = "updated again"

# Each deletion shifts the following elements left by one, so deleting at
# 2, 3, 4 in sequence removes the chunks that were at positions 2, 4 and 6
# when this cell started.
for position in (2, 3, 4):
    del docs[position]

docs.append(
    Document(
        page_content="more new content",
        metadata={"source": "important"},
    )
)

In [None]:
# Index with cleanup="incremental": per the LangChain indexing docs, outdated
# versions of content from the sources seen in this run are removed.
index(docs, record_manager, vector_store, cleanup="incremental", source_id_key="source")

In [None]:
# Index an empty batch with cleanup="incremental". No sources are seen in
# this run, so (per the LangChain indexing docs) nothing should be cleaned up.
index([], record_manager, vector_store, cleanup="incremental", source_id_key="source")

In [None]:
# Index an empty batch with cleanup="full": per the LangChain indexing docs,
# everything not seen in this run is deleted, which clears the namespace.
index(
    [],
    record_manager,
    vector_store,
    cleanup="full",
    source_id_key="source",
)