In [4]:
# indexing pipeline

# indexing
# 1. loader
from langchain_community.document_loaders import DirectoryLoader, TextLoader
# 2. splitter
from langchain_text_splitters import RecursiveCharacterTextSplitter
# 3. embeddings
from langchain_google_vertexai.embeddings import VertexAIEmbeddings
# 4. vectorstore
from langchain_chroma import Chroma

In [2]:
directory_loader = DirectoryLoader(
    path="../data/updates/IT_Helpdesk_KB_Articles",
    glob="*.txt",
    loader_cls=TextLoader,
)
documents = directory_loader.load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
)
chunks = text_splitter.split_documents(documents)
embedding = VertexAIEmbeddings(
    model_name="text-embedding-005")
vector_store = Chroma(
    collection_name="kb_collection",
    embedding_function=embedding,
    persist_directory="../vectordb/kb_collection_db_s1",
)
vector_store.add_documents(documents=chunks)

  embedding = VertexAIEmbeddings(
  embedding = VertexAIEmbeddings(


['e428155a-9188-4765-a22b-81493eef5290',
 'af513010-2ecd-4b8b-88d5-cf3c445bc8b0',
 '4c0230d8-06eb-4102-87f2-8ec72adb1326',
 '4c4d5c00-d1c5-4140-87ff-3a6d78229e0e',
 'd708bd37-4abe-4a63-8bbb-68c9acf4d1ce',
 '1a559e7a-934f-4002-91ea-3200d4a7d6bb',
 '7d5efdb3-e7bf-405b-8d30-93f6f036b83b',
 'c18c6606-bbdc-44c0-99ca-b4fa22e1b486',
 '1278da6d-68c9-400f-b4e1-96eed57abdcf',
 '614a032d-f259-49a6-9c53-68bb431a99c6',
 '9ef6a38d-6f5a-4241-b8de-076452355b5d',
 '34ff3023-83e4-463c-970a-2d338ab4f6c3',
 '3898e6f6-50b4-448b-86ac-c41ff37cc54d',
 'aa8e848b-a759-4296-9298-05f7a8416675',
 '27ae59eb-a5f8-4b19-9451-bfafd4bd5651',
 '377e2a5c-a490-4658-b721-c72e89d11645',
 '41918d4e-9b02-45b9-9dd1-ad66469edfce',
 '5400440c-b87d-4642-ac73-b71ff51db209',
 '10a16523-70d8-4909-9650-35bb0f0bd2dd',
 '13d3ad8a-f3fc-47c5-8075-e86e15e69e63',
 'b855e827-b177-48b7-ba4d-370bfe84ec2a',
 'd75e92fa-5f66-4eae-af97-d8a3e8df0302',
 '842a475e-b662-4aa3-ae24-ec894bf4f4d4',
 '59dff732-10ef-4167-a8cf-1167acf0816b',
 '73b0c90a-2721-

In [4]:
retriever = vector_store.as_retriever()
results = retriever.invoke("How do I reset my corporate password?")
for result in results:
    print(result.page_content)
    print("\n metadata \n")
    print(result.metadata)
    print("\n")


Resolution:
1. Go to the company password reset portal.
2. Enter your username or employee ID.

 metadata 

{'source': '..\\data\\updates\\IT_Helpdesk_KB_Articles\\01_Password_Reset_Guide.txt'}


Knowledge Base Article
Title: How to Reset Your Password

 metadata 

{'source': '..\\data\\updates\\IT_Helpdesk_KB_Articles\\01_Password_Reset_Guide.txt'}


5. Log in again with the new password.

 metadata 

{'source': '..\\data\\updates\\IT_Helpdesk_KB_Articles\\01_Password_Reset_Guide.txt'}


3. Verify identity using OTP or security questions.
4. Set a new strong password.

 metadata 

{'source': '..\\data\\updates\\IT_Helpdesk_KB_Articles\\01_Password_Reset_Guide.txt'}




In [2]:
# Documents have changed
directory_loader = DirectoryLoader(
    path="../data/updates/IT_Helpdesk_KB_Articles_v2",
    glob="*.txt",
    loader_cls=TextLoader,
)
documents = directory_loader.load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
)
chunks = text_splitter.split_documents(documents)
embedding = VertexAIEmbeddings(
    model_name="text-embedding-005")
vector_store = Chroma(
    collection_name="kb_collection",
    embedding_function=embedding,
    persist_directory="../vectordb/kb_collection_db_s1",
)
vector_store.add_documents(documents=chunks)

  embedding = VertexAIEmbeddings(
  embedding = VertexAIEmbeddings(


['e24247bb-489c-4a35-8ea7-75580cf5c426',
 '7748f893-e3e0-4317-a224-1248ebbdb6ad',
 '4b28588c-59ae-475d-aa16-3454107c39c0',
 'c116fe13-ff3c-48dd-9e17-1b382439ee7a',
 'f9a62f7d-b4ce-4a89-ab42-22deac63b831',
 'e5c20466-d68a-486c-92da-84e673b4a181',
 '510e1d06-5055-4fe7-a041-39c1d242bece',
 'dd0c69f7-fdcf-431e-b1fc-d69aaba45c27',
 '5aa5553a-0bd2-4c0e-a20c-da4b39caf715',
 'abe78a26-291d-46ae-9e9b-cfe4035c6891',
 'b2456685-8d8c-4671-93e2-312ea220bbc2',
 'd2c46ad7-f284-444d-a175-1cb361512bdd',
 'b3fa733c-12e6-4213-a46d-d95993b266a5',
 '0724b256-cc6a-41ec-8242-7add12fa8b0a',
 '737a3d0f-c808-4147-9788-9bd4b32911bc',
 '2673b7fc-835a-4287-a7e2-5fa87c143bf5',
 '1d00208f-db2f-4b86-946d-107422668293',
 '2717b477-238a-4a58-bbc3-ad0bffbd26b1',
 'e4ee6172-9a26-4302-ad44-e3cb0117c842',
 '079757d8-2136-4715-81f6-cbcd27bfbf32',
 '02044a07-fb23-455c-aa99-4d0da1d54afc',
 'c753795c-2b24-460b-bef9-7b655695f267',
 '012a6470-b738-495a-8bb3-a9cf23cd518b',
 '4b880cb6-a75b-4877-8c5b-42fd769b51f6',
 '2916afb4-10ba-

In [3]:
retriever = vector_store.as_retriever()
results = retriever.invoke("How do I reset my corporate password?")
for result in results:
    print(result.page_content)
    print("\n metadata \n")
    print(result.metadata)
    print("\n")

Resolution:
1. Open the Self-Service Password Reset portal.
2. Enter corporate username.

 metadata 

{'source': '..\\data\\updates\\IT_Helpdesk_KB_Articles_v2\\01_Password_Reset_Guide.txt'}


3. Complete OTP/MFA verification.
4. Create a new password as per policy.

 metadata 

{'source': '..\\data\\updates\\IT_Helpdesk_KB_Articles_v2\\01_Password_Reset_Guide.txt'}


Knowledge Base Article
Title: How to Reset Your Password (Updated)

 metadata 

{'source': '..\\data\\updates\\IT_Helpdesk_KB_Articles_v2\\01_Password_Reset_Guide.txt'}


Issue:
User cannot log in due to forgotten or expired password.

 metadata 

{'source': '..\\data\\updates\\IT_Helpdesk_KB_Articles_v2\\01_Password_Reset_Guide.txt'}




In [1]:
# Only update what has changed.
from langchain_community.indexes._sql_record_manager import SQLRecordManager


In [2]:
from langchain_core.indexing import index

In [9]:
db_url = "sqlite:///record_manager_cache.db"
sql_record_manager = SQLRecordManager(namespace="example", db_url=db_url)
directory_loader = DirectoryLoader(
    path="../data/updates/IT_Helpdesk_KB_Articles",
    glob="*.txt",
    loader_cls=TextLoader,
)
documents = directory_loader.load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
)
chunks = text_splitter.split_documents(documents)
embedding = VertexAIEmbeddings(
    model_name="text-embedding-005")
vector_store = Chroma(
    collection_name="kb_collection",
    embedding_function=embedding,
    persist_directory="../vectordb/kb_collection_db_sample1",
)
sql_record_manager.create_schema()


  embedding = VertexAIEmbeddings(


In [10]:
result = index(
    docs_source=documents,
    record_manager=sql_record_manager,
    vector_store=vector_store,
    cleanup='incremental',
    source_id_key='source'
)

In [11]:
result

{'num_added': 20, 'num_updated': 0, 'num_skipped': 0, 'num_deleted': 0}

In [None]:
directory_loader = DirectoryLoader(
    path="../data/updates/IT_Helpdesk_KB_Articles_v2",
    glob="*.txt",
    loader_cls=TextLoader,
)
documents = directory_loader.load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
)
chunks = text_splitter.split_documents(documents)
embedding = VertexAIEmbeddings(
    model_name="text-embedding-005")
vector_store = Chroma(
    collection_name="kb_collection",
    embedding_function=embedding,
    persist_directory="../vectordb/kb_collection_db_sample1",
)
# changed docs and reindex
result = index(
    docs_source=documents,
    record_manager=sql_record_manager,
    vector_store=vector_store,
    cleanup='incremental',
    source_id_key='source'
)


  embedding = VertexAIEmbeddings(


In [15]:
result

{'num_added': 0, 'num_updated': 0, 'num_skipped': 20, 'num_deleted': 0}