In [1]:
# @title Installations
# The %%capture command hides the installation output.
%%capture
!pip install langchain
!pip install lark
!pip install openai
!pip install chromadb
!pip install langchain_openai
!pip install langchain_chroma
!pip install langchain_community
!pip install langchain_community


In [3]:
# @title Imports
from langchain_core.documents import Document
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import OpenAI, OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_community.chat_models import ChatOpenAI
import os
from google.colab import userdata
from langchain.retrievers import ParentDocumentRetriever
from langchain_text_splitters import CharacterTextSplitter
from langchain.storage import InMemoryStore

In [18]:
# @title Docs
# Company-policy excerpts. Each entry in the tuple below is a
# (policy_name, text) pair; every text is a single string assembled from
# adjacent literals, so it contains no newline characters. All documents share
# the same "source" metadata value.
docs = [
    Document(
        page_content=policy_text,
        metadata={"source": "companypolicies.txt", "policy_name": policy_name},
    )
    for policy_name, policy_text in (
        (
            "Smoking Policy",
            "Smoking Restrictions: Smoking inside company buildings, offices, "
            "meeting rooms, and other enclosed spaces is strictly prohibited. "
            "This includes electronic cigarettes and vaping devices. In compliance "
            "with Applicable Laws: All employees and visitors must adhere to "
            "relevant federal, state, and local smoking laws and regulations. "
            "Disposal of Smoking Materials: Properly dispose of cigarette butts "
            "and related materials in designated receptacles. Littering on "
            "company premises is prohibited. No Smoking in Company Vehicles: "
            "Smoking is not permitted in company vehicles, whether they are owned "
            "or leased, to maintain the condition and cleanliness of these "
            "vehicles. Enforcement and Consequences: All employees and visitors "
            "are expected to adhere to this policy. Non-compliance may lead to "
            "appropriate disciplinary action, which could include fines, or, in "
            "the case of employees, possible termination of employment. Review of "
            "Policy: This policy will be reviewed periodically to ensure its "
            "alignment with evolving legal requirements and best practices for "
            "maintaining a healthy and safe workplace. We appreciate your "
            "cooperation in maintaining a smoke-free and safe environment for all.",
        ),
        (
            "Drug and Alcohol Policy",
            "Drug and Alcohol Policy: The Drug and Alcohol Policy is established "
            "to establish clear expectations and guidelines for the responsible "
            "use of drugs and alcohol within the organization. This policy aims "
            "to maintain a safe, healthy, and productive workplace. Prohibited "
            "Substances: The use, possession, distribution, or sale of illegal "
            "drugs or unauthorized controlled substances is strictly prohibited "
            "on company premises or during work-related activities. This includes "
            "the misuse of prescription drugs. Alcohol Consumption: The "
            "consumption of alcoholic beverages is not allowed during work hours, "
            "on company property, or while performing company-related duties. "
            "Exception may be made for company-sanctioned events.",
        ),
        (
            "Harassment Policy",
            "Harassment Policy: Our company is committed to providing a work "
            "environment free of harassment. Harassment is defined as any "
            "unwelcome conduct, whether verbal, physical, or visual, based on "
            "an individual’s race, color, religion, sex, national origin, age, "
            "disability, or any other legally protected characteristic. This "
            "policy applies to all employees, supervisors, managers, and "
            "third parties. Reporting Procedure: Any employee who believes they "
            "have been subjected to harassment should report the incident to "
            "their supervisor or Human Resources immediately. All reports will "
            "be investigated promptly and confidentially. Retaliation: Retaliation "
            "against any employee who reports harassment or assists in an "
            "investigation is strictly prohibited.",
        ),
        (
            "Code of Conduct",
            "Code of Conduct: This Code of Conduct outlines the ethical and "
            "professional standards expected of all employees. Employees must "
            "act with integrity, honesty, and respect for others. Conflicts of "
            "Interest: Employees must avoid any activity that creates a conflict "
            "of interest with the company. Confidentiality: Employees must not "
            "disclose confidential company information to unauthorized parties. "
            "Reporting Violations: Employees are encouraged to report any observed "
            "violations of this Code of Conduct to their supervisor or through "
            "the confidential reporting hotline.",
        ),
    )
]


In [26]:
# @title Splitters

# The parent splitter uses a big chunk size; the child splitter uses a small one.
#
# The policy texts in `docs` are single strings with no newline characters, so
# a "\n" separator never finds a split point and every document comes back as
# one chunk regardless of chunk_size. Splitting on spaces gives the splitters
# word boundaries to cut at, so the 400-character child limit actually applies.
parent_splitter = CharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=20,
    separator=" ",
)

child_splitter = CharacterTextSplitter(
    chunk_size=400,
    chunk_overlap=20,
    separator=" ",
)


In [8]:
# @title Environment Setup
# Read OPENAI_API_KEY through google.colab's userdata and export it as an
# environment variable of the same name.
os.environ.update({"OPENAI_API_KEY": userdata.get("OPENAI_API_KEY")})

In [10]:
# @title Chroma DB and OpenAI Embeddings
# Chroma collection named "split_parents", embedding with OpenAIEmbeddings.
vectordb = Chroma(
    embedding_function=OpenAIEmbeddings(),
    collection_name="split_parents",
)

In [27]:
# Wire the two splitters, the Chroma store and a fresh in-memory docstore into
# a ParentDocumentRetriever, then load the policy documents into it.
# NOTE(review): a new InMemoryStore is created on every run while `vectordb`
# is reused, so re-running this cell presumably adds the documents to the
# vector store again - confirm, and re-create `vectordb` before re-running.
retriever = ParentDocumentRetriever(
    docstore=InMemoryStore(),
    vectorstore=vectordb,
    parent_splitter=parent_splitter,
    child_splitter=child_splitter,
)
retriever.add_documents(docs)

In [28]:
# Query the retriever; the cell displays the returned list of Documents.
retriever.invoke(input="smoking policy")

[Document(metadata={'source': 'companypolicies.txt', 'policy_name': 'Smoking Policy'}, page_content='Smoking Restrictions: Smoking inside company buildings, offices, meeting rooms, and other enclosed spaces is strictly prohibited. This includes electronic cigarettes and vaping devices. In compliance with Applicable Laws: All employees and visitors must adhere to relevant federal, state, and local smoking laws and regulations. Disposal of Smoking Materials: Properly dispose of cigarette butts and related materials in designated receptacles. Littering on company premises is prohibited. No Smoking in Company Vehicles: Smoking is not permitted in company vehicles, whether they are owned or leased, to maintain the condition and cleanliness of these vehicles. Enforcement and Consequences: All employees and visitors are expected to adhere to this policy. Non-compliance may lead to appropriate disciplinary action, which could include fines, or, in the case of employees, possible termination of