In [1]:
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.vectorstores.chroma import Chroma
import os
import shutil

## Create vector database

In [2]:
# Set paths.
# CHROMA_PATH is the directory the Chroma vector store is persisted to.
CHROMA_PATH = "chroma"
# Source directories holding the Markdown files to index. They are built with
# os.path.join so the separator matches the host OS; hard-coded "\\" separators
# only resolve on Windows.
DATA_PATHS = [
    os.path.join("data", source_dir)
    for source_dir in ("aratohu", "community law", "tenancy nz")
]

In [3]:
def load_documents():
    """Load the ``*.md`` files from every directory listed in ``DATA_PATHS``.

    Returns:
        list: the documents returned by each directory's ``DirectoryLoader``,
        concatenated in ``DATA_PATHS`` order.
    """
    loaded = []
    for directory in DATA_PATHS:
        # One loader per source directory; only files matching "*.md" are read.
        loaded.extend(DirectoryLoader(directory, glob="*.md").load())
    return loaded

In [4]:
def split_text(documents: list[Document]):
    """Split documents into overlapping chunks and print one sample chunk.

    Args:
        documents: The documents to split.

    Returns:
        The list of chunks produced by
        ``RecursiveCharacterTextSplitter.split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=300,
        chunk_overlap=100,
        length_function=len,  # sizes are measured with len(), i.e. in characters
        add_start_index=True,  # each chunk's metadata gets a "start_index" entry
    )
    chunks = text_splitter.split_documents(documents)
    print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

    # Print one chunk as a sanity check. Index 10 is used when it exists;
    # smaller corpora fall back to the last chunk instead of raising IndexError,
    # and an empty corpus prints nothing.
    if chunks:
        sample = chunks[min(10, len(chunks) - 1)]
        print(sample.page_content)
        print(sample.metadata)

    return chunks

In [5]:
def save_to_chroma(chunks: list[Document]):
    """Rebuild the Chroma store at ``CHROMA_PATH`` from ``chunks``.

    Anything already present at ``CHROMA_PATH`` is deleted before the new
    store is created, so the result contains only the chunks passed in.

    Args:
        chunks: The document chunks to embed and store.
    """
    # Start from a clean slate: drop whatever a previous run left on disk.
    if os.path.exists(CHROMA_PATH):
        shutil.rmtree(CHROMA_PATH)

    embedder = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

    # Embed the chunks and create the store under CHROMA_PATH.
    vector_store = Chroma.from_documents(
        documents=chunks,
        embedding=embedder,
        persist_directory=CHROMA_PATH,
    )
    vector_store.persist()
    print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}.")

In [6]:
def generate_data_store():
    """Run the indexing pipeline: load the documents, split them, save to Chroma."""
    save_to_chroma(split_text(load_documents()))

In [7]:
# Build the vector store: runs load_documents -> split_text -> save_to_chroma.
generate_data_store()

Split 110 documents into 1682 chunks.
Tenancy Services have a Compliance and Investigations Team who investigate breaches of tenancy law. The team focuses on breaches that are serious and ongoing. In particular, they target landlords who persistently breach the rules and who own numerous rental properties or unlawful residential
{'source': 'data\\aratohu\\action if lanlord doesnt resolve.md', 'start_index': 1713}
Saved 1682 chunks to chroma.


## Load vector database

In [8]:
# Reopen the persisted store with the same embedding model name that
# save_to_chroma used when building it.
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Point at CHROMA_PATH rather than repeating the "chroma" literal, so the load
# location cannot drift from the directory the store was written to.
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

In [11]:
# Example question used to spot-check retrieval from the store.
query = "When can the landlord inspect my property?"

# Run a similarity search over the store for the example question.
docs = db.similarity_search(query)

In [12]:
# Display the documents returned by the similarity search.
docs

[Document(page_content='It is lawful for the landlord to inspect the property when the tenant isn’t at home. For more information see Property inspections.', metadata={'source': 'data\\aratohu\\legal reasons for landlord visits.md', 'start_index': 2117}),
 Document(page_content='“The landlord can inspect the property at any time.”\n\nSection 48 of the Act RTA states exactly when and how often inspections can occur.\n\n“Tenants are responsible for dealing with any pest issues.”\n\n“Tenants are responsible for dealing with all pest issues after the first 28 days.”', metadata={'source': 'data\\aratohu\\unenforceable and unlawful tenancy conditions.md', 'start_index': 9269}),
 Document(page_content='A notice period of not less than 48 hours, and not more than 14 days, is required for a landlord to carry out a property inspection. A property inspection can be any time between the hours of 8am and 7pm any day of the week. Inspections are only allowed once every four weeks (s 48(2)(b) RTA).',

## Generate response

In [13]:
from dataclasses import dataclass
from langchain.vectorstores.chroma import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings

In [14]:
# Directory of the persisted Chroma store; same value as the CHROMA_PATH set in
# the "Create vector database" section.
CHROMA_PATH = "chroma"

In [25]:
# Prompt skeleton for the LLM. {context} is filled with the retrieved chunks and
# {question} with the user's query when the prompt is formatted.
PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Answer the question and provide a rationale based on the above context: {question}
"""

In [44]:
# The user question to answer; used both for retrieval and in the final prompt.
query_text = "Can we take our landlord to the tenancy tribunal if they promised us in writing to reduce our rent for the remainder of our 90day notice but as soon as we hand in our 28day notice they said never mind and increase the rent again?"

In [46]:
# Prepare the DB: reopen the store persisted at CHROMA_PATH, using the same
# embedding model name that was used when the store was built.
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

In [47]:
# Prepare the prompt
RELEVANCE_THRESHOLD = 0.5  # minimum score the best hit must reach to be used as context
TOP_K = 3  # number of chunks to retrieve as context

# Each result is a (document, relevance score) pair.
results = db.similarity_search_with_relevance_scores(query_text, k=TOP_K)

if not results or results[0][1] < RELEVANCE_THRESHOLD:
    # Reset the prompt so later cells cannot silently reuse one left over from
    # an earlier run of this cell with a different query.
    prompt = None
    print("Unable to find matching results.")
else:
    # Join the retrieved chunks with a visible separator and fill the template.
    context_text = "\n\n---\n\n".join(doc.page_content for doc, _score in results)
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question=query_text)
    print(prompt)

Human: 
Answer the question based only on the following context:

If the Tribunal does order your landlord to lower your rent, the rent will usually be fixed at that level for six months (see: “Problems with your landlord: What you can do”).
What happens if I get behind in my rent?

Residential Tenancies Act 1986, ss 55, 56, 77(2)(L)

---

If the tenant is 21 days or more in arrears, the landlord has the option of going straight to the Tenancy Tribunal without first giving the tenant a 14-day notice to remedy. If things get to this point it is more likely that the landlord will want to get the tenant out of the tenancy and to recover

---

The landlord can apply to the Tenancy Tribunal for a termination of the tenancy if:

The tenant hasn’t got back on top of their rent payments as per a 14-day notice they have received from the landlord (s 56 RTA).

---

Answer the question and provide a rationale based on the above context: Can we take our landlord to the tenancy tribunal if they pro

In [48]:
import os
from getpass import getpass

# Never commit an API token to the notebook. The token is taken from the
# environment when already set; otherwise it is requested interactively and
# kept only in this process's environment.
# NOTE(review): the token previously hard-coded in this cell has been exposed
# and should be revoked in the Hugging Face account settings.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face Hub API token: ")

In [49]:
from langchain.llms import HuggingFaceHub

In [50]:
# Hugging Face Hub repository of the model used to generate the answer.
repo_id = "google/flan-t5-xxl"  # See https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads for some other options

In [51]:
# Configure the Hugging Face Hub model client. This only constructs the client;
# the prompt itself is submitted in a later cell via model.predict.
model = HuggingFaceHub(
    repo_id=repo_id, model_kwargs={"temperature": 0.5, "max_length": 128}
)



In [52]:
# Send the assembled prompt to the model and keep its text reply.
response_text = model.predict(prompt)

In [53]:
# Pair the model's answer with the source file of each retrieved context chunk.
sources = [
    hit_doc.metadata.get("source")  # .get() yields None when the key is absent
    for hit_doc, _ in results
]
formatted_response = f"Response: {response_text}\nSources: {sources}"
print(formatted_response)

Response: The landlord can apply to the Tenancy Tribunal for a termination of the tenancy if: The tenant hasn’t got back on top of their rent payments as per a 14-day notice they have received from the landlord (s 56 RTA).
Sources: ['data\\community law\\rent.md', 'data\\aratohu\\getting into rent arrears.md', 'data\\aratohu\\getting into rent arrears.md']
