In [1]:
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.vectorstores.chroma import Chroma
import os
import shutil

## Create vector database

In [2]:
#Set paths
CHROMA_PATH = "chroma"
DATA_PATHS = ["data\\aratohu", "data\\community law", "data\\tenancy nz", "data\\tribunal"]

In [3]:
def load_documents():
    documents = []

    for path in DATA_PATHS:
        loader = DirectoryLoader(path, glob="*.md")
        temp_documents = loader.load()
        documents = documents + temp_documents


    return documents

In [4]:
def split_text(documents: list[Document]):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=300,
        chunk_overlap=100,
        length_function=len,
        add_start_index=True,
    )
    chunks = text_splitter.split_documents(documents)
    print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

    document = chunks[10]
    print(document.page_content)
    print(document.metadata)

    return chunks

In [5]:
def save_to_chroma(chunks: list[Document]):
    # Clear out the database first.
    if os.path.exists(CHROMA_PATH):
        shutil.rmtree(CHROMA_PATH)

    embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

    # Create a new DB from the documents.
    db = Chroma.from_documents(
        chunks, embedding_function, persist_directory=CHROMA_PATH
    )
    db.persist()
    print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}.")

In [6]:
def generate_data_store():
    documents = load_documents()
    chunks = split_text(documents)
    save_to_chroma(chunks)

In [7]:
generate_data_store()

Split 169 documents into 2663 chunks.
Tenancy Services have a Compliance and Investigations Team who investigate breaches of tenancy law. The team focuses on breaches that are serious and ongoing. In particular, they target landlords who persistently breach the rules and who own numerous rental properties or unlawful residential
{'source': 'data\\aratohu\\action if lanlord doesnt resolve.md', 'start_index': 1713}


  from .autonotebook import tqdm as notebook_tqdm


Saved 2663 chunks to chroma.


## Load vector database

In [8]:
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

db = Chroma(persist_directory="chroma", embedding_function=embedding_function)

In [9]:
query = "When can the landlord inspect my property?"

docs = db.similarity_search(query)

In [10]:
docs

[Document(page_content='It is lawful for the landlord to inspect the property when the tenant isn’t at home. For more information see Property inspections.', metadata={'source': 'data\\aratohu\\legal reasons for landlord visits.md', 'start_index': 2117}),
 Document(page_content='“The landlord can inspect the property at any time.”\n\nSection 48 of the Act RTA states exactly when and how often inspections can occur.\n\n“Tenants are responsible for dealing with any pest issues.”\n\n“Tenants are responsible for dealing with all pest issues after the first 28 days.”', metadata={'source': 'data\\aratohu\\unenforceable and unlawful tenancy conditions.md', 'start_index': 9269}),
 Document(page_content='A notice period of not less than 48 hours, and not more than 14 days, is required for a landlord to carry out a property inspection. A property inspection can be any time between the hours of 8am and 7pm any day of the week. Inspections are only allowed once every four weeks (s 48(2)(b) RTA).',

## Generate response

In [11]:
from dataclasses import dataclass
from langchain.vectorstores.chroma import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings

In [33]:
CHROMA_PATH = "chroma"

In [34]:
PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Answer the question and provide a rationale based on the above context: {question}
"""

In [35]:
query_text = "Can my landlord raise the rent?"

In [36]:
# Prepare the DB.
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

In [37]:
# Prepare the prompt
results = db.similarity_search_with_relevance_scores(query_text, k=3)

if len(results) == 0 or results[0][1] < 0.5:
    print(f"Unable to find matching results.")
else:
    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question=query_text)
    print(prompt)

Human: 
Answer the question based only on the following context:

Your landlord can increase your rent no more than once every 12 months. They have to give you 60 days’ advance notice (two months) of any rent increase, and this has to be in writing.

---

“The landlord can increase the rent with two weeks’ notice.”

The Act clearly sets out the process for rent increases (s 24 RTA). Increases can only happen once every 12 months.

“The landlord can raise the rent immediately if they find that additional people are staying at the house.”

---

Your landlord can apply to the Tribunal to increase the rent if they’ve made significant improvements with your permission, or if they’ve had unexpected costs to do with the property.
Getting your rent reviewed if it’s higher than other rents

Residential Tenancies Act 1986, ss 25, 26

---

Answer the question and provide a rationale based on the above context: Can my landlord raise the rent?



In [38]:
import os

os.environ["HUGGINGFACEHUB_API_TOKEN"] = "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"

In [39]:
from langchain.llms import HuggingFaceHub

In [40]:
repo_id = "google/flan-t5-xxl"  # See https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads for some other options

In [41]:
# Submit propmt to model
model = HuggingFaceHub(
    repo_id=repo_id, model_kwargs={"temperature": 0.5, "max_length": 128}
)



In [42]:
response_text = model.predict(prompt)

In [43]:
sources = [doc.metadata.get("source", None) for doc, _score in results]
formatted_response = f"Response: {response_text}\nSources: {sources}"
print(formatted_response)

Response: Your landlord can increase your rent no more than once every 12 months. They have to give you 60 days’ advance notice (two months) of any rent increase, and this has to be in writing. ” The Act clearly sets out the process for rent increases (s 24 RTA). Increases can only happen once every 12 months. “The landlord can raise the rent immediately if they find that additional people are staying at the house.”
Sources: ['data\\community law\\rent.md', 'data\\aratohu\\unenforceable and unlawful tenancy conditions.md', 'data\\community law\\rent.md']
