In [1]:
# Load the dotenv IPython extension, then read environment variables from the
# given .env file (the path is relative to the kernel's working directory).
# Presumably this is where the tunnel, database and Azure OpenAI settings read
# via os.getenv() below come from — confirm against the .env file.
%load_ext dotenv
%dotenv data_ingestion/.env

## Token Trimming Helper

In [2]:
import tiktoken

# Module-level tokenizer shared by truncate_text below, built from tiktoken's
# "cl100k_base" encoding.
# NOTE(review): the chat deployment used later defaults to "gpt-4o"; confirm
# that cl100k_base token counts are an acceptable approximation for that model.
tokenizer = tiktoken.get_encoding("cl100k_base")

# Function to truncate text to a specified number of tokens
def truncate_text(text, max_tokens=124000):
    """Return ``text`` cut down to at most ``max_tokens`` tokens.

    The text is encoded with the module-level ``tokenizer``. When it already
    fits within the budget the original string is returned unchanged;
    otherwise the first ``max_tokens`` tokens are decoded back into a string.

    Args:
        text: The string to limit.
        max_tokens: Maximum number of tokens to keep.

    Returns:
        The original string, or the decoded prefix of its token sequence.
    """
    token_ids = tokenizer.encode(text)
    # Guard clause: nothing to trim, hand back the caller's string as-is.
    if len(token_ids) <= max_tokens:
        return text
    return tokenizer.decode(token_ids[:max_tokens])



## Connecting to the DB

In [3]:
from sshtunnel import SSHTunnelForwarder
import requests
import json
import os
import pgvector
import psycopg
from pgvector.psycopg import register_vector
import json
def get_db_connection():
    """Open an SSH tunnel to the database host and connect to Postgres through it.

    Settings are read from environment variables: REMOTE_HOST and
    REMOTE_SSH_PORT (SSH endpoint), SSH_USERNAME and SSH_KEYFILE (SSH
    credentials), PORT (database port on the remote side of the tunnel) and
    DB_PASSWORD (password of the ``postgres`` user).

    Returns:
        tuple: ``(conn_str_formatted, conn_str, connection)`` where
        ``conn_str_formatted`` is a ``postgresql://`` URI, ``conn_str`` is the
        key/value connection string, and ``connection`` is the object returned
        by ``psycopg.connect(conn_str)``. Both strings embed the database
        password in clear text, so avoid printing or logging them.

    Raises:
        ValueError: If REMOTE_HOST, REMOTE_SSH_PORT or PORT is unset or empty,
            or if a port value is not an integer.

    Note:
        The tunnel object is not returned, so after a successful call the
        tunnel stays open and the caller has no handle to stop it. Each call
        starts a new tunnel.
    """
    # Fail early with a readable message instead of an opaque int(None) error.
    required_vars = ("REMOTE_HOST", "REMOTE_SSH_PORT", "PORT")
    missing_vars = [name for name in required_vars if not os.getenv(name)]
    if missing_vars:
        raise ValueError(
            "Missing required environment variable(s): " + ", ".join(missing_vars)
        )

    # Setting up the SSH tunnel with tunnel credentials
    REMOTE_HOST = os.getenv("REMOTE_HOST")
    REMOTE_SSH_PORT = int(os.getenv("REMOTE_SSH_PORT"))
    PORT = int(os.getenv("PORT"))
    SSH_KEYFILE = os.getenv("SSH_KEYFILE")
    SSH_USERNAME = os.getenv("SSH_USERNAME")

    server = SSHTunnelForwarder(
        ssh_address_or_host=(REMOTE_HOST, REMOTE_SSH_PORT),
        ssh_username=SSH_USERNAME,
        ssh_pkey=SSH_KEYFILE,
        # Key part! The database is reached as localhost:PORT on the remote
        # side of the tunnel.
        remote_bind_address=('localhost', PORT)
    )
    server.start()
    print("server connected")

    conn_str = f"dbname=postgres host=localhost port={server.local_bind_port} user=postgres password={os.getenv('DB_PASSWORD')}"
    conn_str_formatted = f"postgresql://postgres:{os.getenv('DB_PASSWORD')}@localhost:{server.local_bind_port}/postgres"
    try:
        connection = psycopg.connect(conn_str)
    except Exception:
        # Do not leave an orphaned tunnel behind when the database connection
        # cannot be established.
        server.stop()
        raise
    return conn_str_formatted, conn_str, connection

## Set the Question

In [4]:
# Natural-language question used for every retrieval query and LLM prompt
# below; it is also interpolated verbatim into the exported JSON file names.
QUESTION = "Water leaking into the apartment from the floor above. What are the prominent legal precedents from cases in Washington on this problem?"
# Alternative question; not referenced by any other cell in this section.
QUESTION_2 = "When the landlord is sued in court for leaking pipes, infer and give examples of the number of times there was a favorable decision for the lessee?"

## Save the data from Vector DB

In [5]:
import json

# BEGIN: Run query
# Plain vector search: fetch the 10 cases whose description_vector is nearest
# (by the <=> distance operator) to the embedding of QUESTION.
conn_str_formatted, conn_str, conn = get_db_connection()

query = """
SELECT id, data
FROM cases
ORDER BY description_vector <=> azure_openai.create_embeddings('text-embedding-3-small', %s )::vector
LIMIT 10;
"""

try:
    with conn.cursor() as cur:
        cur.execute(query, (QUESTION,))
        results = cur.fetchall()
finally:
    # Close the connection even when the query fails, so a failed run does
    # not leak a database connection.
    conn.close()

# Each row is an (id, data) pair; serialize the rows for display and export.
results_json = json.dumps(results, indent=2)
print(results_json)
# END: Run query

server connected
[
  [
    "473788",
    {
      "id": 473788,
      "name": "Mickele D. O'Brien, Appellant, v. Martin Detty, et al, Respondents",
      "court": {
        "id": 8985,
        "name": "Washington Court of Appeals",
        "name_abbreviation": "Wash. Ct. App."
      },
      "analysis": {
        "sha256": "8c6dd20edf35f2ea8f8fb3b355b5a1b324c89bc064de234d2037642056f38cdc",
        "simhash": "1:d36f4819213f9fd8",
        "pagerank": {
          "raw": 1.5159058765710466e-07,
          "percentile": 0.6684242980961438
        },
        "char_count": 5451,
        "word_count": 915,
        "cardinality": 423,
        "ocr_confidence": 0.845
      },
      "casebody": {
        "judges": [
          "Munson, C.J., and Green, J., concur."
        ],
        "parties": [
          "Mickele D. O'Brien, Appellant, v. Martin Detty, et al, Respondents."
        ],
        "opinions": [
          {
            "text": "Roe, J.\nPlaintiff was a tenant in a house owned by defenda

In [6]:
# Export the vector-search results (results_json) to a JSON file in the
# current working directory.
# NOTE(review): QUESTION is used verbatim in the file name (spaces, ".", "?");
# confirm this is valid on the target filesystem. The same name is rebuilt
# when the file is read back later, so both places must change together.
output_file = f'initial_context_results_{QUESTION}.json'
with open(output_file, 'w') as f:
    f.write(results_json)

print(f"Results have been exported to {output_file}")

Results have been exported to initial_context_results_Water leaking into the apartment from the floor above. What are the prominent legal precedents from cases in Washington on this problem?.json


## Save the data from Reranker

In [7]:
# Rerank the top-10 vector-search hits with the semantic ranker.
# Result columns: ord (position in the vector search), value (relevance
# score), case name abbreviation, opinion text (first 20000 characters), and
# the full case data.
conn_str_formatted, conn_str, conn = get_db_connection()
new_query = """
-- Query to use semantic ranker model to rerank the results of vector search
WITH vector AS (
	SELECT ROW_NUMBER() OVER () AS ord, text, data
	FROM (
		SELECT data -> 'casebody' -> 'opinions' -> 0 ->> 'text' AS text, data
		FROM cases
		ORDER BY description_vector <=> azure_openai.create_embeddings('text-embedding-3-small', %s)::vector
		LIMIT 10)
),
result AS (
	SELECT * 
	FROM jsonb_array_elements(
			semantic_relevance(%s,
			10)
		) WITH ORDINALITY AS elem(value)
)
SELECT vector.ord AS ord, result.value::DOUBLE PRECISION AS value, data->>'name_abbreviation', LEFT(vector.text, 20000), data
FROM vector
JOIN result ON vector.ord = result.ordinality
ORDER BY value DESC;
"""

# Execute the new query
try:
    with conn.cursor() as cur:
        cur.execute(new_query, (QUESTION, QUESTION))
        new_results = cur.fetchall()
finally:
    # Always release the database connection, whether or not the query
    # succeeded.
    conn.close()

new_results_json = json.dumps(new_results, indent=2)
print(new_results_json)

server connected
[
  [
    1,
    -3.214557409286499,
    "O'Brien v. Detty",
    "Roe, J.\nPlaintiff was a tenant in a house owned by defendant Detty. (Martin Detty will be referred to, in this opinion, as the sole defendant.) During a long rain, water leaked in around a window, and some collected on the floor. Plaintiff immediately reported this to the defendant and requested that it be repaired. The defendant-landlord twice, within a week of being notified, attempted but failed to locate and remedy the problem. About 2 weeks after giving notice of this condition, the plaintiff slipped in some water and sustained the injuries of which she complains in this action. About 2 weeks after her fall; the defendant succeeded in finding and repairing the leak. The case was tried to a jury, which returned a verdict for the defendant.\nThe Residential Landlord-Tenant Act of 1973, RCW 59.18, modified the common law so as to require decent, safe and sanitary housing. RCW 59.18.040(4). It also req

In [8]:
# Export the reranked results (new_results_json) to a JSON file in the
# current working directory.
# NOTE(review): QUESTION is used verbatim in the file name (spaces, ".", "?");
# confirm this is valid on the target filesystem. The same name is rebuilt
# when the file is read back later, so both places must change together.
output_file = f'reranker_context_results_{QUESTION}.json'
with open(output_file, 'w') as f:
    f.write(new_results_json)

print(f"Results have been exported to {output_file}")

Results have been exported to reranker_context_results_Water leaking into the apartment from the floor above. What are the prominent legal precedents from cases in Washington on this problem?.json


## Save the data from Graph

In [9]:
import json

# BEGIN: Run query
# Take the 100 nearest cases by vector distance, rank them both by that
# distance and by their stored pagerank percentile, and keep the 10 with the
# best combined score 1/(60 + vector_rank) + 1/(60 + pagerank_rank).
# Row layout: (score, pagerank_rank, id, vector_rank, abbr, pagerank, data).
conn_str_formatted, conn_str, conn = get_db_connection()

query = """
WITH vector AS (
		SELECT cases.id, RANK() OVER (ORDER BY description_vector <=> azure_openai.create_embeddings('text-embedding-3-small', %s)::vector) AS vector_rank, cases.data ->> 'name_abbreviation' AS abbr, (cases.data#>>'{analysis, pagerank, percentile}')::NUMERIC AS pagerank, cases.data
		FROM cases
		ORDER BY description_vector <=> azure_openai.create_embeddings('text-embedding-3-small', %s)::vector
		LIMIT 100
	),
	combined AS (
		SELECT RANK() OVER (ORDER BY vector.pagerank DESC) AS pagerank_rank, vector.* FROM vector ORDER BY vector.pagerank DESC
	)
	SELECT
	    COALESCE(1.0 / (60 + combined.vector_rank), 0.0) +
	    COALESCE(1.0 / (60 + combined.pagerank_rank), 0.0) AS score,
		combined.*
	FROM combined
	ORDER BY score DESC
    LIMIT 10;
"""

try:
    with conn.cursor() as cur:
        cur.execute(query, (QUESTION, QUESTION))
        results = cur.fetchall()
finally:
    # Close the connection even if the query raises, so no connection leaks.
    conn.close()

for row in results:
    print(row)

server connected
(Decimal('0.02744360902255639097'), 16, '1036918', 10, 'King County v. Boeing Co.', Decimal('0.8160991239257578'), {'id': 1036918, 'name': 'King County, Appellant, v. The Boeing Company, et al., Respondents', 'court': {'id': 9029, 'name': 'Washington Supreme Court', 'name_abbreviation': 'Wash.'}, 'analysis': {'sha256': '3d4a3c659045d83a06561283e2fde5209ca06c6d92795fa7caba2936df48eb44', 'simhash': '1:de86849bb8b0eb94', 'pagerank': {'raw': 2.5693416579171945e-07, 'percentile': 0.8160991239257578}, 'char_count': 12259, 'word_count': 2091, 'cardinality': 725, 'ocr_confidence': 0.677}, 'casebody': {'judges': [], 'parties': ['King County, Appellant, v. The Boeing Company, et al., Respondents.'], 'opinions': [{'text': 'Hamilton, J.\nPlaintiff, King County, appeals from a summary judgment granted in favor of defendants, The Boeing Company (hereafter referred to as Boeing) and Isaacson Iron Works (hereafter referred to as Isaacson).\nThe suit between the parties revolves about 

In [10]:
import json
from decimal import Decimal

# Custom serialization function
def decimal_default(obj):
    """``default`` hook for ``json.dumps`` that serializes ``Decimal`` as float.

    Args:
        obj: A value the JSON encoder could not serialize on its own.

    Returns:
        float: ``float(obj)`` when ``obj`` is a ``Decimal``.

    Raises:
        TypeError: For any other type, with the same wording the ``json``
            module uses, so the offending type is visible in the traceback.
    """
    if isinstance(obj, Decimal):
        return float(obj)  # or str(obj) if exact precision matters more
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")



In [11]:
# Serialize the graph-query rows; decimal_default converts Decimal columns to
# floats. The rows are wrapped in one extra list, so a consumer of the
# exported file has to index [0] to reach them (the graph-context cell later
# in this notebook does so). This also overwrites the results_json produced
# by the plain vector search.
results_json = json.dumps([results], indent=2, default=decimal_default)

In [12]:
# Export the graph-query results (results_json) to a JSON file in the
# current working directory.
# NOTE(review): QUESTION is used verbatim in the file name (spaces, ".", "?");
# confirm this is valid on the target filesystem. The same name is rebuilt
# when the file is read back later, so both places must change together.
output_file = f'graph_context_results_{QUESTION}.json'
with open(output_file, 'w') as f:
    f.write(results_json)

print(f"Results have been exported to {output_file}")

Results have been exported to graph_context_results_Water leaking into the apartment from the floor above. What are the prominent legal precedents from cases in Washington on this problem?.json


## Create OpenAI request with Simple Context

In [13]:
from openai import AzureOpenAI
# Azure OpenAI client configured from environment variables.
# NOTE: `client` is created again later in this notebook with api_version
# "2024-05-01-preview"; when cells run in order, that later client is the one
# the chat-completion calls use.
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY_MAXIM"),
    api_version="2024-02-01",
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT_MAXIM")
)

In [14]:
import json

# Path of the vector-search export written earlier; the file name is rebuilt
# from QUESTION, so QUESTION must be unchanged since the export.
file_path = f'initial_context_results_{QUESTION}.json'

# Read and parse the JSON file into a list of [id, data] entries
with open(file_path, 'r') as file:
    initial_context_data = json.load(file)


## Get all the top vector results

In [15]:
# Sanity check: number of cases loaded from the vector-search export.
# Token truncation of this context is currently disabled:
# truncate_text(str(initial_context_data), 124000)
print(len(initial_context_data))

10


In [16]:
# Print the abbreviated name of each retrieved case. Every entry is an
# [id, data] pair, so index 1 is the case JSON.
for data in initial_context_data:
    print(data[1]['name_abbreviation'])

O'Brien v. Detty
United Mutual Savings Bank v. Riebli
Puget Sound Service Corp. v. Dalarna Management Corp.
Dempsey v. City of Seattle
1515-1519 Lakeview Boulevard Condominium Ass'n v. Apartment Sales Corp.
Currens v. Sleek
McCutcheon v. United Homes Corp.
Geise v. Lee
Lummi Indian Nation v. State
King County v. Boeing Co.


## Make sure the prompt always stays within the context token limit

In [17]:
import os
from openai import AzureOpenAI

# Azure OpenAI settings come from the environment; the deployment name falls
# back to "gpt-4o" when DEPLOYMENT_NAME is not set.
endpoint = os.getenv("AZURE_OPENAI_ENDPOINT_MAXIM")
deployment = os.getenv("DEPLOYMENT_NAME", "gpt-4o")
subscription_key = os.getenv("AZURE_OPENAI_API_KEY_MAXIM")

# Initialize Azure OpenAI client with key-based authentication
client = AzureOpenAI(
    azure_endpoint=endpoint,
    api_key=subscription_key,
    api_version="2024-05-01-preview",
)

# Cap the context with the token-trimming helper so the prompt stays within
# the model's context window; text that already fits is left unchanged.
initial_context_text = truncate_text(str(initial_context_data), 124000)

# Ask the model to answer QUESTION using only the retrieved cases.
completion = client.chat.completions.create(
    model=deployment,
    messages=[
        {
            "role": "system",
            "content": "You are an AI assistant that helps people find information."
        },
        {
            "role": "user",
            "content": f"""
        DOCUMENT: {initial_context_text}
        QUESTION: {QUESTION}
        INSTRUCTIONS: Answer the user's QUESTION using the DOCUMENT text above. Keep your answer grounded in the facts of the DOCUMENT. If the DOCUMENT doesn’t contain the facts to answer the QUESTION, return NONE.
        """
        }
    ],
    max_tokens=800,
    temperature=1,
    top_p=0.95,
    frequency_penalty=0,
    presence_penalty=0,
    stop=None,
    stream=False
)



In [18]:
import json

# Serialize the completion object to a JSON string
completion_json = completion.to_json()

# Parse that JSON back and print only the assistant's answer: the message
# content of the first choice.
print(json.loads(completion_json)['choices'][0]['message']['content'])

The prominent legal precedents from cases in Washington on the issue of water leaking into an apartment from the floor above, as found in the provided DOCUMENT, are:

1. **Mickele D. O'Brien v. Martin Detty (1978)**:
   - This case established that a landlord must have timely notice of the need for repairs before being obliged to make them. The landlord is given a reasonable time to effectuate those repairs. If the landlord fails to repair the defect within this reasonable period and damage results, the landlord may be held liable.

2. **United Mutual Savings Bank v. Arthur Riebli (1960)**:
   - This case indicated that exclusive control over the offending instrumentality must be established by evidence to apply the doctrine of res ipsa loquitur. In this case, the court ruled that the landlord did not have exclusive control over the pipe causing the water damage, thus not meeting the criteria for res ipsa loquitur.

3. **R. T. Dempsey v. The City of Seattle (1936)**:
   - This case ill

In [19]:
# Export the raw chat-completion response (simple vector-search context) to a
# JSON file in the current working directory.
output_file = f'chatgpt_4_results_simple_{QUESTION}.json'
# The serialized response can contain non-ASCII characters (for example
# typographic apostrophes in the model's answer), so write UTF-8 explicitly
# instead of relying on the platform's default encoding.
with open(output_file, 'w', encoding='utf-8') as f:
    f.write(completion.to_json())

print(f"Results have been exported to {output_file}")

Results have been exported to chatgpt_4_results_simple_Water leaking into the apartment from the floor above. What are the prominent legal precedents from cases in Washington on this problem?.json


## Create OpenAI request with Reranker Context

In [20]:
import json

# Path of the reranker export written earlier; the file name is rebuilt from
# QUESTION, so QUESTION must be unchanged since the export.
# `folder` is not used by the path below; the commented "{folder}/" hint
# suggests it was meant as an optional directory prefix — TODO confirm.
folder = "sample_question_1"
#{folder}/
file_path = f'reranker_context_results_{QUESTION}.json'

# Read and parse the JSON file
with open(file_path, 'r') as file:
    reranked_context_data = json.load(file)

## Get all the top reranked results

In [21]:
reranked_context_content = []
# Keep only rows that actually carry opinion text. Each row is
# [ord, relevance score, case name, opinion text, case data]; the first two
# columns are dropped so only name, text and data go into the prompt.
for item in reranked_context_data:
    # Test the text column itself: a slice such as item[2:] is never None, so
    # checking the slice would not filter anything out.
    if len(item) > 3 and item[3]:
        print(item[2:])
        reranked_context_content.append(item[2:])

["O'Brien v. Detty", 'Roe, J.\nPlaintiff was a tenant in a house owned by defendant Detty. (Martin Detty will be referred to, in this opinion, as the sole defendant.) During a long rain, water leaked in around a window, and some collected on the floor. Plaintiff immediately reported this to the defendant and requested that it be repaired. The defendant-landlord twice, within a week of being notified, attempted but failed to locate and remedy the problem. About 2 weeks after giving notice of this condition, the plaintiff slipped in some water and sustained the injuries of which she complains in this action. About 2 weeks after her fall; the defendant succeeded in finding and repairing the leak. The case was tried to a jury, which returned a verdict for the defendant.\nThe Residential Landlord-Tenant Act of 1973, RCW 59.18, modified the common law so as to require decent, safe and sanitary housing. RCW 59.18.040(4). It also requires a landlord to maintain a dwelling unit in reasonably we

In [22]:
# Print the case name of each reranked entry (the first element of every
# [name, text, data] item), in reranked order.
for data in reranked_context_content:
    print(data[0])

O'Brien v. Detty
United Mutual Savings Bank v. Riebli
McCutcheon v. United Homes Corp.
Lummi Indian Nation v. State
Geise v. Lee
Puget Sound Service Corp. v. Dalarna Management Corp.
King County v. Boeing Co.
1515-1519 Lakeview Boulevard Condominium Ass'n v. Apartment Sales Corp.
Currens v. Sleek
Dempsey v. City of Seattle


In [23]:
# Sanity check: number of reranked entries that will be sent as context.
print(len(reranked_context_content))


10


## Make sure the prompt always stays within the context token limit

#### (Optional) Truncate the reranked context if it exceeds the token limit

In [137]:
#reranked_context_content = truncate_text(str(reranked_context_text), 12400)

###
#What I used for the demo..

# reranked_context_text = reranked_context_data[0][2]
# print(reranked_context_text)
# reranked_context_text = truncate_text(str(reranked_context_data), 3000)

In [24]:
import os
from openai import AzureOpenAI

# Cap the reranked context with the token-trimming helper so the prompt stays
# within the model's context window; text that already fits is left unchanged.
reranked_context_prompt = truncate_text(str(reranked_context_content), 124000)

# Ask the model to answer QUESTION using only the reranked cases. `client`
# and `deployment` come from the earlier client-setup cell.
completion = client.chat.completions.create(
    model=deployment,
    messages=[
        {
            "role": "system",
            "content": "You are an AI assistant that helps people find information."
        },
        {
            "role": "user",
            "content": f"""
        DOCUMENT: {reranked_context_prompt}
        QUESTION: {QUESTION}
        INSTRUCTIONS: Answer the user's QUESTION using the DOCUMENT text above. Keep your answer grounded in the facts of the DOCUMENT. If the DOCUMENT doesn’t contain the facts to answer the QUESTION, return NONE.
        """
        }
    ],
    max_tokens=800,
    temperature=1,
    top_p=0.95,
    frequency_penalty=0,
    presence_penalty=0,
    stop=None,
    stream=False
)



In [25]:
import json

# Serialize the completion object to a JSON string
completion_json = completion.to_json()

# Parse that JSON back and print only the assistant's answer: the message
# content of the first choice.
print(json.loads(completion_json)['choices'][0]['message']['content'])

The prominent legal precedents from cases in Washington regarding water leaking into an apartment from the floor above include:

1. **O'Brien v. Detty**:
   - **Issue**: Whether a landlord is liable for injuries resulting from a defective condition (water leak) if they were notified and failed to remedy the defect within a reasonable time.
   - **Legal Principle**: Under the Residential Landlord-Tenant Act of 1973, RCW 59.18, landlords are required to maintain a dwelling in reasonably weathertight condition. The landlord must have timely notice of the defect and must repair it within a reasonable time (RCW 59.18.060(8); RCW 59.18.070). Negligence may be established if the landlord fails to act within this time frame.
   - **Outcome**: The case emphasizes the landlord’s obligation to address defects in a timely manner after being notified by the tenant.

2. **Puget Sound Service Corp. v. Dalarna Management Corp.**:
   - **Issue**: Whether a seller (landlord) is liable for non-disclosure

In [26]:
# Export the raw chat-completion response (reranker context) to a JSON file
# in the current working directory.
output_file = f'chatgpt_4_results_w_reranker_{QUESTION}.json'
# The serialized response can contain non-ASCII characters (for example
# typographic apostrophes in the model's answer), so write UTF-8 explicitly
# instead of relying on the platform's default encoding.
with open(output_file, 'w', encoding='utf-8') as f:
    f.write(completion.to_json())

print(f"Results have been exported to {output_file}")

Results have been exported to chatgpt_4_results_w_reranker_Water leaking into the apartment from the floor above. What are the prominent legal precedents from cases in Washington on this problem?.json


## Create OpenAI request with Graph Context

In [27]:
import json

# Path of the graph-query export written earlier; the file name is rebuilt
# from QUESTION, so QUESTION must be unchanged since the export.
# `folder` is assigned but not used by the path below.
folder = "sample_question_1"
file_path = f'graph_context_results_{QUESTION}.json'

# Read and parse the JSON file
with open(file_path, 'r') as file:
    graph_context_data = json.load(file)

## Get all the top graph results

In [28]:
# Token truncation of the graph context is currently disabled:
#graph_context_text = truncate_text(str(graph_context_data), 124000)
graph_context_content = []
# The exported rows sit inside one extra outer list, hence the [0]. From each
# row keep only element 6, which is printed below as the full case JSON.
for item in graph_context_data[0]:
    print(item[6])
    graph_context_content.append(item[6])

{'id': 1036918, 'name': 'King County, Appellant, v. The Boeing Company, et al., Respondents', 'court': {'id': 9029, 'name': 'Washington Supreme Court', 'name_abbreviation': 'Wash.'}, 'analysis': {'sha256': '3d4a3c659045d83a06561283e2fde5209ca06c6d92795fa7caba2936df48eb44', 'simhash': '1:de86849bb8b0eb94', 'pagerank': {'raw': 2.5693416579171945e-07, 'percentile': 0.8160991239257578}, 'char_count': 12259, 'word_count': 2091, 'cardinality': 725, 'ocr_confidence': 0.677}, 'casebody': {'judges': [], 'parties': ['King County, Appellant, v. The Boeing Company, et al., Respondents.'], 'opinions': [{'text': 'Hamilton, J.\nPlaintiff, King County, appeals from a summary judgment granted in favor of defendants, The Boeing Company (hereafter referred to as Boeing) and Isaacson Iron Works (hereafter referred to as Isaacson).\nThe suit between the parties revolves about drainage of surface waters from the southerly portion of the King County Airport, also known as Boeing Field. There is no substantia

In [29]:
# Print the abbreviated name of each case selected by the graph query.
for data in graph_context_content:
    print(data['name_abbreviation'])

King County v. Boeing Co.
Wilber Development Corp. v. Les Rowland Constr., Inc.
O'Brien v. Detty
Bach v. Sarich
Currens v. Sleek
Foisy v. Wyman
Ronkosky v. City of Tacoma
Lummi Indian Nation v. State
New Meadows Holding Co. v. Washington Water Power Co.
Washington Hydroculture, Inc. v. Payne


In [30]:
import os
from openai import AzureOpenAI

# Cap the graph context with the token-trimming helper so the prompt stays
# within the model's context window; text that already fits is left unchanged.
graph_context_prompt = truncate_text(str(graph_context_content), 124000)

# Ask the model to answer QUESTION using only the graph-ranked cases.
# `client` and `deployment` come from the earlier client-setup cell.
completion = client.chat.completions.create(
    model=deployment,
    messages=[
        {
            "role": "system",
            "content": "You are an AI assistant that helps people find information."
        },
        {
            "role": "user",
            "content": f"""
        DOCUMENT: {graph_context_prompt}
        QUESTION: {QUESTION}
        INSTRUCTIONS: Answer the user's QUESTION using the DOCUMENT text above. Keep your answer grounded in the facts of the DOCUMENT. If the DOCUMENT doesn’t contain the facts to answer the QUESTION, return NONE.
        """
        }
    ],
    max_tokens=800,
    temperature=1,
    top_p=0.95,
    frequency_penalty=0,
    presence_penalty=0,
    stop=None,
    stream=False
)



In [31]:
import json

# Convert the completion object to JSON
completion_json = completion.to_json()

# Print the JSON representation of the completion object
print(json.loads(completion_json)['choices'][0]['message']['content'])

Based on the provided DOCUMENT text, the prominent legal precedent from cases in Washington that involves water leaking into an apartment is from the case **Wilber Development Corporation v. Les Rowland Constr., Inc., et al.** The case summary includes a specific instance where the plaintiff claimed damages due to water being collected and discharged upon their land from storm sewers:

1. **Wilber Development Corporation v. Les Rowland Constr., Inc., et al.**
   - **Citation**: 83 Wash. 2d 871
   - **Facts**: The plaintiff owned 61.3 acres of land and alleged that the defendants (developers and government entities) caused an increase in surface water flow due to storm drainage facilities constructed in conjunction with nearby subdivisions, which resulted in property damage.
   - **Legal Principle**: The court held that a municipality could be liable if it collects surface water by artificial means and discharges it onto an owner's land in a manner different from the natural flow, causi

In [32]:
# Export the raw chat-completion response (graph context) to a JSON file in
# the current working directory.
output_file = f'chatgpt_4_results_w_graph_{QUESTION}.json'
# The serialized response can contain non-ASCII characters (for example
# typographic apostrophes in the model's answer), so write UTF-8 explicitly
# instead of relying on the platform's default encoding.
with open(output_file, 'w', encoding='utf-8') as f:
    f.write(completion.to_json())

print(f"Results have been exported to {output_file}")

Results have been exported to chatgpt_4_results_w_graph_Water leaking into the apartment from the floor above. What are the prominent legal precedents from cases in Washington on this problem?.json
