In [1]:
# Load the dotenv IPython extension, then point it at data_ingestion/.env.
# Later cells read their settings with os.getenv, presumably from the values loaded here.
%load_ext dotenv
%dotenv data_ingestion/.env

## Connecting to the DB

In [4]:
from sshtunnel import SSHTunnelForwarder
import requests
import json
import os
import pgvector
import psycopg
from pgvector.psycopg import register_vector
import json
def get_db_connection():
    """Open an SSH tunnel to the database host and connect to Postgres through it.

    Settings are read from the environment variables REMOTE_HOST,
    REMOTE_SSH_PORT, PORT, SSH_KEYFILE, SSH_USERNAME and DB_PASSWORD.

    Returns:
        tuple: ``(conn_str_formatted, conn_str, connection)`` where
        ``conn_str_formatted`` is a ``postgresql://`` URL, ``conn_str`` is the
        keyword/value form of the same settings, and ``connection`` is the
        object returned by ``psycopg.connect(conn_str)``.

    Raises:
        ValueError: if a required variable is unset or empty, or if a port
            value is not an integer.

    Note:
        The tunnel object is not part of the return value, so callers cannot
        stop it themselves; it is only stopped here when the database
        connection attempt fails.
    """
    required = (
        "REMOTE_HOST",
        "REMOTE_SSH_PORT",
        "PORT",
        "SSH_KEYFILE",
        "SSH_USERNAME",
        "DB_PASSWORD",
    )
    settings = {name: os.getenv(name) for name in required}
    # Fail early with a readable message instead of int(None) raising TypeError
    # or the literal text "None" being sent as the password.
    missing = [name for name, value in settings.items() if not value]
    if missing:
        raise ValueError(
            "Missing required environment variable(s): " + ", ".join(missing)
        )

    # Setting up the SSH tunnel with tunnel credentials
    remote_ssh_port = int(settings["REMOTE_SSH_PORT"])
    db_port = int(settings["PORT"])
    db_password = settings["DB_PASSWORD"]

    server = SSHTunnelForwarder(
        ssh_address_or_host=(settings["REMOTE_HOST"], remote_ssh_port),
        ssh_username=settings["SSH_USERNAME"],
        ssh_pkey=settings["SSH_KEYFILE"],
        # Key part! Connect to the database port on the remote side of the tunnel.
        remote_bind_address=('localhost', db_port)
    )
    server.start()
    print("server connected")

    conn_str = f"dbname=postgres host=localhost port={server.local_bind_port} user=postgres password={db_password}"
    conn_str_formatted = f"postgresql://postgres:{db_password}@localhost:{server.local_bind_port}/postgres"
    try:
        connection = psycopg.connect(conn_str)
    except Exception:
        # Do not leave the forwarder running when the database is unreachable.
        server.stop()
        raise
    return conn_str_formatted, conn_str, connection

## Set the Question

In [102]:
# Alternative question; no other cell visible in this notebook references it.
QUESTION_2 = "Water leaking into the apartment from the floor above. What are the prominent legal precedents in Washington on this problem?"
# Active question: bound as the query parameter, embedded in the prompts, and
# interpolated into the output file names used by later cells.
QUESTION = "When the landlord is sued in court for leaking pipes, how many time did it result in a favourable decision for the lessee?"

## Save the data from Vector DB

In [103]:
import json

# BEGIN: Run query
conn_str_formatted, conn_str, conn = get_db_connection()

# Top 10 cases ordered by the `<=>` distance between description_vector and
# the embedding of the question, which is bound as the single parameter.
query = """
SELECT id, data
FROM cases
ORDER BY description_vector <=> azure_openai.create_embeddings('text-embedding-3-small', %s )::vector
LIMIT 10;
"""

try:
    with conn.cursor() as cur:
        cur.execute(query, (QUESTION,))
        results = cur.fetchall()
finally:
    # Close the connection even when the query fails.
    conn.close()

results_json = json.dumps(results, indent=2)
print(results_json)
# END: Run query

server connected
[
  [
    "1067108",
    {
      "id": 1067108,
      "name": "R. R. Lee et al., Respondents, v. William B. Cloes et al., Appellants",
      "court": {
        "id": 9029,
        "name": "Washington Supreme Court",
        "name_abbreviation": "Wash."
      },
      "analysis": {
        "sha256": "789c3584e212890cfe5334e0c990fbf9b3c6e6c25521d071c9d1ac90ee288b17",
        "simhash": "1:7ebfe7328bcfad18",
        "pagerank": {
          "raw": 1.03064477862558e-07,
          "percentile": 0.5479782798147691
        },
        "char_count": 2051,
        "word_count": 352,
        "cardinality": 188,
        "ocr_confidence": 0.61
      },
      "casebody": {
        "judges": [],
        "parties": [
          "R. R. Lee et al., Respondents, v. William B. Cloes et al., Appellants."
        ],
        "opinions": [
          {
            "text": "Per Curiam.\nAppellants, Mr. and Mrs. William B. Cloes, rented store space in a building owned by respondents, Mr. and Mrs. 

In [159]:
# Persist the vector-search results (results_json) under a name derived from the question.
output_file = f'initial_context_results_{QUESTION}.json'
with open(output_file, mode='w') as out_handle:
    out_handle.write(results_json)

print(f"Results have been exported to {output_file}")

Results have been exported to initial_context_results_When the landlord is sued in court for leaking pipes, how many time did it result in a favourable decision for the lessee?.json


## Save the data from Reranker

In [105]:
# Define the reranker query and open a fresh tunnel/connection for it.
conn_str_formatted, conn_str, conn = get_db_connection()
new_query = """
-- Query to use semantic ranker model to rerank the results of vector search
WITH vector AS (
	SELECT ROW_NUMBER() OVER () AS ord, text
	FROM (
		SELECT data -> 'casebody' -> 'opinions' -> 0 ->> 'text' AS text
		FROM cases
		ORDER BY description_vector <=> azure_openai.create_embeddings('text-embedding-3-small', %s)::vector
		LIMIT 50)
),
result AS (
	SELECT * 
	FROM jsonb_array_elements(
			semantic_relevance(%s, 50)
		) WITH ORDINALITY AS elem(value)
)
SELECT vector.ord AS ord, result.value::DOUBLE PRECISION AS value, LEFT(vector.text, 20000)
FROM vector
JOIN result ON vector.ord = result.ordinality
ORDER BY value DESC;
"""

# Execute the new query; the question is bound twice (embedding input and
# semantic_relevance argument). Each returned row is (ord, value, text).
try:
    with conn.cursor() as cur:
        cur.execute(new_query, (QUESTION, QUESTION))
        new_results = cur.fetchall()
finally:
    # The original cell never closed this connection; always release it.
    conn.close()

new_results_json = json.dumps(new_results, indent=2)
print(new_results_json)

server connected
[
  [
    1,
    -6.135669708251953,
    "Per Curiam.\nAppellants, Mr. and Mrs. William B. Cloes, rented store space in a building owned by respondents, Mr. and Mrs. R. R. Lee. They appeal from the decision of the trial court, sitting without a jury, which awarded judgment to the Lees for the face amount of a note, given by the Cloeses to secure payment of rent, together with interest and attorney\u2019s fees. The court also dismissed the claim of appellants for damages for destruction of certain goods, stock and fixtures.\nThe errors assigned deal with the court\u2019s failure to apply the doctrine of res ipsa loquitur; a holding that an exculpatory clause in the lease between the parties placed the risk of loss for damages to personal property on the premises upon appellants; and, to the court\u2019s findings of fact that an oral agreement was entered into terminating the tenancy on September 1,1969.\nThe record in this case fails to show whether the question of the 

In [160]:
# Persist the reranker results (new_results_json); note the double underscore in the file name.
output_file = f'reranker_context_results__{QUESTION}.json'
with open(output_file, mode='w') as out_handle:
    out_handle.write(new_results_json)

print(f"Results have been exported to {output_file}")

Results have been exported to reranker_context_results__When the landlord is sued in court for leaking pipes, how many time did it result in a favourable decision for the lessee?.json


## Create OpenAI request with Simple Context

In [107]:
from openai import AzureOpenAI
# Azure OpenAI client built from the AZURE_OPENAI_*_MAXIM environment variables.
# NOTE: `client` is rebound in a later cell (In [111]) with api_version
# "2024-05-01-preview", and no visible cell uses this instance before then.
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY_MAXIM"),
    api_version="2024-02-01",
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT_MAXIM")
)

In [161]:
import json

# Location of the vector-search results written by the export cell above.
file_path = f'initial_context_results_{QUESTION}.json'

# Load the file contents and decode them into Python objects.
with open(file_path, 'r') as source:
    initial_context_data = json.loads(source.read())


## Get all the top vector results

In [109]:
# Flatten every returned row into one string. str() gives the Python repr of
# the parsed list (single-quoted keys, as the output below shows), not JSON.
initial_context_text = str(initial_context_data)
# Alternative kept for reference (opinion text of the first row only):
#initial_context_data[0][1]['casebody']['opinions'][0]['text']
print(initial_context_text)

[['1067108', {'id': 1067108, 'name': 'R. R. Lee et al., Respondents, v. William B. Cloes et al., Appellants', 'court': {'id': 9029, 'name': 'Washington Supreme Court', 'name_abbreviation': 'Wash.'}, 'analysis': {'sha256': '789c3584e212890cfe5334e0c990fbf9b3c6e6c25521d071c9d1ac90ee288b17', 'simhash': '1:7ebfe7328bcfad18', 'pagerank': {'raw': 1.03064477862558e-07, 'percentile': 0.5479782798147691}, 'char_count': 2051, 'word_count': 352, 'cardinality': 188, 'ocr_confidence': 0.61}, 'casebody': {'judges': [], 'parties': ['R. R. Lee et al., Respondents, v. William B. Cloes et al., Appellants.'], 'opinions': [{'text': 'Per Curiam.\nAppellants, Mr. and Mrs. William B. Cloes, rented store space in a building owned by respondents, Mr. and Mrs. R. R. Lee. They appeal from the decision of the trial court, sitting without a jury, which awarded judgment to the Lees for the face amount of a note, given by the Cloeses to secure payment of rent, together with interest and attorney’s fees. The court al

## Make sure the context always fits within the token limit

In [110]:
import tiktoken

# Initialize the tokenizer
# NOTE(review): cl100k_base may not be the encoding of the deployed model
# (DEPLOYMENT_NAME defaults to "gpt-4o" in a later cell) — confirm, e.g. with
# tiktoken.encoding_for_model, so the token counts match what the service sees.
tokenizer = tiktoken.get_encoding("cl100k_base")

# Function to truncate text to a specified number of tokens
def truncate_text(text, max_tokens=4096, encoder=None):
    """Return ``text`` cut down to at most ``max_tokens`` tokens.

    Args:
        text: String to truncate.
        max_tokens: Maximum number of tokens to keep; must be >= 0.
        encoder: Optional object exposing ``encode(str) -> list`` and
            ``decode(list) -> str``. Defaults to the notebook-level
            ``tokenizer``.

    Returns:
        ``text`` unchanged when it already fits, otherwise the decoded form of
        its first ``max_tokens`` tokens.

    Raises:
        ValueError: if ``max_tokens`` is negative. A negative value would
            otherwise slice from the end and keep almost the whole text.
    """
    if max_tokens < 0:
        raise ValueError(f"max_tokens must be non-negative, got {max_tokens}")
    active_encoder = tokenizer if encoder is None else encoder
    tokens = active_encoder.encode(text)
    if len(tokens) <= max_tokens:
        return text
    return active_encoder.decode(tokens[:max_tokens])

# Truncate the initial (vector-search) context text to at most 124000 tokens
initial_context_text = truncate_text(initial_context_text, 124000)
print(initial_context_text)

[['1067108', {'id': 1067108, 'name': 'R. R. Lee et al., Respondents, v. William B. Cloes et al., Appellants', 'court': {'id': 9029, 'name': 'Washington Supreme Court', 'name_abbreviation': 'Wash.'}, 'analysis': {'sha256': '789c3584e212890cfe5334e0c990fbf9b3c6e6c25521d071c9d1ac90ee288b17', 'simhash': '1:7ebfe7328bcfad18', 'pagerank': {'raw': 1.03064477862558e-07, 'percentile': 0.5479782798147691}, 'char_count': 2051, 'word_count': 352, 'cardinality': 188, 'ocr_confidence': 0.61}, 'casebody': {'judges': [], 'parties': ['R. R. Lee et al., Respondents, v. William B. Cloes et al., Appellants.'], 'opinions': [{'text': 'Per Curiam.\nAppellants, Mr. and Mrs. William B. Cloes, rented store space in a building owned by respondents, Mr. and Mrs. R. R. Lee. They appeal from the decision of the trial court, sitting without a jury, which awarded judgment to the Lees for the face amount of a note, given by the Cloeses to secure payment of rent, together with interest and attorney’s fees. The court al

In [111]:
import os
from openai import AzureOpenAI

# Deployment settings come from the environment; the model name falls back to "gpt-4o".
endpoint = os.getenv("AZURE_OPENAI_ENDPOINT_MAXIM")
deployment = os.getenv("DEPLOYMENT_NAME", "gpt-4o")
subscription_key = os.getenv("AZURE_OPENAI_API_KEY_MAXIM")

# Key-based client; this rebinds `client` with api_version "2024-05-01-preview".
client = AzureOpenAI(
    api_key=subscription_key,
    api_version="2024-05-01-preview",
    azure_endpoint=endpoint,
)

# One system turn plus one user turn carrying the vector-search context and the question.
system_message = {
    "role": "system",
    "content": "You are an AI assistant that helps people find information.",
}
user_message = {
    "role": "user",
    "content": f"Using the following context {initial_context_text}, answer the following question {QUESTION}",
}

# Sampling and response settings passed through to the chat completion call.
request_options = dict(
    max_tokens=800,
    temperature=1,
    top_p=0.95,
    frequency_penalty=0,
    presence_penalty=0,
    stop=None,
    stream=False,
)

completion = client.chat.completions.create(
    model=deployment,
    messages=[system_message, user_message],
    **request_options,
)



In [112]:
import json

# Serialize the completion, parse it back into a dict, and show only the
# answer text of the first choice.
completion_json = completion.to_json()
completion_payload = json.loads(completion_json)
first_choice = completion_payload['choices'][0]
print(first_choice['message']['content'])

Based on the provided context, there are several cases where the landlord was sued for issues related to leaking pipes or similar maintenance issues. Here's a summary of the outcomes:

1. **R. R. Lee et al., Respondents, v. William B. Cloes et al., Appellants**:
   - **Outcome**: Judgment affirmed in favor of the landlord.
   - **Details**: The court found insufficient evidence to show that the freezing of water pipes was due to negligence by the respondents (landlord).

2. **John Pappas, Appellant, v. James Zerwoodis et al., Respondents**:
   - **Outcome**: Judgment affirmed in favor of the landlord.
   - **Details**: The court held that the lessee did not establish his claimed damages with sufficient certainty and dismissed the case.

In both cases, the decisions were favorable to the landlords. Therefore, based on these instances provided in the context, the landlords had favorable decisions in court for cases involving leaking pipes.


In [162]:
# Export the simple-context completion to a file.
# The f-prefix is required: without it the file is literally named
# 'chatgpt_4_results_simple_{QUESTION}.json' instead of carrying the question,
# unlike the other export cells in this notebook.
output_file = f'chatgpt_4_results_simple_{QUESTION}.json'
with open(output_file, 'w') as f:
    f.write(completion.to_json())

print(f"Results have been exported to {output_file}")

FileNotFoundError: [Errno 2] No such file or directory: 'When the landlord is sued in court for leaking pipes, how many time did it result in a favourable decision for the lessee?/chatgpt_4_results_simple.json'

## Create OpenAI request with Reranker Context

In [114]:
import json

# Path of the reranker results. It has to match the name used by the export
# cell exactly: an f-string (so {QUESTION} is interpolated) with a double
# underscore before the question.
file_path = f'reranker_context_results__{QUESTION}.json'

# Read and parse the JSON file
with open(file_path, 'r') as file:
    reranked_context_data = json.load(file)

## Get all the top reranked results

In [146]:
# Flatten the parsed reranker rows into one string; str() yields the Python
# repr of the list rather than JSON.
reranked_context_text = str(reranked_context_data)
# Alternative kept for reference (text column of the first row only):
#reranked_context_data[0][2] 
print(reranked_context_text)



## Make sure the context always fits within the token limit

In [150]:
# Work from the parsed reranker rows; index 2 of each row holds the opinion text.
data = reranked_context_data
# Keep only the rows whose text is present.
reranked_context_content = [row[2] for row in data if row[2] is not None]
# Echo each kept passage, in the same order as the rows.
for passage in reranked_context_content:
    print(passage)

Per Curiam.
Appellants, Mr. and Mrs. William B. Cloes, rented store space in a building owned by respondents, Mr. and Mrs. R. R. Lee. They appeal from the decision of the trial court, sitting without a jury, which awarded judgment to the Lees for the face amount of a note, given by the Cloeses to secure payment of rent, together with interest and attorney’s fees. The court also dismissed the claim of appellants for damages for destruction of certain goods, stock and fixtures.
The errors assigned deal with the court’s failure to apply the doctrine of res ipsa loquitur; a holding that an exculpatory clause in the lease between the parties placed the risk of loss for damages to personal property on the premises upon appellants; and, to the court’s findings of fact that an oral agreement was entered into terminating the tenancy on September 1,1969.
The record in this case fails to show whether the question of the application of the doctrine of res ipsa loqui-tur was presented to the trial 

In [151]:
# Number of reranked rows that carried non-null text (displayed as the cell output).
len(reranked_context_content)

50

In [153]:
# Turn the filtered passages into the prompt context, capped at 124000 tokens.
# This cell rebinds reranked_context_content from a list to a string; the
# isinstance guard keeps a re-run from wrapping the already-stringified value
# in another str() (which would add quoting and escapes).
if not isinstance(reranked_context_content, str):
    reranked_context_content = str(reranked_context_content)
reranked_context_content = truncate_text(reranked_context_content, 124000)

# Apply the same cap to the full-row string and show it.
reranked_context_text = truncate_text(reranked_context_text, 124000)
print(reranked_context_text)



In [154]:
import os
from openai import AzureOpenAI

# Same request as the simple-context run, but the user turn carries the
# reranked passages instead of the raw vector-search rows.
system_message = {
    "role": "system",
    "content": "You are an AI assistant that helps people find information.",
}
user_message = {
    "role": "user",
    "content": f"Using the following context {reranked_context_content}, answer the following question {QUESTION}",
}

# Sampling and response settings passed through to the chat completion call.
request_options = dict(
    max_tokens=800,
    temperature=1,
    top_p=0.95,
    frequency_penalty=0,
    presence_penalty=0,
    stop=None,
    stream=False,
)

completion = client.chat.completions.create(
    model=deployment,
    messages=[system_message, user_message],
    **request_options,
)



In [155]:
import json

# Serialize the completion, parse it back into a dict, and show only the
# answer text of the first choice.
completion_json = completion.to_json()
completion_payload = json.loads(completion_json)
first_choice = completion_payload['choices'][0]
print(first_choice['message']['content'])

The provided text contains numerous excerpts from legal cases related to landlord-tenant disputes, but it does not include specific information on how many times landlords have been sued for leaking pipes or how many such cases resulted in favorable decisions for lessees. To accurately determine the number of such cases and outcomes, one would need access to a comprehensive database of court decisions or legal records focusing on landlord-tenant disputes involving leaking pipes. The text mostly contains details of individual cases and various legal principles without specific aggregation of such data.


In [157]:
# Persist the reranker-context completion (its JSON form) under a name derived from the question.
output_file = f'chatgpt_4_results_w_reranker_{QUESTION}.json'
with open(output_file, mode='w') as out_handle:
    out_handle.write(completion.to_json())

print(f"Results have been exported to {output_file}")

Results have been exported to chatgpt_4_results_w_reranker_When the landlord is sued in court for leaking pipes, how many time did it result in a favourable decision for the lessee?.json
