In [73]:
import os
import openai
import PyPDF2
import tiktoken
from dotenv import load_dotenv
import chromadb
from chromadb.utils import embedding_functions
import streamlit as st
import base64
from streamlit_extras.stateful_button import button
import json
import time
from IPython.display import display, Markdown

In [10]:
# Load settings from a local .env file before anything needs them.
# NOTE(review): presumably this is where the OpenAI credentials come from — confirm.
load_dotenv()

True

In [22]:
# Name of the vector-store location.
# NOTE(review): DB_PATH is never passed to the client created below, so it has
# no effect in this cell. chromadb.Client() is presumably the non-persistent
# client — confirm, and wire DB_PATH into a persistent client if on-disk
# storage was the intent.
DB_PATH = "vector_db"
chroma_client = chromadb.Client()
# get_or_create keeps this cell re-runnable: an existing collection is reused.
collection = chroma_client.get_or_create_collection("document_embeddings")

In [23]:
def split_text_into_chunks(text, max_tokens=3000):
    """Split ``text`` into consecutive pieces of at most ``max_tokens`` tokens.

    The text is tokenized with tiktoken's ``cl100k_base`` encoding, the token
    list is cut into fixed-size windows, and each window is decoded back into
    a string.

    Args:
        text: The string to split.
        max_tokens: Maximum number of tokens per chunk; must be positive.

    Returns:
        A list of decoded chunk strings in their original order. Text that
        tokenizes to nothing yields an empty list.

    Raises:
        ValueError: If ``max_tokens`` is zero or negative.
    """
    if max_tokens <= 0:
        # A non-positive window never consumes the token list, so looping
        # until it is empty would never terminate.
        raise ValueError(f"max_tokens must be positive, got {max_tokens}")

    encoding = tiktoken.get_encoding("cl100k_base")
    tokens = encoding.encode(text)
    # Walk the token list by offset rather than re-slicing the remainder on
    # every pass, which copied the whole tail each time.
    return [
        encoding.decode(tokens[start:start + max_tokens])
        for start in range(0, len(tokens), max_tokens)
    ]

In [24]:
def load_docs(folder_path):
    """Read every supported document located directly inside ``folder_path``.

    Supported files are PDFs (text extracted page by page with PyPDF2) and
    ``.txt`` / ``.md`` files (read as UTF-8, undecodable bytes ignored).
    Extensions are matched case-insensitively. Sub-directories and other file
    types are skipped.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        A list of ``{"content": str, "metadata": {"source": filename}}``
        dicts, ordered by file name.
    """
    documents = []
    # os.listdir returns entries in arbitrary order; sorting keeps the document
    # order (and any index-based IDs derived from it) stable between runs.
    for filename in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, filename)
        if not os.path.isfile(file_path):
            continue

        lowered = filename.lower()
        try:
            if lowered.endswith(".pdf"):
                with open(file_path, "rb") as file:
                    reader = PyPDF2.PdfReader(file)
                    # Guard against a page yielding no text object so one
                    # empty page cannot discard the whole document.
                    content = "".join(
                        (page.extract_text() or "") for page in reader.pages
                    )
            elif lowered.endswith((".txt", ".md")):
                with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
                    content = file.read()
            else:
                continue
        except Exception as e:
            # Best effort: report the unreadable file and carry on with the rest.
            print(f"Error loading {filename}: {e}")
            continue

        documents.append({"content": content, "metadata": {"source": filename}})
    return documents

In [27]:
def initialize_db(documents, collection=collection):
    """Empty ``collection`` and index every document as 1024-token chunks.

    Each document is split with ``split_text_into_chunks`` and stored under
    IDs of the form ``doc_<i>_chunk_<j>``, where ``i`` is the document's
    position in ``documents`` and ``j`` the chunk's position in the document.
    Progress is reported with ``print``.

    Args:
        documents: Iterable of dicts holding a ``"content"`` string and a
            ``"metadata"`` dict with a ``"source"`` entry (the shape returned
            by ``load_docs``).
        collection: Collection object exposing ``get``, ``delete`` and
            ``add``; defaults to the module-level ``collection``.
    """
    # Best-effort reset: a failure here is reported but does not stop indexing.
    try:
        stale_ids = collection.get()["ids"]
        if stale_ids:
            collection.delete(ids=stale_ids)
            print(f"Deleted existing IDs: {stale_ids}")
    except Exception as e:
        print(f"Error retrieving or deleting existing documents: {e}")

    # Take one snapshot of the stored IDs and keep it current locally, rather
    # than re-fetching the whole collection once per document. This is only
    # non-empty when the reset above could not clear the collection.
    known_ids = set(collection.get()["ids"])

    for i, doc in enumerate(documents):
        chunks = split_text_into_chunks(doc["content"], 1024)

        # Keep each new ID paired with its own chunk text.
        new_ids = []
        new_chunks = []
        for j, chunk in enumerate(chunks):
            chunk_id = f"doc_{i}_chunk_{j}"
            if chunk_id not in known_ids:
                new_ids.append(chunk_id)
                new_chunks.append(chunk)

        if new_ids:
            collection.add(
                documents=new_chunks,
                metadatas=[doc["metadata"]] * len(new_ids),
                ids=new_ids
            )
            known_ids.update(new_ids)
            print(f"Added new IDs: {new_ids}")
        else:
            print(f"No new IDs to add for {doc['metadata']['source']}")

In [29]:
# Index everything found in ../docs. initialize_db first deletes whatever the
# collection already holds, so re-running this cell rebuilds it from scratch.
initialize_db(load_docs("../docs"))

/Users/ngkuissi/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|██████████| 79.3M/79.3M [00:01<00:00, 43.4MiB/s]


Added new IDs: ['doc_0_chunk_0', 'doc_0_chunk_1', 'doc_0_chunk_2', 'doc_0_chunk_3', 'doc_0_chunk_4', 'doc_0_chunk_5', 'doc_0_chunk_6', 'doc_0_chunk_7', 'doc_0_chunk_8', 'doc_0_chunk_9', 'doc_0_chunk_10', 'doc_0_chunk_11', 'doc_0_chunk_12', 'doc_0_chunk_13']


In [36]:
# Spot-check the second stored chunk to see how the extracted text looks.
print(collection.get()['documents'][1])

  mortgage  payments  of principal,  interest  and 
Insurance begin in full.  
When  coverage  ends 
Your  Insurance  coverage  ends  on the earliest  of the following  dates:  
•the date the mortgage  is paid in full, discharged  (unless  exercising  your
portability option without adding additional funds) or assumed in
writing by some other person;
•the date the principal  balance  of your mortgage  increases;
•the date the Insurance  Service  Centre  receives  your written  or verbal
request to cancel your life, critical illness or disability Insurance;
•the date that all or part of your Insurance  premiums  are 90 days
overdue;
•the last day of the month  in which  you turn 70 years  of age; please  note
that your mortgage  will not be insured  for the full duration  of the
amortization  period  if the amortization  period  of your mortgage
extends  beyond  your 70th birthday;
•the date you are no longer  the borrower,  co-borrower  or guarantor  of
the mortgage;
•the date you die;

In [None]:
def retrieve_relevant_chunks(query, top_k=3, collection=collection):
    """Look up the stored chunks that best match ``query``.

    Args:
        query: Free-text search string, sent as the only query text.
        top_k: Number of results requested from the collection.
        collection: Collection object exposing ``query``; defaults to the
            module-level ``collection``.

    Returns:
        The list of documents returned for the single query, or an empty
        list when the result carries no documents.
    """
    hits = collection.query(query_texts=[query], n_results=top_k)
    # Results are grouped per query text; only one query was sent, so the
    # first group is the one of interest.
    if not hits["documents"]:
        return []
    return hits["documents"][0]

In [39]:
# Smoke-test retrieval with a sample applicant-style query (default top_k of 3).
res = retrieve_relevant_chunks("I am 23 years old looking for a mortgage")

In [41]:
# Show the first chunk that came back for the sample query.
print(res[0])

mortgage for which you are applying for HomeProtector life and/or
disability Insurance is $750,000 or less; and/or
•for critical  illness  Insurance  – you are less than 56 years  of age and the
total of all your insured  RBC  Royal  Bank  mortgages,  plus any mortgage
for which you are applying for HomeProtector critical illness Insurance
is $300,000 or less.
If these  conditions  apply,  then you are not required  to: 
•answer  the application  health  questions;  or 
•be actively working at the time  of the Add -on/Refinance.
Your answers to the health questions on your previous HomeProtector 
application for the mortgage prior to the Add -On/Refinance, plus any 
evidence of your insurability submitted with respect to that application, 
shall be deemed to be information relating to the Application submitted 
under  HARP.  
Pre-existing  condition  exclusion  under  HARP  
The Pre-existing  Condition  Exclusion  under  HARP  means  your benefit  
will be limited if:  
•you re-applied

In [None]:
def rank():
    """Placeholder for a ranking step that has not been written yet.

    The original cell held only an unfinished ``def`` line, which is a syntax
    error and stops the notebook from running top to bottom. This stub keeps
    the name defined while making any premature call fail loudly.

    Raises:
        NotImplementedError: Always, until the ranking logic is implemented.
    """
    raise NotImplementedError("rank() has not been implemented yet")

In [None]:
def simulate(query, user_info, model="gpt-4o-mini"):
    """Ask the chat model for a creditor-insurance simulation about one applicant.

    The two chunks most relevant to ``user_info`` are fetched with
    ``retrieve_relevant_chunks`` and appended to the user prompt as context.
    The assembled prompt is printed before the request is sent.

    Args:
        query: The scenario or question to simulate.
        user_info: Applicant details; interpolated into the prompt with
            ``str()`` formatting and also used as the retrieval query.
        model: Chat model name passed to the OpenAI API.

    Returns:
        The model's reply with surrounding whitespace stripped, an empty
        string if the reply carries no content, or ``"An error occurred."``
        when the OpenAI call raises ``openai.OpenAIError``.
    """
    # NOTE(review): openai.ChatCompletion looks like the pre-1.0 SDK interface —
    # confirm which openai version this notebook is pinned to.

    # Join the instructions with explicit separators so sentences no longer
    # run into each other ("...jsonYou are also...") and none is left dangling.
    system_prompt = " ".join([
        "You are helping persuade a person get creditor insurance.",
        "You are to create a simulation about a person based on some user information.",
        "The information would be in form of a json.",
        "You are also getting information about creditor insurance that may be relevant to the simulation.",
        "If the question is not requesting a simulation, respond with 'I don't know.'",
        "When answering a simulation return as markdown text.",
    ])

    relevant_info = retrieve_relevant_chunks(f"{user_info}", top_k=2)

    # Every section is separated by a blank line, so the context header is no
    # longer glued to the first retrieved chunk.
    prompt_sections = [
        f"Query: {query}",
        f"User information {user_info}",
        "Useful Information about creditor insurance",
    ]
    prompt_sections.extend(f"Context: {info}" for info in relevant_info)
    prompt = "\n\n".join(prompt_sections)

    # Debug aid. NOTE: this echoes the applicant's details into the cell
    # output; clear the output before sharing the notebook.
    print(prompt)

    try:
        response = openai.ChatCompletion.create(
            model=model,
            messages=[{"role": "system", "content": system_prompt},
                      {"role": "user", "content": prompt}]
        )
    except openai.OpenAIError as e:
        print(f"Error with OpenAI API: {e}")
        return "An error occurred."

    # A reply without content would otherwise fail on .strip().
    content = response.choices[0].message.content
    return (content or "").strip()
    

In [69]:
# Load the sample applicant profiles; the file is opened in binary mode and
# handed straight to json.load.
with open("../profile.json", "rb") as f:
    data = json.load(f)

In [70]:
# Peek at the first applicant record to see which fields are available.
data['applicants'][0]

{'first_name': 'John',
 'last_name': 'Doe',
 'age': 35,
 'marital_status': 'Married',
 'employment': 'Software Engineer',
 'income': 120000,
 'household_income': 180000,
 'number_of_kids': 2,
 'current_assets': ['Stocks: $50,000', '401(k): $100,000'],
 'liabilities': ['Car Loan: $20,000', 'Student Loans: $30,000']}

In [71]:
# Run a job-loss scenario for the first applicant.
# NOTE: `res` is reused here — earlier it held the list of retrieved chunks,
# from this point on it holds the model's reply as a string.
res = simulate(query="Create a simulation were the clien has to deal with job loss, how would that affect that?", user_info=data['applicants'][0])

Query: Create a simulation were the clien has to deal with job loss, how would that affect that?

 User information {'first_name': 'John', 'last_name': 'Doe', 'age': 35, 'marital_status': 'Married', 'employment': 'Software Engineer', 'income': 120000, 'household_income': 180000, 'number_of_kids': 2, 'current_assets': ['Stocks: $50,000', '401(k): $100,000'], 'liabilities': ['Car Loan: $20,000', 'Student Loans: $30,000']}Useful Informaiton about creditor insuranceContext:  mortgage 
balance(s)  owing  at your date of death,  to a maximum  of $750,000  for all 
of your insured mortgages combined. The insured mortgage balance 
consists  of: 
•the unpaid principal balance(s);
•mortgage interest from the mortgage payment due date immediately
prior to death to a maximum of five (5) years;
•Insurance  premiums  for this coverage  due and paid after the date of
death
•any pre-payment  charges;
•any overdrawn  balance  in your property  tax account;  and 
•any “cash  back  option”  penalty  amou

In [80]:
# Render the reply as rich Markdown output instead of printing the raw text.
display(Markdown(res))

```markdown
### Simulation: John Doe's Situation After Job Loss

#### Background Information
- **Name:** John Doe
- **Age:** 35
- **Marital Status:** Married
- **Occupation:** Software Engineer
- **Income:** $120,000
- **Household Income:** $180,000
- **Number of Kids:** 2
- **Current Assets:** 
  - Stocks: $50,000
  - 401(k): $100,000
- **Liabilities:** 
  - Car Loan: $20,000
  - Student Loans: $30,000

#### Scenario: Job Loss
John loses his job as a Software Engineer due to company downsizing. Here's how this affects his financial situation and the importance of creditor insurance:

1. **Immediate Financial Impact:**
   - Loss of primary income: $120,000 annually, or approximately $10,000 monthly.
   - Household income drops to $60,000 (from spouse's income), leading to a reduction of disposable income significantly.
   - Monthly mortgage payment and other debts remain unchanged, putting financial strain on the family.

2. **Expenditure Adjustments:**
   - John and his family will need to reassess their monthly budget. Essential expenses like mortgage payments, car loans, and student loans must still be met.
   - With two kids, expenses for their education and daily needs would still apply, putting further pressure on their finances.

3. **Role of Creditor Insurance:**
   - In this moment of vulnerability, having creditor insurance (such as HomeProtector) can provide a safety net.
   - If John had creditor insurance, it could cover the mortgage payments in the event of job loss, ensuring the family's home is protected.
   - The life insurance component would also ease worries about financial obligations should anything happen to him during this time.

4. **Long-term Considerations:**
   - If John cannot find another job soon, his current assets may deplete quicker than anticipated. The stocks and 401(k) could be liquidated for living expenses, affecting long-term financial stability.
   - If John can secure a new job but at a lower salary, the family will need to adjust their lifestyle even further. 

5. **Decision to Insure:**
   - John's situation illustrates the importance of having creditor insurance, especially when the security of family home and the wellbeing of dependents are at risk.
   - He could consider enrolling in creditor insurance to help cover those essential mortgage payments if a job loss became prolonged.

#### Conclusion:
For John Doe, dealing with the repercussions of job loss highlights the critical nature of creditor insurance. It could provide some peace of mind and financial support in times of need, safeguarding his family's future against uncertainties like job loss or other unforeseen events.
```