# ASOP Q&A Machine

## 1. Setup

In [1]:
#!pip install --upgrade --quiet  langchain langchain-community langchainhub langchain-openai chromadb bs4

In [2]:
#!pip install --upgrade --quiet pypdf2
#This is used for PDF parsing

In [3]:
from dotenv import load_dotenv
import os

load_dotenv()  # This loads the variables from .env

os.environ["OPENAI_API_KEY"] = os.getenv('OPENAI_API_KEY') # OpenAI
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = os.getenv('LANGCHAIN_API_KEY')

In [4]:
import bs4
from langchain import hub
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables import RunnableParallel # for RAG with source
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from IPython.display import display, Markdown, Latex
import glob
import chromadb

## 2. Load pdf files and convert to vector DB

In [5]:
# Define a function to load and extract text from PDFs in a folder
'''
def load_pdfs_from_folder(folder_path):
    pdf_files = glob.glob(f"{folder_path}/*.pdf")
    docs = []
    for pdf_file in pdf_files:
        loader = PyPDFLoader(pdf_file)
        docs.extend(loader.load())
    return docs

# Example folder path
folder_path = 'ASOP'
docs = load_pdfs_from_folder(folder_path)

# Text spliter and put in vectorstore
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, 
    chunk_overlap=200,
    length_function=len,)
splits = text_splitter.split_documents(docs)

vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings(), persist_directory="./chroma_db")
'''

'\ndef load_pdfs_from_folder(folder_path):\n    pdf_files = glob.glob(f"{folder_path}/*.pdf")\n    docs = []\n    for pdf_file in pdf_files:\n        loader = PyPDFLoader(pdf_file)\n        docs.extend(loader.load())\n    return docs\n\n# Example folder path\nfolder_path = \'ASOP\'\ndocs = load_pdfs_from_folder(folder_path)\n\n# Text spliter and put in vectorstore\ntext_splitter = RecursiveCharacterTextSplitter(\n    chunk_size=1000, \n    chunk_overlap=200,\n    length_function=len,)\nsplits = text_splitter.split_documents(docs)\n\nvectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings(), persist_directory="./chroma_db")\n'

## 3. Retrieve

In [6]:
vectorstore = Chroma(embedding_function=OpenAIEmbeddings(), persist_directory="./chroma_db")

In [7]:
# Retrieve and RAG chain

# Use MMR (Maximum Marginal Relevance) to find a set of documents that are both similar to the input query and diverse among themselves
# Increase number of documents to get, and increase diversity (lambda mult 0.5 being default, 0 being the most diverse, 1 being the least)
retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={'k': 6, 'lambda_mult': 0.25}) 

prompt = hub.pull("rlm/rag-prompt")
llm = ChatOpenAI(model_name="gpt-3.5-turbo-0125", temperature=0) # context window size 16k for GPT 3.5 Turbo

def format_docs_with_sources(docs):
    formatted_docs = "\n\n".join(doc.page_content for doc in docs)
    sources_pages = "\n".join(f"{doc.metadata['source']} (Page {doc.metadata['page'] + 1})" for doc in docs)
    # Added 1 to the page number assuming 'page' starts at 0 and we want to present it in a user-friendly way

    return f"Documents:\n{formatted_docs}\n\nSources and Pages:\n{sources_pages}"

rag_chain_from_docs = (
    RunnablePassthrough.assign(context=(lambda x: format_docs_with_sources(x["context"])))
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain_with_source = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
).assign(answer=rag_chain_from_docs)

## 4. Generate

In [8]:
def generate_output():
    usr_input = input("What is your question on ASOP?: ")

    output = rag_chain_with_source.invoke(usr_input)

    markdown_output = "### Question\n{}\n\n### Answer\n{}\n\n### Context\n".format(output['question'], output['answer'])

    last_page_content = None  # Variable to store the last page content
    i = 1 # Source indicator

    for doc in output['context']:
        current_page_content = doc.page_content.replace('\n', '  \n')  # Get the current page content
        # Check if the current content is different from the last one
        if current_page_content != last_page_content:
            markdown_output += "- **Source {}**: {}, page {}:\n\n{}\n".format(i, doc.metadata['source'], doc.metadata['page'], current_page_content)
            i = i + 1
        last_page_content = current_page_content  # Update the last page content

    display(Markdown(markdown_output))

### Example questions
- explain ASOP No. 14
- explain ASOP No. 22
- How are expenses relfected in cash flow testing?
- How are expenses relfected in cash flow testing based on ASOP No. 22?
- What is catastrophe risk?
- When do I update assumptions?
- What should I do when I do not have credible data to develop non-economic assumptions?
- How do I set economic assumptions for cash flow testing?

In [11]:
generate_output()

What is your question on ASOP?:  How are expenses relfected in cash flow testing?


### Question
How are expenses relfected in cash flow testing?

### Answer
Expenses are reflected in cash flow testing by considering all known sources of expenses, such as operating expenses, capital expenditures, debt interest and principal payments, and refunds of advance fees. The cash flow projection should show the cash and investment balances at the beginning and end of each projection year. The actuary should consider guidance from ASOP No. 7 when choosing assumptions for cash flow projections.

### Context
- **Source 1**: ASOP/asop007_128.pdf, page 10:

determine what types of deviations from exp ected experience should be taken into account,   
if any, given the purpose of the analysis.    
 3.2.1 Reasons for Cash Flow Testing  
—The actuary should consider cash flow testing   
when variations in the underlying risks are likely to have a material impact on the   
expected    
 cash flows in certain products, certain lines of business, or on the company.   
Situations that might indicate a need for cash flow testing include the following:   
   a. where there are material asset risks (for example, below investment grade   
bonds, assets with payment timing risks such as CMOs or mortgage-backed   
securities, mortgages concentrated in cer tain regions of th e country, and large   
illiquid assets such as real estate);   
   b. where there are liabilities that have cash flows far out into the future (for   
example, structured settlement annuitie s with a significant reinvestment    
rate-of-return risk);
- **Source 2**: ASOP/asop007_128.pdf, page 16:

4.3 Documentation  
—The degree of documentation of the act uary’s cash flow analysis will vary   
with the complexity and purpose of the an alysis. The documentation should be more   
complete for more significant assignments such as regulatory cash flow testing than for other   
assignments such as periodic income projections.
- **Source 3**: ASOP/asop003_124.pdf, page 13:

ASOP No. 3 ⎯September 2007    
 9   
The actuary should reflect revenues from all known sources (such as advance fees,   
periodic fees, additional fees, payments from non-residents, reimbursements from Medicare or other third party payer, and investment income ). The actuary should reflect   
expenses from all known sources (such as ope rating expenses, capital  expenditures, debt   
interest and principal payments, any cost of using an offsite h ealth facility, and refunds of   
advance fees).    The cash flow projection should show the cash and investment balances at the beginning   
and end of each projection year.  The actuary should consider th e guidance in ASOP No. 7, Analysis of Life, Health, or   
Property/Casualty Insurer Cash Flows , when choosing assumptions for cash flow   
projections.   
 3.8 Selection of Actuarial Assumptions  
—The actuary should cons ider the guidance below   
when selecting assumptions for performing act uarial analyses covered by this ASOP.
- **Source 4**: ASOP/asop028_200.pdf, page 20:

Performing Cash Flow Testing for Insurers , in October 1988. ASOP No. 7 was revised in July   
1991 and again in June 2002.     
   
In July 1990, the ASB adopted ASOP No. 14, When to Do Cash Flow Testing for Life and   
Health Insurance Companies , to provide guidance in determining whether to do cash flow   
testing in forming a professional opinion or recommendation. ASOP No. 14 was repealed in   
September 2001 after the ASB determined that relevant portions were incorporated in the 2001   
revisions of ASOP No. 7 and ASOP No. 22, Statements of Opinion Based on Asset Adequacy   
Analysis by Actuaries for Life or Health Insurers.
- **Source 5**: ASOP/asop007_128.pdf, page 5:

The exposure draft of this revised standard was issued in September 2000 with a comment deadline of March 31, 2001. The Cash Flow Testing Task Force carefully considered the twenty-one comment letters received. For a summary of the substantive issues contained in these comment letters, please see appendix 2.   The most significant changes from the exposure draft were as follows:   1. In section 3.10.1, Scenarios, and 3.10.3, Internal Consistency, a few changes were made
- **Source 6**: ASOP/asop007_128.pdf, page 22:

Comment   
  Response A few commentators questioned the use of the phrases  “long duration” and “short-term,” and noted that   
these can have meaning in a GAAP context.   The task force agreed that the use of those phrases could cause confusion in that regard and changed the   
wording.   
Section 3.2.2, Cash Flow Testing is Not Always Necessary   
Comment   Response One commentator asked that the phrase “policy term” be included as part of what the actuary should   
consider as to whether a block is relatively insensitive to changes in economic conditions.    
 The task force agreed and added words to accomplish this.    
Section 3.2.3, Use of Analyses or Data Predating the Analysis Date   
Comment   
 Response One commentator believed that the actuary should cons ider future material events in the analysis.    
 The task force disagreed, believing such a thi ng is beyond the scope of cash flow analysis.    
Section 3.5.1, Policy Cash Flow Characteristics   
Comment


## 5. Links
- https://www.actuarialstandardsboard.org/standards-of-practice/
- https://bard.google.com/chat
- https://chat.openai.com/
- GPT4All
- ollama run solar:10.7b-instruct-v1-q5_K_M

## 6. References
- https://python.langchain.com/docs/use_cases/question_answering/quickstart
- https://python.langchain.com/docs/use_cases/question_answering/sources
- https://chat.langchain.com/