In [4]:
import os
import openai

from dotenv import load_dotenv, find_dotenv
# Locate the nearest .env file and load its variables into the process environment.
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)

# Indexing os.environ directly raises KeyError when the key is not configured.
openai.api_key = os.environ['OPENAI_API_KEY']

In [5]:
from langchain_openai import ChatOpenAI
# Chat model shared by the chains built later in this notebook.
llm_name = 'gpt-4-turbo-preview'
llm = ChatOpenAI(
    model_name=llm_name,
    temperature=0,
)

In [3]:
# Smoke test: send one message to the model and display the reply.
greeting = "Hello world!"
llm.invoke(greeting)

AIMessage(content='Hello! How can I assist you today?')

In [4]:
from langchain.document_loaders.csv_loader import CSVLoader
# Load the processed report CSV into a list of Documents.
# NOTE(review): the printed section text contains "faÃ§ade", which looks like UTF-8
# read with a different codec — confirm the file encoding and consider passing
# `encoding=` to CSVLoader.
csv_path = '../dat/526963_processed_data.csv'
loader = CSVLoader(file_path=csv_path)
sections = loader.load()

In [5]:
# Number of Documents the loader produced.
len(sections)

74

In [6]:
# Look at the raw text of a single loaded Document (index 22).
sample_doc = sections[22]
section = sample_doc.page_content
print(section)

section_reference: 3.2.4
category: Property Characteristics
section: Site Improvements
subsection: Landscape and Irrigation
cost_related: False
docu_txt: Was There An Irrigation System? is An underground automatic irrigation system was provided., Is there landscaping or irrigation? is yes, Describe landscaping issue(s) and recommendation(s) in complete sentences and identify specific locations is Based on the CC&R's and discussions with the management company, all "common" landscape areas are expensed through the HOA fees. All landscape areas within the subject's general area is ownership responsibility. Based on this, barren turf was observed throughout the site typical of conditions noted to the south elevation of the structure. Refurbishment of the turf is recommended.
Trees, shrubs and Ivy (growing on the walls) were observed impeding on the building rooftop and faÃ§ade at points along the north, south and east walls. No portion of any vegetation should be in contact with the build

In [7]:
# Character count from the 'docu_txt' label to the end of the section text.
# NOTE(review): str.find returns -1 when the label is absent, in which case the
# slice below is just the last character — verify the label is always present.
index = section.find('docu_txt')
docu_tail = section[index:]
len(docu_tail)

1394

In [8]:
# Metadata attached to the first loaded Document.
sections[0].metadata

{'source': '../dat/526963_processed_data.csv', 'row': 0}

In [9]:
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate


In [10]:
# Path of the processed report CSV that the retrieval pipeline loads.
file = '../dat/526963_processed_data.csv'

In [11]:
# Build the conversational retrieval chain over the report CSV:
# load -> split -> embed/index -> retriever -> memory -> prompt -> chain.

# load documents
loader = CSVLoader(file_path = file)
sections = loader.load()

# split documents into chunks of at most 2000 characters
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000)
secs = text_splitter.split_documents(sections)

# define embedding
embeddings = OpenAIEmbeddings()

# create vector database from data
# Index the split chunks (`secs`). Previously the unsplit `sections` were indexed,
# which left the splitter's output unused.
# NOTE(review): no collection_name/persist_directory is given; re-running this cell
# in the same kernel may add the same chunks to an existing collection — confirm.
vectordb = Chroma.from_documents(secs, embeddings)

# define retriever: similarity search returning the top 3 chunks per query
retriever = vectordb.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# memory: conversation history is stored under the "chat_history" key as messages
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

# prompt used when answering from the retrieved context
template = """Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
Provide a thorough explanation of your reasoning, using bullet points for clarity where needed.
Conclude each response with 'Thank you for asking, Is there anything else I can help you with?' placed 1 line after the answer.

{context}

Return the output in the following format: 
Question: {question}
Answer:

Thank you for asking, Is there anything else I can help you with?
"""
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"],template=template,)

#ConversationalRetrievalChain
# verbose=True prints the formatted prompts of the intermediate chains.
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    memory=memory,
    verbose = True,
    combine_docs_chain_kwargs={'prompt': QA_CHAIN_PROMPT}
)

In [12]:
# Ask for the client details recorded in the report.
question = "What is the client info of the report?"
# Use invoke(): calling the chain object directly is deprecated and emits a warning.
result = qa.invoke({"question": question})
print(result['answer'])

  warn_deprecated(




[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
Provide a thorough explanation of your reasoning, using bullet points for clarity where needed.
Conclude each response with 'Thank you for asking, Is there anything else I can help you with?' placed 1 line after the answer.

section_reference: 1.3
category: Property Description
section: CLIENT INFO
subsection: 
cost_related: False
docu_txt: Client State is California, Client Company is GK Management Co., Inc, Client Zip is 90230, Does the client require a professional stamp? is no, Client First Name is Jason, Client Last Name is Mandler, Client Address is 5150 Overland Avenue, Client City is Culver City

section_reference: 1.8
category: Property Description
section: REVIEWER INFO
subsection:

In [13]:
# Follow-up question that relies on the chat history kept in memory.
question = "What is the client's company?"
# Use invoke(): calling the chain object directly is deprecated.
result = qa.invoke({"question": question})
print(result['answer'])



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: What is the client info of the report?
Assistant: Question: What is the client info of the report?

Answer:
- Client State: California
- Client Company: GK Management Co., Inc
- Client Zip: 90230
- Does the client require a professional stamp?: No
- Client First Name: Jason
- Client Last Name: Mandler
- Client Address: 5150 Overland Avenue
- Client City: Culver City

Thank you for asking, Is there anything else I can help you with?
Follow Up Input: What is the client's company?
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following pieces of context to answer the question at the end. 
If you d

In [14]:
# Ask about a person named in the client info.
question = "Who is Jason Mandler?"
# Use invoke(): calling the chain object directly is deprecated.
result = qa.invoke({"question": question})
print(result["answer"])



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: What is the client info of the report?
Assistant: Question: What is the client info of the report?

Answer:
- Client State: California
- Client Company: GK Management Co., Inc
- Client Zip: 90230
- Does the client require a professional stamp?: No
- Client First Name: Jason
- Client Last Name: Mandler
- Client Address: 5150 Overland Avenue
- Client City: Culver City

Thank you for asking, Is there anything else I can help you with?
Human: What is the client's company?
Assistant: Question: What is the name of the company for the client mentioned in the report?
Answer: The name of the company for the client mentioned in the report is GK Management Co., Inc.

Thank you for asking, Is there anything else I can help you with?
Follow Up Input: Wh

In [15]:
# Ask for the reviewer details section of the report.
question = "What is the REVIEWER INFO of the report?"
# Use invoke(): calling the chain object directly is deprecated.
result = qa.invoke({"question": question})
print(result["answer"])



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: What is the client info of the report?
Assistant: Question: What is the client info of the report?

Answer:
- Client State: California
- Client Company: GK Management Co., Inc
- Client Zip: 90230
- Does the client require a professional stamp?: No
- Client First Name: Jason
- Client Last Name: Mandler
- Client Address: 5150 Overland Avenue
- Client City: Culver City

Thank you for asking, Is there anything else I can help you with?
Human: What is the client's company?
Assistant: Question: What is the name of the company for the client mentioned in the report?
Answer: The name of the company for the client mentioned in the report is GK Management Co., Inc.

Thank you for asking, Is there anything else I can help you with?
Human: Who is Jason

In [16]:
# Ask for a high-level description of the property.
question = "What is the overview of the property?"
# Use invoke(): calling the chain object directly is deprecated.
result = qa.invoke({"question": question})
print(result["answer"])



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: What is the client info of the report?
Assistant: Question: What is the client info of the report?

Answer:
- Client State: California
- Client Company: GK Management Co., Inc
- Client Zip: 90230
- Does the client require a professional stamp?: No
- Client First Name: Jason
- Client Last Name: Mandler
- Client Address: 5150 Overland Avenue
- Client City: Culver City

Thank you for asking, Is there anything else I can help you with?
Human: What is the client's company?
Assistant: Question: What is the name of the company for the client mentioned in the report?
Answer: The name of the company for the client mentioned in the report is GK Management Co., Inc.

Thank you for asking, Is there anything else I can help you with?
Human: Who is Jason

In [17]:
# Ask for an aggregate cost figure.
# NOTE(review): the retriever only supplies a handful of chunks per query, so a
# whole-report total may be computed from partial context — verify the answer.
question = "What is the total cost?"
# Use invoke(): calling the chain object directly is deprecated.
result = qa.invoke({"question": question})
print(result['answer'])



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: What is the client info of the report?
Assistant: Question: What is the client info of the report?

Answer:
- Client State: California
- Client Company: GK Management Co., Inc
- Client Zip: 90230
- Does the client require a professional stamp?: No
- Client First Name: Jason
- Client Last Name: Mandler
- Client Address: 5150 Overland Avenue
- Client City: Culver City

Thank you for asking, Is there anything else I can help you with?
Human: What is the client's company?
Assistant: Question: What is the name of the company for the client mentioned in the report?
Answer: The name of the company for the client mentioned in the report is GK Management Co., Inc.

Thank you for asking, Is there anything else I can help you with?
Human: Who is Jason

# Dustin Testing

In [11]:
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate


In [12]:
# Path of the processed report CSV that the retrieval pipeline loads.
file = '../dat/526963_processed_data.csv'

In [32]:
# Rebuild the conversational retrieval chain for this test section, this time
# retrieving 6 chunks per query.

# load documents
loader = CSVLoader(file_path = file)
sections = loader.load()

# split documents into chunks of at most 2000 characters
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000)
secs = text_splitter.split_documents(sections)

# define embedding
embeddings = OpenAIEmbeddings()

# create vector database from data
# Index the split chunks (`secs`). Previously the unsplit `sections` were indexed,
# which left the splitter's output unused.
# NOTE(review): no collection_name/persist_directory is given; re-running this cell
# in the same kernel may add the same chunks to an existing collection — confirm.
vectordb = Chroma.from_documents(secs, embeddings)

# define retriever: similarity search returning the top 6 chunks per query
retriever = vectordb.as_retriever(search_type="similarity", search_kwargs={"k": 6})

# memory: a fresh buffer, so this chain starts with an empty chat history
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

# prompt used when answering from the retrieved context
template = """Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
Provide a thorough explanation of your reasoning, using bullet points for clarity where needed.
Conclude each response with 'Thank you for asking, Is there anything else I can help you with?' placed 1 line after the answer.

{context}

Return the output in the following format: 
Question: {question}
Answer:

Thank you for asking, Is there anything else I can help you with?
"""
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"],template=template,)

#ConversationalRetrievalChain
# verbose=True prints the formatted prompts of the intermediate chains.
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    memory=memory,
    verbose = True,
    combine_docs_chain_kwargs={'prompt': QA_CHAIN_PROMPT}
)

In [37]:
# Note on these tests: the CSV is organised into sections, so it is treated here as
# if it were a whole folder of reports, with each section standing in for one document.

# identify & summarize property(s) that match certain property characteristic criteria (or deficiencies)
question = "Name the sections that have at least 1 ADA parking space?"
# Use invoke(): calling the chain object directly is deprecated.
result = qa.invoke({"question": question})
print(result['answer'])
# observed: answer is inconsistent



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: Name the sections that talk about skylights?
Assistant: Question: Name the sections that talk about skylights?

Answer: The sections that talk about skylights are:
- 4.4.1 Structure and Envelope, Roof, Roofing Materials
- 4.4.3 Structure and Envelope, Roof, Roof Mounted Equipment

Thank you for asking, Is there anything else I can help you with?
Human: What roof mounted equipment are items are present in section reference 4.4.3?
Assistant: Question: What items of roof mounted equipment are mentioned in section 4.4.3?
Answer: The items of roof-mounted equipment mentioned in section 4.4.3 include:
- Roof access hatch
- Skylights
- Two vent fans
- Three package HVAC systems

Thank you for asking, Is there anything else I can help you with?
Hum

In [33]:
# identify & summarize property(s) that match certain property characteristic criteria (or deficiencies)
question = "Name the sections that talk about skylights?"
# Use invoke(): calling the chain object directly is deprecated.
result = qa.invoke({"question": question})
print(result['answer'])



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
Provide a thorough explanation of your reasoning, using bullet points for clarity where needed.
Conclude each response with 'Thank you for asking, Is there anything else I can help you with?' placed 1 line after the answer.

section_reference: 4.4.1
category: Structure and Envelope
section: Roof
subsection: Roofing Materials
cost_related: False
docu_txt: Skylights is Conventional domed translucent plexiglass skylights were provided. The skylights were factory flashed., Roof covering is Roof coverings consisted of built-up roofing with mineral-surfaced cap sheet, Roof condition is The roofing systems appeared to be in poor condition., If so, how was it provided? is Safe roof access was provid

In [34]:
# return individual property report details
question = "What roof mounted equipment are items are present in section reference 4.4.3?"
# Use invoke(): calling the chain object directly is deprecated.
result = qa.invoke({"question": question})
print(result['answer'])



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: Name the sections that talk about skylights?
Assistant: Question: Name the sections that talk about skylights?

Answer: The sections that talk about skylights are:
- 4.4.1 Structure and Envelope, Roof, Roofing Materials
- 4.4.3 Structure and Envelope, Roof, Roof Mounted Equipment

Thank you for asking, Is there anything else I can help you with?
Follow Up Input: What roof mounted equipment are items are present in section reference 4.4.3?
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you 

In [35]:
# summarize portfolio cost details
question = "Give me the total cost of all roofs in the next 5 years for the whole file?"
# Use invoke(): calling the chain object directly is deprecated.
result = qa.invoke({"question": question})
print(result['answer'])
# observed: this consistently misses 1 or 2 sections
# NOTE(review): possibly because the retriever passes only its top-k chunks to the
# model, so a whole-file total is computed from partial context — confirm.



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: Name the sections that talk about skylights?
Assistant: Question: Name the sections that talk about skylights?

Answer: The sections that talk about skylights are:
- 4.4.1 Structure and Envelope, Roof, Roofing Materials
- 4.4.3 Structure and Envelope, Roof, Roof Mounted Equipment

Thank you for asking, Is there anything else I can help you with?
Human: What roof mounted equipment are items are present in section reference 4.4.3?
Assistant: Question: What items of roof mounted equipment are mentioned in section 4.4.3?
Answer: The items of roof-mounted equipment mentioned in section 4.4.3 include:
- Roof access hatch
- Skylights
- Two vent fans
- Three package HVAC systems

Thank you for asking, Is there anything else I can help you with?
Fol

In [36]:
# comparison among projects
question = "Which section has the highest roof cost?"
# Use invoke(): calling the chain object directly is deprecated.
result = qa.invoke({"question": question})
print(result['answer'])



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: Name the sections that talk about skylights?
Assistant: Question: Name the sections that talk about skylights?

Answer: The sections that talk about skylights are:
- 4.4.1 Structure and Envelope, Roof, Roofing Materials
- 4.4.3 Structure and Envelope, Roof, Roof Mounted Equipment

Thank you for asking, Is there anything else I can help you with?
Human: What roof mounted equipment are items are present in section reference 4.4.3?
Assistant: Question: What items of roof mounted equipment are mentioned in section 4.4.3?
Answer: The items of roof-mounted equipment mentioned in section 4.4.3 include:
- Roof access hatch
- Skylights
- Two vent fans
- Three package HVAC systems

Thank you for asking, Is there anything else I can help you with?
Hum