In [1]:
# imports
import ast  # for converting embeddings saved as strings back to arrays
import openai  # for calling the OpenAI API
import pandas as pd  # for storing text and embeddings data
import tiktoken  # for counting tokens
from scipy import spatial # for calculating vector similarities for search
import os
 
# Credentials come from the environment; a missing OPENAI_API_KEY raises KeyError here.
openai.api_key = os.environ["OPENAI_API_KEY"]

# Model identifiers used by the notebook.
# NOTE(review): EMBEDDING_MODEL is not referenced in the cells visible here — confirm it is still needed.
GPT_MODEL = "gpt-3.5-turbo"
EMBEDDING_MODEL = "text-embedding-ada-002"

# Directory holding the persisted Chroma vector store
from langchain.vectorstores import Chroma
persist_directory = 'marcus/chroma/'

In [2]:
from langchain.embeddings.openai import OpenAIEmbeddings
# Embedding function handed to the vector store. The model is passed explicitly
# through EMBEDDING_MODEL so it is pinned in one place instead of depending on
# the library default.
# NOTE(review): this must be the same model that produced the vectors persisted
# under persist_directory — confirm against the ingestion step.
embedding = OpenAIEmbeddings(model=EMBEDDING_MODEL)

# Initialize Chroma DB from the persisted directory
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)

In [3]:
# Sanity check: report how many records the loaded collection holds.
# Note: this reaches into the store's private `_collection` attribute.
record_count = vectordb._collection.count()
print(record_count)

756


In [4]:
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory

# Conversation memory: stores the exchange as message objects under the "history" key
memory = ConversationBufferMemory(return_messages=True, memory_key="history")

# Chat model that generates the answers (temperature fixed at 0)
llm = ChatOpenAI(temperature=0, model_name=GPT_MODEL)

# Expose the vector DB as a retriever: "similarity" search, limited to the top 5 records (k=5)
retriever = vectordb.as_retriever(
    search_kwargs={"k": 5},
    search_type="similarity",
)

In [5]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

# Prompt text for the Q&A chain; it contains the {context} and {question} placeholders.
# A trailing backslash inside a triple-quoted string joins the next line with NO
# separator, so each continued line ends with an explicit space before the
# backslash. Without it, words fuse together (e.g. "questionis", "cannotbe").
support_template = """
You answer questions about the state of the US economy. Respond in a professional tone, with concise answers. \
Use the articles below on the United States Economy to answer the subsequent question. If the time period in question \
is outside the scope of the document return a statement of such as the only response. Do not generate answer from the \
provided materials if the provided materials are out of the time scope specified in the question. If the answer cannot \
be found in the articles, write "I could not find an answer".
    

{context}

Question: {question}"""

# Wrap the template text in a PromptTemplate that expects "context" and "question"
SUPPORT_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=support_template,
)

# Build the retrieval Q&A chain ("stuff" chain type) on top of the LLM and retriever.
# NOTE(review): the prompt declares only "context" and "question" — there is no
# "history" placeholder — so the attached memory appears to be recorded but not
# fed back into the prompt. Confirm whether follow-up questions should see it.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    memory=memory,
    chain_type_kwargs={"prompt": SUPPORT_PROMPT},
)


In [6]:
import langchain

# Set this flag to True to display debugging info
#langchain.debug = True

In [7]:
# First question: run it through the Q&A chain and display the answer text

question = "Tell me the Federal Reserve Bank of San Francisco District labor markets situation in the month of January"
inputs = {"query": question}
result = qa(inputs)
result["result"]


'Based on the provided information, the labor market situation in the Federal Reserve Bank of San Francisco District in the month of January showed that employment levels grew at a modest pace. Labor availability improved across the District, leading to a decrease in job turnover and voluntary quits. Hiring difficulties also eased in consumer services sectors such as retail, food services, and hospitality. However, there were still challenges in attracting experienced talent in sectors like health care, legal services, manufacturing, and skilled trades. Some real estate firms and mortgage providers reduced open positions in response to moderating demand. Overall, employers planned to maintain current employment levels to avoid hiring challenges experienced throughout the pandemic.'

In [8]:
# Show what the conversation memory holds after the first question
chat_history = memory.buffer
print(chat_history)

[HumanMessage(content='Tell me the Federal Reserve Bank of San Francisco District labor markets situation in the month of January', additional_kwargs={}, example=False), AIMessage(content='Based on the provided information, the labor market situation in the Federal Reserve Bank of San Francisco District in the month of January showed that employment levels grew at a modest pace. Labor availability improved across the District, leading to a decrease in job turnover and voluntary quits. Hiring difficulties also eased in consumer services sectors such as retail, food services, and hospitality. However, there were still challenges in attracting experienced talent in sectors like health care, legal services, manufacturing, and skilled trades. Some real estate firms and mortgage providers reduced open positions in response to moderating demand. Overall, employers planned to maintain current employment levels to avoid hiring challenges experienced throughout the pandemic.', additional_kwargs=

In [9]:
# Second question: same chain, different topic; display the answer text
question = "Tell me the state of real estate market in the New York District for the year of 2023"
inputs = {"query": question}
result = qa(inputs)
result["result"]

'Based on the provided information, the state of the real estate market in the New York District for the year of 2023 can be summarized as follows:\n\n- In early 2023, the residential sales market remained subdued in upstate New York, with flat to slightly declining prices and sluggish sales volume. However, sales of single-family homes and apartments picked up in and around New York City, with steady prices and declining inventory.\n- Residential rental markets firmed, with high rents in Manhattan and increasing rents in Brooklyn, Queens, and upstate New York. Rental vacancy rates remained low.\n- Commercial real estate markets were generally stable, with slight increases in office vacancy rates in New York City and northern New Jersey. Retail vacancy rates held steady, while industrial vacancy rates increased slightly. Rents were mostly flat or slightly increasing.\n- Construction conditions weakened, with low levels of new office construction starts and weak multi-family residential

In [10]:
# Show what the conversation memory holds after the second question
chat_history = memory.buffer
print(chat_history)

[HumanMessage(content='Tell me the Federal Reserve Bank of San Francisco District labor markets situation in the month of January', additional_kwargs={}, example=False), AIMessage(content='Based on the provided information, the labor market situation in the Federal Reserve Bank of San Francisco District in the month of January showed that employment levels grew at a modest pace. Labor availability improved across the District, leading to a decrease in job turnover and voluntary quits. Hiring difficulties also eased in consumer services sectors such as retail, food services, and hospitality. However, there were still challenges in attracting experienced talent in sectors like health care, legal services, manufacturing, and skilled trades. Some real estate firms and mortgage providers reduced open positions in response to moderating demand. Overall, employers planned to maintain current employment levels to avoid hiring challenges experienced throughout the pandemic.', additional_kwargs=