In [1]:
from pprint import pprint
from langchain_community.llms import Ollama
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain
from langchain.memory import ConversationBufferMemory

In [2]:
# Model identifiers: the first is handed to the Ollama LLM wrapper, the second
# to the Ollama embeddings wrapper.
model_name, embedding_name = "mistral", "nomic-embed-text"

In [3]:
# Embedding model used to vectorise document chunks.
embeddings = OllamaEmbeddings(model=embedding_name)

# Generation model; the stdout callback handler is attached so output is
# streamed to the cell as it is produced.
llm = Ollama(
    model=model_name,
    callbacks=[StreamingStdOutCallbackHandler()],
)

In [4]:
# Load the PDF as one Document per page.
# NOTE: `load()` is used rather than `load_and_split()`. The latter already
# chunks the text with LangChain's default splitter, so every chunk would
# arrive smaller than the chunk_size configured below and the explicit
# splitter would never take effect.
loader = PyPDFLoader("../data/DE-FOA-0003207.pdf")
pages = loader.load()

# Chunk the pages for embedding; this is now the only splitting step, so the
# configured chunk_size / chunk_overlap are the ones actually applied.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=4096, chunk_overlap=128)
all_splits = text_splitter.split_documents(pages)

In [5]:
# Embed every chunk and index it in a Chroma vector store (no persist
# directory is passed here).
db = Chroma.from_documents(
    documents=all_splits,
    embedding=embeddings,
)

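Depending on which retrieval chain class is used, the memory must be configured with different key names: `RetrievalQAWithSourcesChain` uses `question` / `answer`, whereas `RetrievalQA` uses `query` / `result`.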

In [6]:
# Memory keyed for RetrievalQAWithSourcesChain: the user input is stored under
# "question" and the model reply under "answer".
memory = ConversationBufferMemory(
    input_key="question",
    output_key="answer",
    memory_key="chat_history",
    return_messages=True,
)

# "stuff" chain over the Chroma retriever, with the retrieved source
# documents included in the response.
qa = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    retriever=db.as_retriever(),
    chain_type="stuff",
    memory=memory,
    return_source_documents=True,
)

# The input is passed under the chain's explicit input key.
response = qa.invoke(
    {"question": "What is the purpose of this funding opportunity?"}
)

pprint(response)

 The given text defines various terms related to grants management, including "grant," "cost principles," "deadline," "debarment and suspension," "direct costs," "disallowed costs," "domestic organization," "effort," "equipment," and "expanded authorities." These definitions cover different aspects of grant administration and help clarify the meaning of key terms used in the context of managing grants. For example, a grant is defined as an agreement between a Federal awarding agency or pass-through entity and a non-Federal entity to transfer funds for carrying out a public purpose authorized by law. Direct costs are those that can be identified with a particular project or activity, while disallowed costs are those charges to a Federal award that the Federal awarding agency determines to be unallowable. Equipment is defined as tangible personal property having a useful life of more than one year and a per-unit acquisition cost which equals or exceeds the lesser of the capitalization le

In [7]:
# Memory keyed for RetrievalQA: the user input is stored under "query" and the
# model reply under "result".
memory = ConversationBufferMemory(
    input_key="query",
    output_key="result",
    memory_key="chat_history",
    return_messages=True,
)

# "stuff" chain over the Chroma retriever, with the retrieved source
# documents included in the response.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=db.as_retriever(),
    chain_type="stuff",
    memory=memory,
    return_source_documents=True,
)

# The input is passed under the chain's explicit input key.
response = qa.invoke(
    {"query": "Give me an executive summary of this FOA document."}
)

pprint(response)

 Title: FY 2024 Funding for Accelerated, Inclusive Research (FAIR)
Funding Opportunity Announcement (FOA) Number: DE-FOA-0003227
Issuer: Department of Energy (DOE), Office of Science (SC)
Submission Deadline for Pre-Applications: April 23, 2024, at 5:00 PM Eastern Time
Pre-Application Response Date: May 28, 2024, at 11:59 PM Eastern Time
Submission Deadline for Applications: July 16, 2024, at 11:59 PM Eastern Time

The Department of Energy (DOE), Office of Science (SC) has released a Funding Opportunity Announcement (FOA) entitled "FY 2024 Funding for Accelerated, Inclusive Research (FAIR)" with an issue date of March 12, 2024. This is an Initial FOA with a CFDA number of 81.049. The goal of this program is to support research that accelerates the transition of advanced materials and processes to industrial applications while promoting diversity, equity, inclusion, and broadening participation in STEM fields.

Key Features:
1. Two-stage application process with a required pre-applicati