In [1]:
from langchain_community.llms import Ollama
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory

In [2]:
# Names of the Ollama models used below: one embeds document chunks, the other
# is the LLM that answers questions over the retrieved chunks.
embedding_name: str = "nomic-embed-text"
model_name: str = "mistral"

In [3]:
# Callback that streams generated tokens to stdout as the model produces them.
stdout_streamer = StreamingStdOutCallbackHandler()
llm = Ollama(callbacks=[stdout_streamer], model=model_name)

# Embedding model used to vectorise the document chunks.
embeddings = OllamaEmbeddings(model=embedding_name)

In [4]:
# Load the FOA PDF. Plain load() is used instead of load_and_split(): the latter
# already chunks the text with LangChain's default splitter, which would leave
# the explicitly configured splitter below with little or nothing to do.
loader = PyPDFLoader("../data/DE-FOA-0003207.pdf")
pages = loader.load()

# Chunk the loaded pages for embedding; this is now the only splitting step,
# so chunk_size / chunk_overlap actually govern the resulting chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=4096, chunk_overlap=128)
all_splits = text_splitter.split_documents(pages)

In [5]:
# Embed every chunk and index it in a Chroma vector store.
# No persist directory is passed, so nothing here asks Chroma to save the index to disk.
db = Chroma.from_documents(documents=all_splits, embedding=embeddings)

In [6]:
# Conversation buffer exposed under the "chat_history" key; return_messages=True
# keeps the turns as message objects rather than one flattened string.
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="chat_history",
)

In [7]:
# Retrieval-augmented QA chain: fetch relevant chunks from the vector store and
# "stuff" them into a single prompt for the LLM.
# NOTE(review): memory records each turn under "chat_history", but whether the
# default "stuff" prompt actually reads that variable is not visible here -
# confirm before relying on follow-up questions seeing earlier turns.
retriever = db.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    memory=memory,
)

In [8]:
# Ask the chain a question; the call is the cell's last expression so the
# returned result dict is displayed below the streamed answer.
question = "Give me a summary of this FOA"
qa.invoke(question)

 Title: FY 2024 Funding for Accelerated, Inclusive Research (FAIR)

The Department of Energy (DOE), Office of Science (SC), has announced a Funding Opportunity Announcement (FOA) with number DE-FOA-0003207 and type "Initial." The CFDA number is 81.049. This FOA is for the FAIR program, which supports accelerated and inclusive research in various scientific disciplines.

Key Dates:
- Issue Date: March 12, 2024
- Submission Deadline for Pre-Applications: April 23, 2024, at 5:00 PM Eastern Time
- Pre-Application Response Date: May 28, 2024, at 11:59 PM Eastern Time
- Submission Deadline for Applications: July 16, 2024, at 11:59 PM Eastern Time

To apply, you must submit a pre-application. The pre-application should include the project title, applicant/institution information, and a clear description of the objectives and technical approach of the proposed research. The pre-application should not exceed two pages, excluding the title page, when printed using standard letter-size (8.5-inch 

{'query': 'Give me a summary of this FOA',
 'chat_history': [HumanMessage(content='Give me a summary of this FOA'),
  AIMessage(content=' Title: FY 2024 Funding for Accelerated, Inclusive Research (FAIR)\n\nThe Department of Energy (DOE), Office of Science (SC), has announced a Funding Opportunity Announcement (FOA) with number DE-FOA-0003207 and type "Initial." The CFDA number is 81.049. This FOA is for the FAIR program, which supports accelerated and inclusive research in various scientific disciplines.\n\nKey Dates:\n- Issue Date: March 12, 2024\n- Submission Deadline for Pre-Applications: April 23, 2024, at 5:00 PM Eastern Time\n- Pre-Application Response Date: May 28, 2024, at 11:59 PM Eastern Time\n- Submission Deadline for Applications: July 16, 2024, at 11:59 PM Eastern Time\n\nTo apply, you must submit a pre-application. The pre-application should include the project title, applicant/institution information, and a clear description of the objectives and technical approach of t