In [None]:
%pip install langchain
%pip install unstructured
%pip install "unstructured[pdf]"
%pip install faiss-cpu
%pip install tqdm

In [25]:
from pathlib import Path

from langchain.vectorstores import FAISS
from langchain.document_loaders import DirectoryLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.retrievers.multi_query import MultiQueryRetriever

from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType

from dotenv import load_dotenv
# Load variables from a local .env file into the process environment
# (presumably the OpenAI API key needed by the cells below — confirm).
load_dotenv()

True

In [26]:
# Read every file found under sources/ into LangChain documents. The progress
# bar is switched on because parsing a single file can take several seconds.
loader = DirectoryLoader("sources/", show_progress=True)
data = loader.load()

100%|██████████| 1/1 [00:13<00:00, 13.14s/it]


In [27]:
# Cut the loaded documents into overlapping chunks: chunk_size=2000 with
# chunk_overlap=200, so neighbouring chunks share some context.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(data)

In [28]:
# Embedding client built with library defaults; the same object is passed to
# both FAISS.from_documents and FAISS.load_local below.
# NOTE(review): presumably picks up its API key from the environment — confirm.
embedding = OpenAIEmbeddings()


In [29]:
# Directory holding the persisted FAISS index.
STORE_DIR = Path("store")
# Flip to True after changing the files under sources/ or the splitter
# settings; otherwise an index already on disk is reused as-is.
REBUILD_STORE = False

if REBUILD_STORE or not STORE_DIR.is_dir():
    # Embedding every chunk goes through the embedding client, so only do it
    # when there is no saved index (or a rebuild was explicitly requested).
    store = FAISS.from_documents(documents=splits, embedding=embedding)
    store.save_local(str(STORE_DIR))
else:
    # Reuse the index persisted by an earlier run.
    # NOTE(review): only load index directories this notebook wrote itself.
    store = FAISS.load_local(str(STORE_DIR), embeddings=embedding)

In [17]:
# Reload the index persisted under "store", using the same embedding object.
# NOTE(review): `store` is already assigned by the preceding cell, and this
# cell's execution count (17) is lower than its neighbours' — confirm whether
# this cell is still needed in a top-to-bottom run.
store = FAISS.load_local("store", embeddings=embedding)

In [35]:
# GPT-4 chat model at temperature 0; shared by the retriever here and the
# agent created further down.
llm = ChatOpenAI(model="gpt-4", temperature=0)

# Wrap the FAISS store's default retriever in a MultiQueryRetriever, handing
# it the same LLM.
retriever = MultiQueryRetriever.from_llm(
    llm=llm,
    retriever=store.as_retriever(),
)

In [36]:
# Upper bound on how many retrieved chunks are handed back to the agent.
_retriever_func_k = 10


def retriever_func(query: str) -> str:
    """Retrieve passages for ``query`` and render them as one text block.

    Args:
        query: Question text forwarded to the module-level ``retriever``.

    Returns:
        At most ``_retriever_func_k`` retrieved chunks, each rendered as a
        ``# Source: <source>`` header, a blank line, and the chunk text with
        newlines replaced by spaces. Chunks are separated by a blank line.
        Chunks without a ``"source"`` metadata entry are labelled ``unknown``.
        An empty string is returned when nothing is retrieved.
    """
    # get_relevant_documents() has no documented `top_k` parameter, so the cap
    # is enforced here by slicing the returned list.
    documents = retriever.get_relevant_documents(query)[:_retriever_func_k]
    return "\n\n".join(
        "# Source: {}\n\n{}".format(
            # .get() so a chunk lacking source metadata cannot abort the call.
            document.metadata.get("source", "unknown"),
            document.page_content.replace("\n", " "),
        )
        for document in documents
    )

In [37]:
# The only tool offered to the agent: the retrieval function defined above,
# with a description telling the agent when and how to call it.
tools = [
    Tool(
        name="Minerva Student Handbook QA System",
        description=(
            "useful for when you need to answer questions about the Minerva "
            "student handbook. Input should be a fully formed question."
        ),
        func=retriever_func,
    )
]

In [38]:
# Build a zero-shot ReAct agent over the tool list; verbose=True prints the
# intermediate Thought/Action/Observation trace when it runs.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

In [40]:
# Example question answered end-to-end through the agent.
agent.run("How much will I have to pay if I'm caught vaping in the residence hall?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThis question is about a specific policy in the Minerva student handbook. I should use the Minerva Student Handbook QA System to find the answer.
Action: Minerva Student Handbook QA System
Action Input: What is the penalty for vaping in the residence hall?[0m
Observation: [36;1m[1;3m# Source: sources/Final Student Handbook Sept 14.pdf

Any student caught smoking, vaping, or possessing smoking apparatus or paraphernalia in any area of Minerva-leased properties will be charged $250 per occasion and may be subject to removal for continued violations.  5.5.1. Definitions  Smoking is deﬁned as inhaling, exhaling, burning, vaping, carrying or possessing any lighted cigarette, cigar, pipe, electronic cigarette that creates a vapor, hookah or other lit product and including the use of any substance, including but not limited to tobacco, cloves or marijuana. Illegal drugs include all forms of cannabis, including in amounts and form

"If you're caught vaping in the residence hall, you will have to pay a fine of $250 per occasion. Continued violations may lead to removal from the residence hall."

In [None]:
# NOTE(review): this unexecuted cell holds only a string literal matching the
# agent's final answer shown above; looks like a pasted leftover — confirm it
# can be removed.
"If you're caught vaping in the residence hall, you will have to pay a fine of $250 per occasion. Continued violations may lead to removal from the residence hall."