In [None]:
import langchain
from elasticsearch_dsl import Search
from langchain.agents import create_openai_functions_agent
from langchain.docstore.document import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import (
    CharacterTextSplitter,
    RecursiveCharacterTextSplitter,
)
from langchain.tools.retriever import create_retriever_tool
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS

In [None]:
import pyrootutils

# Locate the project root, starting the search at "../../" and using the
# ".git" / "setup.cfg" indicators.
# NOTE(review): "../../" is a relative path and set_root(cwd=True) below changes the
# working directory, so a second run of this cell presumably resolves it from a
# different location — confirm the cell is meant to run once per kernel.
path = pyrootutils.find_root(
    search_from="../../",
    indicator=[".git", "setup.cfg"],
)

# Register the directory found above as the project root.
pyrootutils.set_root(
    path=path,  # root directory located above
    project_root_env_var=True,  # export PROJECT_ROOT pointing at the root directory
    dotenv=True,  # read environment variables from .env in the root, if it exists
    pythonpath=True,  # put the root on PYTHONPATH so project imports resolve
    cwd=True,  # switch the current working directory to the root
)

In [None]:
from sotanaut.db_handling.es_connection import create_connection, get_connection

In [None]:
# Index name used by the Search query and by the index deletion further down.
INDEX: str = "research-papers"

In [None]:
# Set up the connection through the project helpers and keep a handle to it;
# `client` is what the Search query and the index deletion below operate on.
# NOTE(review): presumably create_connection() registers the connection that
# get_connection() returns — confirm in sotanaut.db_handling.es_connection.
create_connection()
client = get_connection()

In [None]:
# Build the query in two steps: target the index, then apply the source filter.
# NOTE(review): includes=[] names no fields — presumably every field is returned; confirm.
s = Search(using=client, index=INDEX)
s = s.source(includes=[])

# Run the query; later cells read the hits from `response`.
response = s.execute()

In [None]:
# Work with the first search hit only. Fail loudly when the query returned nothing:
# otherwise `docs` would stay undefined (or stale from an earlier run) and the
# failure would surface in a later cell, far from its cause.
hit = next(iter(response), None)
if hit is None:
    raise ValueError(f"No documents found in index {INDEX!r}; cannot build `docs`.")

print(hit.title)

# Split the hit's full text with a default-configured splitter and wrap every
# chunk in a Document so it can be passed to split_documents() afterwards.
text_splitter = CharacterTextSplitter()
texts = text_splitter.split_text(hit.full_text)
docs = [Document(page_content=t) for t in texts]

In [None]:
# Re-chunk the documents with a recursive splitter that breaks on ". " and "\n",
# configured with chunk_size=1000 and chunk_overlap=200.
chunker = RecursiveCharacterTextSplitter(
    separators=[". ", "\n"],
    chunk_size=1000,
    chunk_overlap=200,
)
documents = chunker.split_documents(docs)

In [None]:
# Build a FAISS vector store from the chunks using OpenAI embeddings,
# then obtain a retriever over that store.
embeddings = OpenAIEmbeddings()
vector = FAISS.from_documents(documents, embeddings)
retriever = vector.as_retriever()

# Wrap the retriever in a tool named "Paper_1" whose description states which
# paper it searches and when it must be used.
retriever_tool = create_retriever_tool(
    retriever,
    name="Paper_1",
    description="Search the paper titled BovineTalk: Machine Learning for Vocalization Analysis of Dairy Cattle under Negative Affective States. For any questions about cow births, you must use this tool!",
)

In [None]:
# Query the retriever directly as a quick sanity check; the result is the cell output.
# `invoke` is the Runnable entry point that supersedes the deprecated
# `get_relevant_documents`, and matches how the agent executor is called below.
retriever.invoke("What machine learning method authors used?")

In [None]:
from langchain import hub

# Pull the "hwchase17/openai-functions-agent" prompt from the hub (it can be modified
# before use), then display its first message as the cell output.
prompt = hub.pull("hwchase17/openai-functions-agent")
prompt.messages[0]

In [None]:
from langchain_openai import ChatOpenAI

# Chat model handed to the agent below; temperature is fixed at 0.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
)

In [None]:
from langchain.agents import create_openai_functions_agent

# Assemble the OpenAI-functions agent from the chat model, the retriever tool and the prompt.
agent = create_openai_functions_agent(
    llm=llm,
    tools=[retriever_tool],
    prompt=prompt,
)

In [None]:
from langchain.agents import AgentExecutor

# Executor that runs the agent with the same single retriever tool; verbose output is on.
agent_executor = AgentExecutor(
    agent=agent,
    tools=[retriever_tool],
    verbose=True,
)

In [None]:
# Ask the agent a question about the paper; the executor's return value is the cell output.
agent_executor.invoke({"input": "Do you know anything about the cows birth detection?"})

In [None]:
# NOTE(review): destructive — this deletes the whole index named by INDEX. The Search
# cell earlier in the notebook reads from the same index, so running the notebook again
# afterwards presumably fails until the index is repopulated — confirm this cleanup is
# intended before executing the cell.
client.indices.delete(index=INDEX)

In [None]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

In [None]:
# One in-memory history object, shared by every session.
message_history = ChatMessageHistory()


def _get_session_history(session_id):
    """Return the shared in-memory history; ``session_id`` is accepted but not used.

    Real deployments usually need a session id to pick a history; here there is
    only the single ChatMessageHistory created above.
    """
    return message_history


# Wrap the executor so each call goes through the message history:
# "input" is the key of the incoming message, "chat_history" the key for past messages.
agent_with_chat_history = RunnableWithMessageHistory(
    agent_executor,
    _get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
)

In [None]:
# A session id is supplied because most real-world setups need one; with the single
# in-memory ChatMessageHistory used here its value does not really matter.
session_config = {"configurable": {"session_id": "<foo>"}}

agent_with_chat_history.invoke(
    {"input": "Thanks do you know anything else?"},
    config=session_config,
)