In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter

In [2]:
import weaviate
from langchain_weaviate.vectorstores import WeaviateVectorStore

# Client handle for the Weaviate instance; it is passed to
# WeaviateVectorStore.from_texts when the vector store is built further down.
# NOTE(review): no visible cell calls weaviate_client.close() -- confirm whether
# the connection should be closed explicitly at the end of the notebook.
weaviate_client = weaviate.connect_to_local()

In [3]:
# Embedding model handed to WeaviateVectorStore.from_texts when the vector
# store is built.
# NOTE(review): dotenv.load_dotenv() is only called in a later cell (In [9]);
# presumably the OpenAI API key must already be present in the environment
# when this line runs -- confirm.
embeddings = OpenAIEmbeddings()

# with open("../data/Tenacious.txt", encoding="windows-1252") as f:
#     state_of_the_union = f.read()
# text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
# texts = text_splitter.split_text(state_of_the_union)

In [16]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader

# Load the source PDF. `data` is the loader's result and is what the splitter
# cell below passes to split_documents().
loader = PyMuPDFLoader("../data/Tenacious.pdf")
data = loader.load()

In [17]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded PDF documents into chunks (chunk_size=4000, no overlap).
# NOTE(review): `docs` is not referenced by any later visible cell (the vector
# store is built from `texts`), and `text_splitter` is rebound in the next
# cell -- confirm whether this cell is still needed.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=0)
docs = text_splitter.split_documents(data)

In [6]:
# `text` was never defined anywhere in the notebook, so this cell raised
# NameError on a fresh kernel. Build it from the pages loaded by PyMuPDFLoader:
# joining on a blank line keeps a page boundary as a candidate split point.
text = "\n\n".join(page.page_content for page in data)

# Plain-text chunks (chunk_size=1000 measured with len, no overlap); `texts`
# is the list that gets embedded into the vector store.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
    length_function=len,
)
texts = text_splitter.split_text(text)



In [7]:
# state_of_the_union = data

# text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
# texts = text_splitter.split_text(state_of_the_union)

In [8]:
# Build the vector store from the text chunks, using `embeddings` and the
# already-open Weaviate client. Every chunk gets a metadata dict whose
# "source" value is its position in `texts` followed by "-pl".
docsearch = WeaviateVectorStore.from_texts(
    texts,
    embeddings,
    metadatas=[{"source": f"{position}-pl"} for position, _ in enumerate(texts)],
    client=weaviate_client,
)

# Retriever view over the store; the review chain uses it to fetch context.
retriever = docsearch.as_retriever()

/root/TeamMate/.venv/lib/python3.11/site-packages/pydantic/main.py:1070: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/


In [9]:
import dotenv
from langchain_openai import ChatOpenAI
from langchain.prompts import (
    PromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,
)

# Load environment settings via python-dotenv before the chat model is built.
dotenv.load_dotenv()

# Chat model that answers document questions (temperature 0).
chat_model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)

# System instructions; {context} is filled with the retrieved material.
review_template_str = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}
"""

# System message: the instructions above plus the injected context.
review_system_prompt = SystemMessagePromptTemplate(
    prompt=PromptTemplate(template=review_template_str, input_variables=["context"])
)

# Human message: the user's question, passed through verbatim.
review_human_prompt = HumanMessagePromptTemplate(
    prompt=PromptTemplate(template="{question}", input_variables=["question"])
)

messages = [review_system_prompt, review_human_prompt]

# Two-message chat prompt expecting "context" and "question".
review_prompt_template = ChatPromptTemplate(
    messages=messages,
    input_variables=["context", "question"],
)

# Prompt piped straight into the model (no retrieval or output parsing here).
review_chain = review_prompt_template | chat_model

In [10]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    """Join the text of retrieved documents into a single context string.

    Without this step the retriever's list of Document objects is stringified
    into the prompt as a Python repr, metadata included.

    Args:
        docs: Iterable of retrieved documents, each exposing ``page_content``.

    Returns:
        The documents' page contents separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


output_parser = StrOutputParser()

# Retrieval chain: the incoming question is sent to the retriever (whose hits
# are flattened to plain text) and is also passed through unchanged as
# "question"; the filled prompt goes to the chat model and the reply is parsed
# to a plain string. The earlier intermediate `review_chain` assignment was
# dead code (overwritten immediately) and has been dropped.
review_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | review_prompt_template
    | chat_model
    | output_parser
)

In [11]:
import random
import time

def get_current_wait_time(job: str) -> int | str:
    """Dummy function to generate fake wait times"""

    if job not in ["Software Engineer", "B", "C", "D"]:
        return f"job wait {job} does not exist"

    # Simulate API call delay
    time.sleep(1)

    return random.randint(0, 10000)

In [12]:
from langchain.agents import (
    create_openai_functions_agent,
    Tool,
    AgentExecutor,
)
from langchain import hub

In [13]:
# Tools the agent can call. The descriptions are what the LLM reads to decide
# which tool to use and what to pass, so the examples must match what the
# underlying functions actually accept.
tools = [
    Tool(
        name="Documents",
        func=review_chain.invoke,
        # Fixed: the original description broke off mid-sentence
        # ("about specific For instance,").
        description="""Useful when you need to answer questions
        about uploaded documents. Not useful for answering questions
        about job wait times. Pass the entire question as input to the tool.
        For instance,
        if the question is "What does the document say about project deadlines?",
        the input should be "What does the document say about project deadlines?"
        """,
    ),
    Tool(
        name="JobWait",
        func=get_current_wait_time,
        # Fixed: the example used to tell the model to pass a full sentence,
        # contradicting "only the job title itself"; get_current_wait_time only
        # recognises exact job titles and would answer "does not exist".
        description="""Use when asked about current wait times to get a job.
        This tool can only get the current wait time for a job application and does
        not have any information about aggregate or historical wait times. This tool returns wait times in
        days. Do not pass the word "job" as input, only the job title itself. For instance, if the question is
        "How long will I wait for a Software Engineer position?", the input should be
        "Software Engineer".
        """,
    ),
]

# Prompt for the functions agent, pulled from the LangChain hub.
agent_prompt = hub.pull("hwchase17/openai-functions-agent")

# Separate chat model instance that drives the agent (temperature 0).
agent_chat_model = ChatOpenAI(
    model="gpt-3.5-turbo-1106",
    temperature=0,
)

agent = create_openai_functions_agent(
    llm=agent_chat_model,
    prompt=agent_prompt,
    tools=tools,
)

# return_intermediate_steps=True adds the tool calls to the result dict;
# verbose=True prints the agent's trace while it runs.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    return_intermediate_steps=True,
    verbose=True,
)



In [14]:
# Smoke-test the agent with a question about the loaded document.
# NOTE(review): "invoved" in the query is a typo for "involved"; the recorded
# trace shows the agent passing the corrected spelling to the Documents tool.
agent_executor.invoke( {"input": "What is the company invoved?"} )



[1m> Entering new AgentExecutor chain...[0m


/root/TeamMate/.venv/lib/python3.11/site-packages/pydantic/main.py:1070: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/


[32;1m[1;3m
Invoking: `Documents` with `What is the company involved?`


[0m

/root/TeamMate/.venv/lib/python3.11/site-packages/pydantic/main.py:1070: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
/root/TeamMate/.venv/lib/python3.11/site-packages/pydantic/main.py:1070: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/


[36;1m[1;3mI don't have that information, sorry. Thanks for asking![0m[32;1m[1;3mI'm sorry, but I don't have that information. If you have a specific document or context in mind, please provide more details so that I can assist you better.[0m

[1m> Finished chain.[0m


/root/TeamMate/.venv/lib/python3.11/site-packages/pydantic/main.py:1070: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/


{'input': 'What is the company invoved?',
 'output': "I'm sorry, but I don't have that information. If you have a specific document or context in mind, please provide more details so that I can assist you better.",
 'intermediate_steps': [(AgentActionMessageLog(tool='Documents', tool_input='What is the company involved?', log='\nInvoking: `Documents` with `What is the company involved?`\n\n\n', message_log=[AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"__arg1":"What is the company involved?"}', 'name': 'Documents'}})]),
   "I don't have that information, sorry. Thanks for asking!")]}