In [36]:
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter

In [37]:
loader = TextLoader("../data/Tenacious.txt", encoding="windows-1252")
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

embeddings = OpenAIEmbeddings()

# loader = TextLoader("../data/Robinson_Advisory.txt", encoding="windows-1252")
# index = VectorstoreIndexCreator().from_loaders([loader])

# text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
#     chunk_size=1024, chunk_overlap=0
# )
# documents = loader.load_and_split()
# docs = text_splitter.split_documents(documents)
# embeddings = OpenAIEmbeddings()

Created a chunk of size 1343, which is longer than the specified 1000


In [38]:
import weaviate
from langchain_weaviate.vectorstores import WeaviateVectorStore

In [39]:
weaviate_client = weaviate.connect_to_local()
db = WeaviateVectorStore.from_documents(docs, embeddings, client=weaviate_client)

In [40]:
query = "What is this week challenge?"
docs = db.similarity_search(query)

# Print the first 100 characters of each result
for i, doc in enumerate(docs):
    print(f"\nDocument {i+1}:")
    print(doc.page_content[:100] + "...")


Document 1:
One of the most dangerous thoughts for a technical person is to make an
uninformed or emotion based ...

Document 2:
The solution should focus on specific students tackling specific challenges, such
as taking universi...

Document 3:
gv Tenacious
Intelligence

Corporation

Tenacious Talent - GenAl
UpSkilling

GEN-AI Challenge
TEAM-M...

Document 4:
3. Architectures:
e Explore different agent architectures, such as rule-based,
learning-based, and h...


In [41]:
with open("../data/Tenacious.txt", encoding="windows-1252") as f:
    state_of_the_union = f.read()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_text(state_of_the_union)

Created a chunk of size 1343, which is longer than the specified 1000


In [42]:
docsearch = WeaviateVectorStore.from_texts(
    texts,
    embeddings,
    client=weaviate_client,
    metadatas=[{"source": f"{i}-pl"} for i in range(len(texts))],
)

retriever = docsearch.as_retriever()

In [43]:
from langchain_core.prompts import ChatPromptTemplate

template = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question}
Context: {context}
Answer:
"""
prompt = ChatPromptTemplate.from_template(template)

print(prompt)

input_variables=['context', 'question'] messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question}\nContext: {context}\nAnswer:\n"))]


In [44]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

In [45]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke("What is this week's challenge?")

'The challenge for this week is to focus on creating a personalized agent for optimizing trainee time and focus, as part of the Team-Mate project. The main objective is to enhance the training and educational experience by introducing a supportive digital assistant to help students manage tasks effectively and access necessary resources effortlessly. The deliverables include basic chatbot functionality, adaptive learning support, building an understanding of the trainee, setting up a RAG system, and creating a simple frontend interface for interaction.'

In [18]:
from langchain import hub

prompt = hub.pull("rlm/rag-prompt")

example_messages = prompt.invoke(
    {"context": "filler context", "question": "filler question"}
).to_messages()

example_messages

[HumanMessage(content="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: filler question \nContext: filler context \nAnswer:")]

In [19]:
print(example_messages[0].content)

You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: filler question 
Context: filler context 
Answer:


In [50]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

for chunk in rag_chain.stream("what company is invoved in this project?"):
    print(chunk, end="", flush=True)

The company involved in this project is TeamMate, an independent company dedicated to helping students and trainees manage their time and focus more effectively. They aim to optimize schedules, facilitate collaboration, and provide tailored support to enhance learning experiences and job readiness. TeamMate introduces a personalized LLM-enhanced agent to serve as a supportive digital assistant for students.

In [47]:
from langchain_core.prompts import PromptTemplate

template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}

Helpful Answer:"""
custom_rag_prompt = PromptTemplate.from_template(template)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke("What is this weeks challenge?")

"This week's challenge is to focus on developing prompt engineering proficiency, implementing RAG systems, designing intelligent agents, implementing vector stores, and analyzing data with LLMs. Thanks for asking!"

In [57]:
import dotenv
from langchain_openai import ChatOpenAI
from langchain.prompts import (
    PromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,
)

dotenv.load_dotenv()

review_template_str = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}
"""

review_system_prompt = SystemMessagePromptTemplate(
    prompt=PromptTemplate(
        input_variables=["context"],
        template=review_template_str,
    )
)

review_human_prompt = HumanMessagePromptTemplate(
    prompt=PromptTemplate(
        input_variables=["question"],
        template="{question}",
    )
)
messages = [review_system_prompt, review_human_prompt]

review_prompt_template = ChatPromptTemplate(
    input_variables=["context", "question"],
    messages=messages,
)

chat_model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)

review_chain = review_prompt_template | chat_model

In [58]:
context = "I had a great stay!"
question = "Did anyone have a positive experience?"

review_chain.invoke({"context": context, "question": question})

AIMessage(content='Yes, I had a great stay! Thanks for asking!', response_metadata={'token_usage': {'completion_tokens': 12, 'prompt_tokens': 90, 'total_tokens': 102}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-e5c796fe-379c-429d-8c52-dfe20dc61425-0', usage_metadata={'input_tokens': 90, 'output_tokens': 12, 'total_tokens': 102})

In [59]:
from langchain_core.output_parsers import StrOutputParser

In [60]:
output_parser = StrOutputParser()

review_chain = review_prompt_template | chat_model | output_parser

In [61]:
context = "I had a great stay!"
question = "Did anyone have a positive experience?"

review_chain.invoke({"context": context, "question": question})

'Yes, I had a great stay! Thanks for asking!'

In [62]:
review_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | review_prompt_template
    | chat_model
    | StrOutputParser()
)

In [64]:

question = """What is this week challenge about?"""
review_chain.invoke(question)

'The challenge this week is about creating a personalized agent called Team-Mate to help students and trainees manage their time and focus effectively in educational settings like university courses, training programs, or boot camps. The project aims to optimize schedules, facilitate collaboration, and provide tailored support to enhance learning experiences and job readiness. Thanks for asking!'

In [90]:
import random
import time

def get_current_wait_time(job: str) -> int | str:
    """Dummy function to generate fake wait times"""

    if job not in ["Software Engineer", "B", "C", "D"]:
        return f"job wait {job} does not exist"

    # Simulate API call delay
    time.sleep(1)

    return random.randint(0, 10000)

In [91]:
from langchain.agents import (
    create_openai_functions_agent,
    Tool,
    AgentExecutor,
)
from langchain import hub

In [92]:
tools = [
Tool(
    name="Documents",
    func=review_chain.invoke,
    description="""Useful when you need to answer questions
    about uploaded documents. Not useful for answering questions
    about specific For instance,
    if the question is "What does the document say about project deadlines?",
    the input should be "What does the document say about project deadlines?"
    """,
),

Tool(
    name="JobWait",
    func=get_current_wait_time,
    description="""Use when asked about current wait times to get a job.
    This tool can only get the current wait time for a job application and does
    not have any information about aggregate or historical wait times. This tool returns wait times in
    days. Do not pass the word "job" as input, only the job title itself. For instance, if the question is
    "How long will I wait for a Software Engineer position?", the input should be 
    "What is the wait time for a Software Engineer position".
    """,
)
]

agent_prompt = hub.pull("hwchase17/openai-functions-agent")

agent_chat_model = ChatOpenAI(
    model="gpt-3.5-turbo-1106",
    temperature=0,
)

agent = create_openai_functions_agent(
    llm=agent_chat_model,
    prompt=agent_prompt,
    tools=tools,
)

agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    return_intermediate_steps=True,
    verbose=True,
)



In [93]:
agent_executor.invoke( {"input": "How long will I wait for a Software Engineer position?"} )



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `JobWait` with `Software Engineer`


[0m[33;1m[1;3m3037[0m[32;1m[1;3mThe current wait time for a Software Engineer position is approximately 3037 days.[0m

[1m> Finished chain.[0m


{'input': 'How long will I wait for a Software Engineer position?',
 'output': 'The current wait time for a Software Engineer position is approximately 3037 days.',
 'intermediate_steps': [(AgentActionMessageLog(tool='JobWait', tool_input='Software Engineer', log='\nInvoking: `JobWait` with `Software Engineer`\n\n\n', message_log=[AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"__arg1":"Software Engineer"}', 'name': 'JobWait'}}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 247, 'total_tokens': 264}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': 'fp_b953e4de39', 'finish_reason': 'function_call', 'logprobs': None}, id='run-530c72e7-7ed2-4817-9a7f-6e1ec51d66d5-0', usage_metadata={'input_tokens': 247, 'output_tokens': 17, 'total_tokens': 264})]),
   3037)]}