In [None]:
%%capture
!pip install langchain>=0.1.17 openai>=1.13.3 langchain_openai>=0.1.6 transformers>=4.40.1 datasets>=2.18.0 accelerate>=0.27.2 sentence-transformers>=2.5.1 duckduckgo-search>=5.2.2 langchain_community
!CMAKE_ARGS="-DLLAMA_CUDA=on" pip install llama-cpp-python==0.2.69

In [None]:
from langchain import PromptTemplate, LLMChain, LlamaCpp

In [None]:
# Load the Phi-3 GGUF model through llama.cpp.
# Adjust `model_path` so that it points at the model file on your system.
llm = LlamaCpp(
    model_path="Phi-3-mini-4k-instruct-fp16.gguf",
    n_ctx=2048,        # size of the token context window
    max_tokens=500,    # upper bound on the number of generated tokens
    n_gpu_layers=-1,   # NOTE(review): -1 presumably offloads every layer to the GPU — confirm
    seed=42,           # fixed seed for reproducible generations
    verbose=False,
)

# Smoke test: send a raw prompt (no chat template applied yet)
llm.invoke("Hi! My name is Maarten. What is 2 + 1?")

In [None]:
# Chat template with a single placeholder, `input_prompt`
template = """<s><|user|>
{input_prompt}<|end|>
<|assistant|>"""

prompt = PromptTemplate(input_variables=["input_prompt"], template=template)

# Basic chain: the formatted prompt is piped straight into the LLM
basic_chain = prompt | llm

# Run the chain by supplying a value for the template variable
basic_chain.invoke(dict(input_prompt="Hi! My name is Maarten. What is 2 + 1?"))

In [None]:
# A chain with multiple prompts. Create a chain for the title of our story.
# The prompt takes one variable, `summary`, and asks for a title only.
template = """<s><|user|>
Create a title for a story about {summary}. Only return the title.<|end|>
<|assistant|>"""

title_prompt = PromptTemplate(template=template, input_variables=["summary"])
# `output_key='title'` names the result so that later prompts can use {title}
title = LLMChain(llm=llm, prompt=title_prompt, output_key='title')

# Try the title chain on its own
title.invoke({"summary": "A girl that lost her mother."})

In [None]:
# Character-description chain: consumes the story summary and the title
template = """<s><|user|>
Describe the main character of a story about {summary} with the title {title}. Use only two sentences.<|end|>
<|assistant|>"""

character_prompt = PromptTemplate(
    input_variables=["summary", "title"],
    template=template,
)
# The result is stored under the `character` key
character = LLMChain(prompt=character_prompt, llm=llm, output_key="character")

# Standalone example (uncomment to try this chain on its own):
# character.invoke({"summary": "A girl that lost her mother.", "title": "The Lost Mother"})


In [None]:
# Story chain: consumes the summary, the title and the character description
template = """<s><|user|>
Create a story about {summary} with the title {title}. The main character is: {character}. Only return the story and it cannot be longer than one paragraph.<|end|>
<|assistant|>"""

story_prompt = PromptTemplate(
    input_variables=["summary", "title", "character"],
    template=template,
)
# The result is stored under the `story` key
story = LLMChain(prompt=story_prompt, llm=llm, output_key="story")

In [None]:
# Combine all three components to create the full chain.
# The chains are composed left to right: `title` runs first, then `character`,
# then `story`; their results are stored under the output keys `title`,
# `character` and `story` configured on each chain.
llm_chain = title | character | story

In [None]:
# Run the full story chain on a one-sentence summary.
# NOTE(review): a bare string is passed instead of a dict; presumably it is
# mapped onto the first chain's single input variable (`summary`) — confirm.
llm_chain.invoke("A girl that lost her mother.")

### Memory: Helping LLMs to Remember Conversations

In [None]:
from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory, ConversationSummaryMemory

In [None]:
# Conversation Buffer Memory: Append the chat history to the prompt

# Create an updated prompt template to include chat history.
# `chat_history` is the variable that will carry the previous turns and
# `input_prompt` is the new user message.
template = """<s><|user|>Current conversation: {chat_history}
{input_prompt}<|end|>
<|assistant|>"""

prompt = PromptTemplate(
    template=template,
    input_variables=["input_prompt", "chat_history"]
)

In [None]:
# Buffer memory: the history is exposed under `chat_history`, the variable
# name used by the prompt template
memory = ConversationBufferMemory(memory_key="chat_history")

# Wire the LLM, the prompt and the memory into a single chain
llm_chain = LLMChain(memory=memory, prompt=prompt, llm=llm)

In [None]:
# First turn: introduce ourselves and ask a basic question
llm_chain.invoke(dict(input_prompt="Hi! My name is Maarten. What is 2 + 1?"))

# Second turn: does the LLM remember the name we gave it?
llm_chain.invoke(dict(input_prompt="What is my name?"))

### Windowed Conversation Buffer

In [None]:
# Windowed buffer memory: retain only the last two conversations (k=2)
memory = ConversationBufferWindowMemory(memory_key="chat_history", k=2)

# Wire the LLM, the prompt and the windowed memory into a single chain
llm_chain = LLMChain(memory=memory, prompt=prompt, llm=llm)

In [None]:
# Fill the memory with two conversations by asking two questions

llm_chain.predict(
    input_prompt="Hi! My name is Maarten and I am 33 years old. What is 2 + 1?"
)
llm_chain.predict(
    input_prompt="What is 3 + 3?"
)

In [None]:
# Does the chain still know the name we gave it?
llm_chain.invoke(dict(input_prompt="What is my name?"))

In [None]:
# Does the chain still know the age we gave it?
# (Note that llm_chain is supposed to remember only the last two conversations.)
llm_chain.invoke(dict(input_prompt="What is my age?"))

### Conversation Summary

In [None]:
# Prompt used to keep a running summary of the conversation.
# `summary` is the summary so far, `new_lines` the turns to fold into it.
summary_prompt_template = """<s><|user|>Summarize the conversations and update with the new lines.
Current summary: {summary}

New lines of conversation: {new_lines}

New summary:
<|end|>
<|assistant|>"""

summary_prompt = PromptTemplate(
    input_variables=["summary", "new_lines"],
    template=summary_prompt_template,
)

In [None]:
# Summary memory: uses the LLM together with `summary_prompt` and exposes the
# result under `chat_history`
memory = ConversationSummaryMemory(
    prompt=summary_prompt,
    memory_key="chat_history",
    llm=llm,
)

# Wire the LLM, the prompt and the summary memory into a single chain
llm_chain = LLMChain(memory=memory, prompt=prompt, llm=llm)

In [None]:
# Start a conversation, then ask for the name we just gave
llm_chain.invoke(dict(input_prompt="Hi! My name is Maarten. What is 2 + 1?"))
llm_chain.invoke(dict(input_prompt="What is my name?"))

In [None]:
# Has everything been summarized so far? Ask about the very first question.
llm_chain.invoke(dict(input_prompt="What was the first question I asked?"))

In [None]:
# Access the memory object we created to check what the summary is so far.
# An empty dict is passed because no inputs are needed to read the stored
# memory variables.
memory.load_memory_variables({})

# Agents: Creating a System of LLMs

### ReAct in LangChain with OpenAI's GPT-3.5

In [None]:
import os
from langchain_openai import ChatOpenAI
from langchain.agents import load_tools, Tool
from langchain.agents import AgentExecutor, create_react_agent
from langchain.tools import DuckDuckGoSearchRun

In [None]:
# Load OpenAI's LLMs with LangChain.
# The API key is taken from the environment rather than hardcoded. If it is
# missing (or empty) it is requested interactively, so an already configured
# key is never overwritten and the secret never ends up in the saved notebook.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass  # local import: only needed for this fallback

    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

openai_llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [None]:
# ReAct prompt: lists the available tools, prescribes the
# Thought / Action / Action Input / Observation format and ends with the
# user's question followed by the agent's scratchpad.
react_template = """Answer the following questions as best as you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: The action to take should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
Thought: {agent_scratchpad}
"""

# Four variables are filled in: the tool descriptions, the tool names,
# the user input and the agent scratchpad.
prompt = PromptTemplate(
    input_variables=["tools", "tool_names", "input", "agent_scratchpad"],
    template=react_template,
)

In [None]:
# You can create the tool to pass to an agent.
# `DuckDuckGoSearchRun` is the search class this notebook imports from
# `langchain.tools`; its `run` method is wrapped in a generic `Tool`.
search = DuckDuckGoSearchRun()
search_tool = Tool(
    name="duckduck",
    description="A web search engine. Use this to as a search engine for general queries.",
    func=search.run
)

# Prepare tools: the built-in math tool (registered under the name
# "llm-math", with a hyphen) plus the web search tool defined above.
tools = load_tools(["llm-math"], llm=openai_llm)
tools.append(search_tool)

In [None]:
# Construct the ReAct agent from the LLM, the tools and the ReAct prompt
agent = create_react_agent(openai_llm, tools, prompt)

# The executor runs the agent with its tools. `handle_parsing_errors=True`
# asks the executor to recover when the LLM output does not follow the
# expected format, instead of aborting the run.
agent_executor = AgentExecutor(
    agent=agent, tools=tools, verbose=True, handle_parsing_errors=True
)

In [None]:
# Ask for the price of a MacBook Pro: the question combines a lookup
# (current price in USD) with arithmetic (conversion to EUR).
agent_executor.invoke(
    dict(
        input="What is the current price of a MacBook Pro in USD? How much would it cost in EUR if the exchange rate is 0.85 EUR for 1 USD."
    )
)