# Langchain & llama_index 練習場
**目的**
- 藉由各種小問題來嘗試與熟悉Langchain與llama_index的功能。
- 主要是想要藉由llm來找出股票代碼或是金融報表中的資訊。
- 使用模型: ChatGroq (llama3-70b-8192)

In [28]:
from langchain.tools import BaseTool, StructuredTool, tool
from langchain.agents import Tool
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
import os
import pandas as pd
import numpy as np
from langchain_experimental.agents.agent_toolkits import create_csv_agent, create_pandas_dataframe_agent
from langchain.agents import create_tool_calling_agent, AgentExecutor, create_react_agent
from dotenv import load_dotenv
# from langchain_openai import ChatOpenAI
from langchain import hub
from llama_index.experimental.query_engine import PandasQueryEngine
from langchain_community.vectorstores import Chroma
from IPython.display import Markdown, display
from langchain_core.messages import HumanMessage, ToolMessage
from langchain_core.output_parsers import StrOutputParser
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough, RunnableLambda, chain
from llm_search import duck_search
from langchain.agents import initialize_agent
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain_core.pydantic_v1 import BaseModel, Field
from bs4 import BeautifulSoup
import requests

In [29]:
load_dotenv() # 讀取.env檔案內的APIs

True

In [30]:
"""建立llm，並且同時輸入測試用的資料"""
# LLM & Parser
groq_llm = ChatGroq(temperature=0, model="llama3-70b-8192")
parser = StrOutputParser()
# Data
testStocks = ["聯電","台積電","華邦電", "台新金"]
# stock_code_path = os.path.join("stocknumbers", "TWstockNums.csv")
# stock_code_df = pd.read_csv(stock_code_path)
# stock_code_df.head(3)

In [31]:
# 季度損益表資料
import yfinance as yf
stock = yf.Ticker("2303.tw")
df = stock.quarterly_incomestmt

### llm 基本使用

In [None]:
"""Langchain 官方基本教學，如何使用chatprompttemplate來建立prompt"""
prompt = ChatPromptTemplate.from_template("tell me a joke about {topic}")
chain = prompt | groq_llm | StrOutputParser()
chain.invoke({"topic": "bear"})

'Why did the bear go to the doctor?\n\nBecause it had a grizzly cough!'

### basic Tool calling
**目的**
- 整理tool calling的建立方式

In [21]:
query = "3加5是多少? 然後3減5是多少?"

In [22]:
"""第一種: 使用@tool直接快速建立一個tool"""
@tool
def plus(a: int, b:int):
    """將a與b相加"""
    return a + b

@tool
def minus(a:int, b:int):
    """將a與b相減"""
    return a - b

tools = [plus, minus]

In [23]:
groq_tools = groq_llm.bind_tools(tools)
groq_tools.invoke(query).tool_calls

[{'name': 'plus',
  'args': {'a': 3, 'b': 5},
  'id': 'call_99mk',
  'type': 'tool_call'},
 {'name': 'minus',
  'args': {'a': 3, 'b': 5},
  'id': 'call_8w4r',
  'type': 'tool_call'}]

In [24]:
"""第二種: 使用class"""
class add(BaseModel):
    """將a與b相加"""

    a: int = Field(..., description="First integer")
    b: int = Field(..., description="Second integer")


class minus(BaseModel):
    """將a與b相減"""

    a: int = Field(..., description="First integer")
    b: int = Field(..., description="Second integer")

tools = [add, minus]

In [25]:
groq_tools = groq_llm.bind_tools(tools)
groq_tools.invoke(query).tool_calls

[{'name': 'add',
  'args': {'a': 3, 'b': 5},
  'id': 'call_hfn9',
  'type': 'tool_call'},
 {'name': 'minus',
  'args': {'a': 3, 'b': 5},
  'id': 'call_ejxv',
  'type': 'tool_call'}]

In [26]:
"""第三種: 綜合使用class (設定args_schema) 與 @tool"""
# Define input schema
class numberInput(BaseModel):
    a: int= Field(description="first integer")
    b: int= Field(description="second integer")
@tool(args_schema=numberInput)
def plus(a: int, b:int):
    """useful for adding two numbers together"""
    c = a + b
    return f"{a} + {b} is {c}"

@tool(args_schema=numberInput)
def minus(a: int, b:int):
    """useful for two numbers minus each other"""
    c = a - b
    return f"{a} - {b} is {c}"

tools = [plus, minus]

In [27]:
groq_tools = groq_llm.bind_tools(tools)
groq_tools.invoke(query).tool_calls

[{'name': 'plus',
  'args': {'a': 3, 'b': 5},
  'id': 'call_pvvf',
  'type': 'tool_call'},
 {'name': 'minus',
  'args': {'a': 3, 'b': 5},
  'id': 'call_tge5',
  'type': 'tool_call'}]

### advanced tool calling
**目的**
- 使用複雜的方式設計tool，讓llm可以有更強的取得外部資訊功能。
- 爬取goodinfo台灣股市資訊網中的股價財務資訊評分表。
- 讓LLM來讀取最新資訊後回答query。
- 最後面部分是用來測試各種agent對於抓取url的效果。

**心得**
- 表現有時佳，有時候不好，會遇到LLM無法抓取到url的問題。(或許可能跟model本身能力有關)，須注意這樣的情況。

In [32]:
def finDetail(url: str, headers: dict) -> pd.DataFrame:
    response = requests.get(url, headers=headers)
    response.encoding = 'utf-8'
    # print(response.text)
    soup = BeautifulSoup(response.text, 'lxml')
    data = soup.select_one("#divDetail")
    df = pd.read_html(data.prettify())
    df = df[0]
    return df

In [33]:
"""抓取到的資料形式"""
headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
        "Cookie":"IS_TOUCH_DEVICE=F; SCREEN_SIZE=WIDTH=1536&HEIGHT=864; _ga=GA1.1.134640417.1718698272; __gads=ID=b42b9ba3a594ff97:T=1718698274:RT=1718698274:S=ALNI_MZ1kFXN3htHHdY3ZGXMWn10JYhrSg; __gpi=UID=00000e5315d89fdd:T=1718698274:RT=1718698274:S=ALNI_MbuXU-bLcaSUcqYleIOzqDTTFxMKw; __eoi=ID=0b4097d725162137:T=1718698274:RT=1718698274:S=AA-AfjYZbznwQCxhcZLVvPaxBsjM; CLIENT%5FID=20240618161154117%5F156%2E59%2E34%2E83; TW_STOCK_BROWSE_LIST=1785; FCNEC=%5B%5B%22AKsRol9Vv6qLdMCxvuvjU1sRPiybs9UpKWKftBn8q79OO4v-hfpBOUrtI8lvc5IyBwM6YVFPrwpug-bkB5ZnUxeYa32E4ABhLsrT0WBiN7XtnBuMv34o7xKNpJr4oHhvNFGW7d-M66kzUq0TLx_w8ci83BQW9DAuiA%3D%3D%22%5D%5D; _ga_0LP5MLQS7E=GS1.1.1718698272.1.1.1718698563.60.0.0"
        }
url = "https://goodinfo.tw/tw/StockFinGrade.asp?STOCK_ID=1785"
df = finDetail(url=url, headers=headers)
df.head(3)

Unnamed: 0,類別,評分項目,％,總分,得分,百分 換算,評分標準,說明
0,資產佔比,現金 (%),8.9,3,1,33,~ 5 ：0 分 5 ~ 10 ：1 分 10 ~ 15 ：2 分 15 ~ ...,現金及約當現金 / 資產總額 x 100% 現金是所有資產中流動性最大的一項資產，現金部位...
1,資產佔比,應收帳款 (%),7.2,3,3,100,~ 10 ：3 分 10 ~ 20 ：2 分 20 ~ 30 ：1 分 30 ~...,所有應收款項合計 / 資產總額 x 100% 應收帳款應維持在一定比率以下，應收帳款比率越...
2,資產佔比,存貨 (%),43.7,3,0,0,~ 10 ：3 分 10 ~ 20 ：2 分 20 ~ 30 ：1 分 30 ~...,存貨 / 資產總額 x 100% 存貨比率應維持在一定水準才算健康，存貨水位過高的公司，通...


In [34]:
"""進階class設定"""
class finScore(BaseTool):
    name = "financial_statement_search"
    description = "useful for when you need to search financial statement score to answer question."
    
    def _run(self, url: str):
        headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
        "Cookie":"IS_TOUCH_DEVICE=F; SCREEN_SIZE=WIDTH=1536&HEIGHT=864; _ga=GA1.1.134640417.1718698272; __gads=ID=b42b9ba3a594ff97:T=1718698274:RT=1718698274:S=ALNI_MZ1kFXN3htHHdY3ZGXMWn10JYhrSg; __gpi=UID=00000e5315d89fdd:T=1718698274:RT=1718698274:S=ALNI_MbuXU-bLcaSUcqYleIOzqDTTFxMKw; __eoi=ID=0b4097d725162137:T=1718698274:RT=1718698274:S=AA-AfjYZbznwQCxhcZLVvPaxBsjM; CLIENT%5FID=20240618161154117%5F156%2E59%2E34%2E83; TW_STOCK_BROWSE_LIST=1785; FCNEC=%5B%5B%22AKsRol9Vv6qLdMCxvuvjU1sRPiybs9UpKWKftBn8q79OO4v-hfpBOUrtI8lvc5IyBwM6YVFPrwpug-bkB5ZnUxeYa32E4ABhLsrT0WBiN7XtnBuMv34o7xKNpJr4oHhvNFGW7d-M66kzUq0TLx_w8ci83BQW9DAuiA%3D%3D%22%5D%5D; _ga_0LP5MLQS7E=GS1.1.1718698272.1.1.1718698563.60.0.0"
        }
        response = requests.get(url, headers=headers)
        response.encoding = 'utf-8'
        html_content = response.text
        
        def get_html_content(html_content: str):
            soup = BeautifulSoup(html_content, 'lxml')
            data = soup.select_one("#divDetail")
            df = pd.read_html(data.prettify())
            df = df[0]
            return df
        
        result = get_html_content(html_content)

        return result
    
    def _arun(self, url: str):
        raise NotImplementedError("This tool does not support async")

# Build tools
fs_getter = finScore()
tools = [fs_getter]

In [35]:
# conversational agent memory
memory = ConversationBufferWindowMemory(
    memory_key='chat_history',
    k=3,
    return_messages=True
)

# 建立agent
conversational_agent = initialize_agent(
    agent='chat-conversational-react-description',
    tools=tools,
    llm=groq_llm,
    verbose=True,
    max_iterations=3,
    early_stopping_method='generate',
    memory=memory
)
# 修改prompt可以使用
# conversational_agent.agent.llm_chain.prompt.messages[0].prompt.template = fixed_prompt

In [36]:
res = conversational_agent.invoke("How's the financial performance of this stock? The url is on : https://goodinfo.tw/tw/StockFinGrade.asp?STOCK_ID=1785")
res["output"]



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```
{
    "action": "financial_statement_search",
    "action_input": "https://goodinfo.tw/tw/StockFinGrade.asp?STOCK_ID=1785"
}
```[0m
Observation: [36;1m[1;3m             類別         評分項目            ％  總分  得分  百分  換算  \
0          資產佔比       現金 (%)          8.9   3   1      33   
1          資產佔比     應收帳款 (%)          7.2   3   3     100   
2          資產佔比       存貨 (%)         43.7   3   0       0   
3          資產佔比    基金及投資 (%)          2.9   3   3     100   
4          資產佔比     其他資產 (%)          0.6   2   2     100   
5          負債佔比     負債總額 (%)           51   5   0       0   
6          獲利能力        營業毛利率           12   5   2      40   
7          獲利能力        營業利益率            6   5   1      20   
8          獲利能力        業外損益率         -0.3   2   1      50   
9          獲利能力        稅後淨利率          4.7   5   1      20   
10         獲利能力      股東權益報酬率          8.2   4   1      25   
11         獲利能力        資產報酬率            4   

"Based on the financial statement search, the stock's financial performance is mediocre, with a total score of 47 out of 100. The company's asset utilization, profitability, and cash flow are areas of concern, but its return on equity is relatively high."

#### 測試各種agent

In [38]:
"""可以用，但URL會一直找不到，可能要用其他的方式去解決url問題。"""
from langchain.agents import AgentExecutor, create_react_agent, create_tool_calling_agent, create_structured_chat_agent

In [39]:
# 第一種prompt (直接修改template)
prompt_one = hub.pull("hwchase17/react")
prompt_one.template = 'You are a profesional financial analyst. You MUST answer my questions in Traditional Chinese. First, you have access to the url : {URL}. Then, Answer the following questions as best you can. You have access to the following tools:\n\n{tools}\n\nUse the following format:\n\nQuestion: the input question you must answer\nThought: you should always think about what to do\nAction: the action to take.\nAction Input: the input to the action\nObservation: the result of the action\n... (this Thought/Action/Action Input/Observation can repeat N times)\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n\nBegin!\n\nQuestion: {input}\nThought:{agent_scratchpad}'

In [41]:
"""會遇到agent抓取到不對的url情形"""
agent = create_react_agent(llm=groq_llm, tools=tools, prompt=prompt_one)
agent_executor = AgentExecutor(agent=agent, tools=tools, handle_parsing_errors=True)

# agent = create_tool_calling_agent(llm=groq_llm, tools=tools, prompt=prompt_one)
# agent_executor = AgentExecutor(agent=agent, tools=tools, handle_parsing_errors=True)

# agent = create_structured_chat_agent(llm=groq_llm, tools=tools, prompt=prompt_one)
# agent_executor = AgentExecutor(agent=agent, tools=tools, handle_parsing_errors=True)

url = 'https://goodinfo.tw/tw/StockFinGrade.asp?STOCK_ID=1785'
agent_executor.invoke({
    "input" :  "How's the financial performance of this stock?",
    "URL": url
})

MissingSchema: Invalid URL 'financial_statement_search(1785)': No scheme supplied. Perhaps you meant https://financial_statement_search(1785)?

In [40]:
# 第二種prompt (直接修改template)
prompt_two = hub.pull("hwchase17/openai-functions-agent")
prompt_two.messages[0].prompt.template

'You are a helpful assistant'

In [42]:
"""也會遇到agent抓取到不對的url情形"""
# agent = create_react_agent(llm=groq_llm, tools=tools, prompt=prompt_two)
# agent_executor = AgentExecutor(agent=agent, tools=tools, handle_parsing_errors=True)

agent = create_tool_calling_agent(llm=groq_llm, tools=tools, prompt=prompt_two)
agent_executor = AgentExecutor(agent=agent, tools=tools, handle_parsing_errors=True)

# agent = create_structured_chat_agent(llm=groq_llm, tools=tools, prompt=prompt_two)
# agent_executor = AgentExecutor(agent=agent, tools=tools, handle_parsing_errors=True)

url = 'https://goodinfo.tw/tw/StockFinGrade.asp?STOCK_ID=1785'
agent_executor.invoke({
    "input" :  "How's the financial performance of this stock?",
    "URL": url
})

MissingSchema: Invalid URL '': No scheme supplied. Perhaps you meant https://?

### Custom Function Calling
**目的**: 學習使用各種建立的tool calling方法去找出query中的股票名稱相對應的股票代碼。

**方法**:
- 先創立一個查詢股票代碼的自訂函數: stockNumber()
- 再利用tool decorator去讓LLM可以使用到自訂函數 *stockNumber()*
- ittemgetter會找到invoke中的stock input，然後RunnableLambda讓stockNumber變成Runnable性質，所以可以去收到invoke的input。

**心得**:
- 準確度目前看來還不錯，可以留著以後用。

In [None]:
def stockNumber(stocknames: list, df: pd.DataFrame = stock_code_df) -> dict:
    stocknums = {}
    try:
        for stock in stocknames:
            index = np.where(df["股票名稱"] == stock)[0][0]
            number = df.iloc[index]["股票代碼"]
            stocknums[stock] = number
        return stocknums
    except:
        raise IndexError("股票名稱有誤!")

In [None]:
@tool
def stockNumSearch(query: str):
    """查詢股票代碼可以用"""
    prompt = ChatPromptTemplate.from_template("{stocks}的股票代碼是多少?")
    chain = (
        {
            "stocks": itemgetter("stocks") | RunnableLambda(stockNumber)
        }
    ) | prompt | groq_llm
    response = chain.invoke({"stocks": testStocks})
    return response.content

In [None]:
prompt = ChatPromptTemplate.from_template("{stocks}的股票代碼是多少?")
chain = (
    {
        "stocks": itemgetter("stocks") | RunnableLambda(stockNumber)
    }
) | prompt | groq_llm | parser

response = chain.invoke({"stocks": testStocks})
response

'According to the dictionary you provided, the stock codes are:\n\n* 聯電: 2303\n* 台積電: 2330\n* 華邦電: 2344\n* 台新金: 2887'

### llama-index pandas query engine
- **目的**
- 使用llama-index pandas query engine去根據query去找出股票中的季度損益表的正確數值。
- 測試llama-index pandas query engine找股票代碼的能力是如何。

**心得**
- 無法在這裡直接使用到langchain提供的Groq api，要使用llama_index提供的groq才能呼叫模型。
- 找出季度報表的數值效果不好，但找出單次單一的股票代碼還可行，需要去處理ouput format的問題。
- 有人提到pandasqueryengine容易出錯，建議可使用pandas ai => 但實際使用過後，效果也沒有多好。
=> https://stackoverflow.com/questions/77445728/pandasqueryengine-from-llama-index-is-unable-to-execute-code-with-the-following

In [26]:
from llama_index.llms.groq import Groq
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from llama_index.core.output_parsers import LangchainOutputParser
from llamaprompts import instruction_str, new_prompt

  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(


In [29]:
"""找出季度報表中的正確數值"""
llamaindex_groq_llm = Groq(temperature=0, model="llama3-8b-8192") # output_parser=output_parser 先不用，會有錯誤
# llama-index pandas queryengine
query_engine = PandasQueryEngine(df=df, verbose=True, llm=llamaindex_groq_llm, instruction_str=instruction_str)
query_engine.update_prompts({"pandas_prompt": new_prompt})
query_engine.query("What is the operating revenue of 2024-03-31")

> Pandas Instructions:
```
df.loc[0, 'Normalized EBITDA']
```
> Pandas Output: There was an error running the output as Python code. Error message: 'Normalized EBITDA'


Traceback (most recent call last):
  File "C:\Users\ASUS\AppData\Roaming\Python\Python311\site-packages\pandas\core\indexes\base.py", line 3802, in get_loc
    return self._engine.get_loc(casted_key)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "pandas\_libs\index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\index.pyx", line 165, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\hashtable_class_helper.pxi", line 5745, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas\_libs\hashtable_class_helper.pxi", line 5753, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Normalized EBITDA'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "c:\Users\ASUS\anaconda3\envs\workEnv\Lib\site-packages\llama_index\experimental\query_engine\pandas\output_parser.py", line 54, in default_output_processor
    output_str = str(safe_eval(module_end_str, global_vars

Response(response="There was an error running the output as Python code. Error message: 'Normalized EBITDA'", source_nodes=[], metadata={'pandas_instruction_str': "df.loc[0, 'Normalized EBITDA']", 'raw_pandas_output': "There was an error running the output as Python code. Error message: 'Normalized EBITDA'"})

In [14]:
df.columns

DatetimeIndex(['2024-03-31', '2023-12-31', '2023-09-30', '2023-06-30',
               '2023-03-31'],
              dtype='datetime64[ns]', freq=None)

In [24]:
df.columns = df.columns.astype("str")

In [None]:
"""找出query相對應的股票代碼"""
# 可以用，但目前要處理parser問題
llamaindex_groq_ans = []
for stock in testStocks:
    query = "[{}]的股票代碼是多少?".format(stock)
    response = query_engine.query(query)
    llamaindex_groq_ans.append(str(response))

llamaindex_groq_ans

> Pandas Instructions:
```
df.loc[df['股票名稱'] == '聯電']['股票代碼'].values[0]
```
> Pandas Output: 2303
> Pandas Instructions:
```
df.loc[df['股票名稱'] == '台積電', '股票代碼']
```
> Pandas Output: 275    2330
Name: 股票代碼, dtype: object
> Pandas Instructions:
```
df.loc[df['股票名稱'] == '華邦電', '股票代碼']
```
> Pandas Output: 282    2344
Name: 股票代碼, dtype: object


['2303',
 '275    2330\nName: 股票代碼, dtype: object',
 '282    2344\nName: 股票代碼, dtype: object']

### csv, pandas agent (Langchain)
**目的**
- 找出資料中的相對應股票代碼。
- 了解csv/panda agent 理解財報的能力。

**方法**
- 使用langchain提供的create_csv_agent，並將allow_dangerous_code設為True，否則不能跑。

**心得**
- 可以用，而且兩者效果也都不錯。目前使用狀況比llamaindex pandas queryengine與 pandasai 好用。

In [49]:
prompt = ChatPromptTemplate.from_messages([
    ("system", "return the response in a json. The format is `stock name` : `stock number`")
])

In [73]:
"""測試一個股票"""
agent = create_csv_agent(
    groq_llm,
    stock_code_path,
    verbose=True,
    agent_type= "zero-shot-react-description",
    handle_parsing_errors=True,
    allow_dangerous_code  = True
)
agent.run("華邦電的股票代碼是多少?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find the row in the dataframe where the stock name is "華邦電" and then get the corresponding stock code.

Action: python_repl_ast
Action Input: `df[df['股票名稱'] == '華邦電']`[0m[36;1m[1;3m     股票代碼 股票名稱         上市日 市場類別  產業類別
282  2344  華邦電  1995/10/18   上市  半導體業[0m[32;1m[1;3mIt looks like the observation is an empty dataframe, which means there is no row in the dataframe where the stock name is "華邦電". 

Action: python_repl_ast
Action Input: `df.head()` (to double-check the dataframe)[0m[36;1m[1;3mSyntaxError: invalid syntax (<unknown>, line 1)[0m[32;1m[1;3mIt looks like I made a mistake in my previous action. The `print` statement is missing.

Action: python_repl_ast
Action Input: `print(df.head())`[0m[36;1m[1;3m   股票代碼 股票名稱         上市日 市場類別  產業類別
0  1101   台泥  1962/02/09   上市  水泥工業
1  1102   亞泥  1962/06/08   上市  水泥工業
2  1103   嘉泥  1969/11/14   上市  水泥工業
3  1104   環泥  1971/02/01   上市  水泥工業
4  1108  

'The stock code of "華邦電" is 2344.'

In [44]:
"""測試多個股票"""
groq_csv_ans_run = []
groq_csv_ans_invoke = []
for stock in testStocks:
    query = "{}的股票代碼是多少?".format(stock)
    run = await agent.arun(query)
    groq_csv_ans_run.append(run)
    invoke = agent.invoke(query)
    groq_csv_ans_invoke.append(invoke)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find the row where the stock name is "聯電" and then get the stock code from that row.

Action: python_repl_ast
Action Input: `df.loc[df['股票名稱'] == '聯電', '股票代碼']`[0m[36;1m[1;3m261    2303
Name: 股票代碼, dtype: object[0m[32;1m[1;3mIt looks like the observation is a pandas Series with a single value, which is the stock code we're looking for.

Thought: I now know the final answer
Final Answer: The stock code of 聯電 is 2303.[0m

[1m> Finished chain.[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find the row where the stock name is "聯電" and then get the stock code from that row.

Action: python_repl_ast
Action Input: `df.loc[df['股票名稱'] == '聯電', '股票代碼']`[0m[36;1m[1;3m261    2303
Name: 股票代碼, dtype: object[0m[32;1m[1;3mIt looks like the observation is a pandas Series with a single value, which is the stock code we're looking for.

Thought: I now know the final answer
F

In [45]:
groq_csv_ans_run

['The stock code of 聯電 is 2303.',
 '台積電的股票代碼是2330.',
 'The stock code of "華邦電" is 2344.',
 'The stock code of 台新金 is 2887.']

In [62]:
"""測試llm理解財報的能力"""
income_agent = create_pandas_dataframe_agent(
        groq_llm,
        df=df,
        verbose=True,
        agent_type= "zero-shot-react-description", # zero-shot-react-description
        handle_parsing_errors=True,
        allow_dangerous_code  = True
    )



In [74]:
res = await income_agent.arun("You are a financial expert to summerize what's the financial status of 2024-03-31?")
res



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To summarize the financial status of 2024-03-31, I need to analyze the data in the dataframe.

Action: python_repl_ast
Action Input: df.loc['Tax Effect Of Unusual Items', '2024-03-31'][0m[36;1m[1;3m62570718.095299[0m[32;1m[1;3mThe tax effect of unusual items for 2024-03-31 is approximately 62.57 million.

Action: python_repl_ast
Action Input: df.loc['Normalized EBITDA', '2024-03-31'][0m[36;1m[1;3m23654234000.0[0m[32;1m[1;3mThe normalized EBITDA for 2024-03-31 is approximately 23.65 billion.

Action: python_repl_ast
Action Input: df.loc['Total Unusual Items', '2024-03-31'][0m[36;1m[1;3m347383000.0[0m[32;1m[1;3mAction: python_repl_ast
Action Input: df.loc['Total Unusual Items Excluding Goodwill', '2024-03-31'][0m[36;1m[1;3m347383000.0[0m[32;1m[1;3mThe total unusual items and total unusual items excluding goodwill for 2024-03-31 are both approximately 347.38 million.

Action: python_repl_ast
Acti

'As of 2024-03-31, the company has a tax effect of unusual items of approximately 62.57 million, a normalized EBITDA of approximately 23.65 billion, and total unusual items (including and excluding goodwill) of approximately 347.38 million. The tax rate for calculations is around 18.01%.'

### 一般方法
**目的**
- 設定簡單的函數去找出股票的相對應代碼。

In [None]:
index = np.where(stock_code_df["股票名稱"] == "台積電")[0][0]
stock_code_df.iloc[index]["股票代碼"]

'2330'

In [None]:
np.where(stock_code_df["股票名稱"] == "台積電")[0][0]

275

In [None]:
stock_code_df.iloc[275]["股票代碼"]

'2330'

In [None]:
stock_code_df.iloc[275]["產業類別"]

'半導體業'

In [15]:
def stockNumber(stocknames: list, df: pd.DataFrame) -> dict:
    stocknums = {}
    try:
        for stock in stocknames:
            index = np.where(df["股票名稱"] == stock)[0][0]
            number = df.iloc[index]["股票代碼"]
            stocknums[stock] = number
        return stocknums
    except:
        raise IndexError("股票名稱有誤!")

In [16]:
stockNumber(stocknames=["台積電"], df=stock_code_df)

{'台積電': '2330'}

In [21]:
testlist = ["華邦", "台積電"]
while (inputs := input('輸入股票 (q to quit)') )!= "q":
    try:
        inputs = [inputs]
        result = stockNumber(stocknames=inputs, df=stock_code_df)
        print(result)
        break
    except:
        print("請輸入正確的股票名稱")

台積電
<class 'str'>
<class 'list'>
['台積電']
{'台積電': '2330'}


In [23]:
while (inputs := input('輸入股票 (q to quit)') )!= "q":
    restart = False
    try:
        inputs = [inputs]
        result = stockNumber(stocknames=inputs, df=stock_code_df)
        break
    except:
        print("請輸入正確的股票名稱")
        restart = True
    if restart:
        continue

請輸入正確的股票名稱
here


### RAG (Langchain)
- indexing
 -> embedding: 目前主流看起來是用OpenAI Embeddings()，但就是會需要錢。所以來試試看用llama index提供的huggingface來做embedding。
- vector store : https://docs.llamaindex.ai/en/stable/examples/embeddings/Langchain/
- retrieval
- Q&A

References:
1. https://python.langchain.com/v0.1/docs/modules/data_connection/vectorstores/

In [43]:
# # Load data
# loader = PyMuPDFLoader("xxx.pdf")
# data = loader.load()

# # Split data
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=50, chunk_overlap=5)
# all_splits = text_splitter.split_documents(data)

# # Embedding
# model_name = "sentence-transformers/all-MiniLM-L6-v2"
# model_kwargs = {'device': 'cpu'}
# embedding = HuggingFaceEmbeddings(model_name=model_name,
#                                   model_kwargs=model_kwargs)
# # vector store
# persist_directory = 'db-techs-02'
# vectordb = Chroma.from_documents(documents=all_splits, embedding=embedding, persist_directory=persist_directory)
# retriever = vectordb.as_retriever()

### Conversational Agent
**目的**
- 熟悉conversational agent的程式碼

**方式**
- 使用duckduckgo作為外部資料來源，皆收到資料後傳給agent去使用適當的tool來回答query。
- 修改agent中原始設定的prompt來讓agent使用繁體中文回答。

**心得**
- 好用，但可惜的是initialize_agent後續會被汰除掉，需要學習其他agent使用方式。

### create_tool_calling_agent 測試

In [25]:
@tool
def summary(query: str):
    """用來總結文章內容"""
    prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant to help me summarize Traditional Chinese News using Traditional Chinese. Only return summarized texts."),
        ("user", "{input}"),
        MessagesPlaceholder("agent_scratchpad", optional=True)
        ]
    )
    groq_chain = prompt | groq_llm | parser
    res = groq_chain.invoke({"input": query})
    return res


@tool
def plus(a: int, b:int):
    """將a與b相加"""
    return a + b


tools = [summary, plus]
groq_with_tools = groq_llm.bind_tools(tools)

In [5]:
query_1 = '''
台灣證券交易所（Taiwan Stock Exchange，簡稱：TWSE，或稱台股）成立於1961年，是台灣最主要的證券交易市場。台股提供公開上市公司一個公平、透明、有效率的市場，讓他們可以籌集資金。同時，也為投資者提供一個交易平台，讓他們可以買賣股票和其他證券。

台股的主要指數是台灣加權股價指數，通常被簡稱為「台股指數」或「大盤」。這個指數反映了在台灣證券交易所上市的所有股票的整體表現。此外，還有許多其他的子指數，例如電子股指數、金融股指數等，分別反映了特定產業的股票表現。

台股的交易時間為每週一至五的上午9點到下午1點30分。在這段時間內，投資者可以透過證券商買賣股票。台股也有提供盤後交易服務，讓投資者在正常交易時間之外進行交易。
'''

In [6]:
query_2 = "what's 1+2?"

In [91]:
# prompt = ChatPromptTemplate.from_messages(
# [
#     ("system", "You are a helpful assistant to help me summarize Traditional Chinese News using Traditional Chinese. Only return summarized texts."),
#     ("user", "{input}"),
#     MessagesPlaceholder("agent_scratchpad", optional=True)
# ]
# )
# groq_chain = prompt | groq_llm | parser
# res = groq_chain.invoke({"input": query_1})
# res

'台灣證券交易所（TWSE）成立於1961年，是台灣最主要的證券交易市場，提供公開上市公司籌集資金和投資者交易平台。主要指數是台灣加權股價指數，反映所有上市股票表現。交易時間為週一至五上午9點到下午1點30分，另有盤後交易服務。'

In [10]:
res = groq_with_tools.invoke(query_1)
res.content

''

In [11]:
res.tool_calls

[{'name': 'sumtest', 'args': {'query': '台灣證券交易所'}, 'id': 'call_17rk'}]

In [26]:
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant who answers in Traditional Chinese."),
        ("user", "{input}"),
        MessagesPlaceholder("agent_scratchpad", optional=True)
    ]
)

agent = create_tool_calling_agent(prompt=prompt, llm=groq_llm, tools=tools)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [23]:
input_query = "幫我總結以下新聞" + query_1
res = agent_executor.invoke({"input": (input_query)})
res



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `summary` with `{'query': '台灣證券交易所的介紹'}`


[0mInside
[36;1m[1;3m台灣證券交易所（TWSE）是台灣的證券交易市場，於1961年10月9日成立。其主要功能是提供股票、債券、衍生工具等金融商品的交易平台，讓投資者能夠買賣股票、債券等金融商品。TWSE也是亞洲最大的證券交易所之一，目前有超過900家上市公司，總市值超過30兆元台幣。[0m[32;1m[1;3m
Invoking: `summary` with `{'query': ''}`


[0mInside
[36;1m[1;3mPlease provide the Traditional Chinese news article you'd like me to summarize, and I'll be happy to assist you![0m[32;1m[1;3m
Invoking: `summary` with `{'query': '台灣證券交易所的介紹'}`


[0mInside
[36;1m[1;3m台灣證券交易所（TWSE）是台灣的證券交易市場，於1961年10月9日成立。其主要功能是提供股票、債券、衍生工具等金融商品的交易平台，讓投資者能夠買賣股票、債券等金融商品。TWSE也是亞洲最大的證券交易所之一，目前有超過900家上市公司，總市值超過30兆元台幣。[0m[32;1m[1;3m台股是台灣最主要的證券交易市場，於1961年成立，提供股票、債券、衍生工具等金融商品的交易平台，讓投資者能夠買賣股票、債券等金融商品。[0m

[1m> Finished chain.[0m


{'input': '幫我總結以下新聞\n台灣證券交易所（Taiwan Stock Exchange，簡稱：TWSE，或稱台股）成立於1961年，是台灣最主要的證券交易市場。台股提供公開上市公司一個公平、透明、有效率的市場，讓他們可以籌集資金。同時，也為投資者提供一個交易平台，讓他們可以買賣股票和其他證券。\n\n台股的主要指數是台灣加權股價指數，通常被簡稱為「台股指數」或「大盤」。這個指數反映了在台灣證券交易所上市的所有股票的整體表現。此外，還有許多其他的子指數，例如電子股指數、金融股指數等，分別反映了特定產業的股票表現。\n\n台股的交易時間為每週一至五的上午9點到下午1點30分。在這段時間內，投資者可以透過證券商買賣股票。台股也有提供盤後交易服務，讓投資者在正常交易時間之外進行交易。\n',
 'output': '台股是台灣最主要的證券交易市場，於1961年成立，提供股票、債券、衍生工具等金融商品的交易平台，讓投資者能夠買賣股票、債券等金融商品。'}

### DuckDuckgo
- 可以用，而且免費，也沒有使用上的次數限制。

1. https://python.langchain.com/v0.2/docs/integrations/tools/ddg/
2. https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.duckduckgo_search.DuckDuckGoSearchAPIWrapper.html
3. https://api.python.langchain.com/en/latest/tools/langchain_community.tools.ddg_search.tool.DuckDuckGoSearchRun.html

- RunnablePassthrough:
1. https://python.langchain.com/v0.1/docs/expression_language/primitives/passthrough/
2. https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html

In [None]:
"""
You can also directly pass a custom DuckDuckGoSearchAPIWrapper to DuckDuckGoSearchResults.
Therefore, you have much more control over the search results.
"""

In [88]:
from pprint import pprint
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper

In [105]:
wrapper = DuckDuckGoSearchAPIWrapper(region="tw-tzh", max_results=5, source="news", time="d")
duck_search = DuckDuckGoSearchRun(api_wrapper=wrapper)

In [112]:
res = duck_search.invoke("台股最新的消息?")
pprint(res)

('晶圓代工龍頭台積電 (2330-TW) 法說會今 (18) 日登場，但遇美國科技股大跌不測，台積電開盤遭摜破千元大關以下跌 4% 的 988 '
 '元開出，大盤跳空下跌以 23373.01 點開出，大跌 400 點，盤中跌幅加速並下跌達 500 點。 '
 '（中央社記者曾仁凱台北18日電）市場擔憂美中科技戰可能再度升溫，17日美國晶片股狂瀉，拖累今天台股，台股盤中一度重挫超過600點，截至10時30分，台股指數下跌529.64點，至23240.18點，失守月線23452點，成交值約新台幣3131億元。 '
 '台指期今 '
 '(17)日結算，加上川普一句「台灣應付美國保護費」，台積電今日股價應聲下跌，盤中一度重挫逾3%，聯發科、廣達、大立光等大型權值股也持續回檔，拖累指數一度大跌347點，所幸尾盤台積電跌勢略為縮小，終場加權指數下跌227.43點、以23769.82點作收，成交量為5424億元，盤面上強勢族群為ABF載板、面板股、製鞋股、台塑四寶。 '
 '台指期昨日大跌315點至23,679點，台指期淨部位方面，三大法人淨空單減少6,134口至15,813口，其中外資淨空單減少2,292口至34,842口；十大交易人中的特定法人全月台指期淨空單減少2,280口至18,018口。 '
 '電子權值股部分，台積電（2330）收1030元 ... 大樹（6469）、中天（4128）、訊聯（1784）也都有3~4%的漲幅。 '
 '營建營造類股也有買單拉抬，永信建（5508）、富裔（6264）亮燈漲停，股價分別來到 ...')


In [107]:
prompt = ChatPromptTemplate.from_template(
    """Answer the question based only on the context provided and please answer in Traditional Chinese.

Context: {context}

Question: {question}"""
)

chain = (
    RunnablePassthrough.assign(context=(lambda x: x["question"]) | duck_search)
    | prompt
    | groq_llm
    | StrOutputParser()
)

In [108]:
chain.invoke({"question": "有沒有台股的最新消息?"})

'有，台股最新消息是：截至10時30分，台股指數下跌529.64點，至23240.18點，失守月線23452點，成交值約新台幣3131億元。'

### Yahoo finance search
- 用起來沒有想像中那麼好，速度慢又找不到資料(?)

In [67]:
from langchain.agents import AgentType, initialize_agent
from langchain_community.tools.yahoo_finance_news import YahooFinanceNewsTool
from langchain_openai import ChatOpenAI

# llm = ChatOpenAI(temperature=0.0)
tools = [YahooFinanceNewsTool()]
agent_chain = initialize_agent(
    tools,
    groq_llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

  warn_deprecated(


In [70]:
agent_chain.run("MSFT")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find financial news about Microsoft, so I'll use the yahoo_finance_news tool.

Action: yahoo_finance_news
Action Input: MSFT[0m
Observation: [36;1m[1;3mTop Contributor of ClearBridge Sustainability Leaders Strategy: Microsoft Corporation (MSFT)
ClearBridge Investments, an investment management company, released its “ClearBridge Sustainability Leaders Strategy” second quarter 2024 investor letter. A copy of the same can be downloaded here. U.S. stocks increased during a difficult quarter for diversified portfolios due to the market leadership remaining centered around a few mega-cap firms that were thought to be the major winners […][0m
Thought:[32;1m[1;3mThought: It seems like the observation is not directly related to Microsoft's financial news, but rather a mention of Microsoft in an investor letter. I need to find more specific financial news about Microsoft.

Action: yahoo_finance_news
Action Inpu

"Unfortunately, I couldn't find specific financial news about Microsoft using the yahoo_finance_news tool."

In [71]:
yf_retriever.run("MSFT")

'Top Contributor of ClearBridge Sustainability Leaders Strategy: Microsoft Corporation (MSFT)\nClearBridge Investments, an investment management company, released its “ClearBridge Sustainability Leaders Strategy” second quarter 2024 investor letter. A copy of the same can be downloaded here. U.S. stocks increased during a difficult quarter for diversified portfolios due to the market leadership remaining centered around a few mega-cap firms that were thought to be the major winners […]'

### create_pandas_dataframe_agent 測試
**目的**
- 測試季度損益表的回覆情況。

**心得**
- 效果不好，會遇到looping、超過max iteration, max time的問題。
- 抑或是agent會找到不對的資訊。

In [26]:
pd_prompt = "You are a professional financial analyst to help me to analyze a company's financial reports."
pd_template = ChatPromptTemplate.from_messages(pd_prompt)
agent = create_pandas_dataframe_agent(groq_llm, df, verbose=True, allow_dangerous_code=True, early_stopping_method="generate")

In [83]:
res = agent.run("You are a professional financial analyst to help me to analyze the status of the given company's quarterly incomestatement of 2024-03-31")
res
'''目前效果不好，所以要來改善chatbot回答的效率以及準確率，另外也要測試看看中文的效果如何'''



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To analyze the status of the company's quarterly income statement, I need to understand the data provided in the dataframe. 

Action: python_repl_ast
Action Input: df.info()[0m[36;1m[1;3m<class 'pandas.core.frame.DataFrame'>
Index: 54 entries, Tax Effect Of Unusual Items to Operating Revenue
Data columns (total 5 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   2024-03-31 00:00:00  53 non-null     object
 1   2023-12-31 00:00:00  49 non-null     object
 2   2023-09-30 00:00:00  54 non-null     object
 3   2023-06-30 00:00:00  53 non-null     object
 4   2023-03-31 00:00:00  53 non-null     object
dtypes: object(5)
memory usage: 4.6+ KB
[0m[32;1m[1;3mFrom the observation, I can see that the dataframe has 54 rows and 5 columns, each representing a quarterly period. The columns are of object type, which means they can hold string or numeric values. Th

KeyboardInterrupt: 

### 嘗試去複製使用langchain openai tool calling 的教學
**心得**
- 大部分基本上都可以不需要用到opean tool calling的functions。
- 可以做到同樣的事情。
- 但agent的memory技能還是不夠好，剛說完就會忘記了。

In [3]:
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.tools import tool

In [4]:
# Define input schema
class numberInput(BaseModel):
    a: int= Field(description="first integer")
    b: int= Field(description="second integer")
@tool(args_schema=numberInput)
def plus_tool(a: int, b:int):
    """useful for adding two numbers together"""
    c = a + b
    return f"{a} + {b} is {c}"

@tool(args_schema=numberInput)
def minus_tool(a: int, b:int):
    """useful for two numbers minus each other"""
    c = a - b
    return f"{a} - {b} is {c}"

# Create a toolkit
tools = [plus_tool, minus_tool]
model_with_tools = groq_llm.bind_tools(tools=tools)

In [135]:
template = ChatPromptTemplate.from_messages([
    (
        "system",
        "You are a helpful assistant to identify two numbers in my question when needed. If it's not the case, just give me a response."
    ),
        MessagesPlaceholder(variable_name='chat_history', optional=True),
    (
        "user","{input}"
    ),
    MessagesPlaceholder(variable_name='agent_scratchpad', optional=True)
    ])

In [105]:
from langchain.agents.output_parsers import ToolsAgentOutputParser
from langchain.agents.format_scratchpad.tools import format_to_tool_messages

In [136]:
chain = template | model_with_tools | ToolsAgentOutputParser()

In [137]:
result = chain.invoke({
    "input" : "what's 3 minus 2?"
})
result

[ToolAgentAction(tool='minus_tool', tool_input={'a': 3, 'b': 2}, log="\nInvoking: `minus_tool` with `{'a': 3, 'b': 2}`\n\n\n", message_log=[AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_ndsp', 'function': {'arguments': '{"a":3,"b":2}', 'name': 'minus_tool'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 49, 'prompt_tokens': 1106, 'total_tokens': 1155, 'completion_time': 0.160814721, 'prompt_time': 0.351381805, 'queue_time': None, 'total_time': 0.512196526}, 'model_name': 'llama3-70b-8192', 'system_fingerprint': 'fp_c1a4bcec29', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-0cd17c76-285d-49c2-b0d2-79f32faed93f-0', tool_calls=[{'name': 'minus_tool', 'args': {'a': 3, 'b': 2}, 'id': 'call_ndsp'}], usage_metadata={'input_tokens': 1106, 'output_tokens': 49, 'total_tokens': 1155})], tool_call_id='call_ndsp')]

In [138]:
result[0].tool

'minus_tool'

In [139]:
result[0].tool_input

{'a': 3, 'b': 2}

In [140]:
result01 = chain.invoke({
    "input" : "what's 3 minus 2?",
    "agent_scratchpad":[]
})
result01

[ToolAgentAction(tool='minus_tool', tool_input={'a': 3, 'b': 2}, log="\nInvoking: `minus_tool` with `{'a': 3, 'b': 2}`\n\n\n", message_log=[AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_7smc', 'function': {'arguments': '{"a":3,"b":2}', 'name': 'minus_tool'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 49, 'prompt_tokens': 1106, 'total_tokens': 1155, 'completion_time': 0.156577575, 'prompt_time': 0.444120837, 'queue_time': None, 'total_time': 0.600698412}, 'model_name': 'llama3-70b-8192', 'system_fingerprint': 'fp_87cbfbbc4d', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-269590d2-d614-4f96-972c-cdb145a60f97-0', tool_calls=[{'name': 'minus_tool', 'args': {'a': 3, 'b': 2}, 'id': 'call_7smc'}], usage_metadata={'input_tokens': 1106, 'output_tokens': 49, 'total_tokens': 1155})], tool_call_id='call_7smc')]

In [141]:
result01[0].tool

'minus_tool'

In [142]:
observation = plus_tool(result01[0].tool_input)
observation

'3 + 2 is 5'

In [143]:
type(result01[0])

langchain.agents.output_parsers.tools.ToolAgentAction

In [144]:
result01[0].message_log

[AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_7smc', 'function': {'arguments': '{"a":3,"b":2}', 'name': 'minus_tool'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 49, 'prompt_tokens': 1106, 'total_tokens': 1155, 'completion_time': 0.156577575, 'prompt_time': 0.444120837, 'queue_time': None, 'total_time': 0.600698412}, 'model_name': 'llama3-70b-8192', 'system_fingerprint': 'fp_87cbfbbc4d', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-269590d2-d614-4f96-972c-cdb145a60f97-0', tool_calls=[{'name': 'minus_tool', 'args': {'a': 3, 'b': 2}, 'id': 'call_7smc'}], usage_metadata={'input_tokens': 1106, 'output_tokens': 49, 'total_tokens': 1155})]

In [176]:
format_to_tool_messages([(result01[0], observation),])[0]

AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_7smc', 'function': {'arguments': '{"a":3,"b":2}', 'name': 'minus_tool'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 49, 'prompt_tokens': 1106, 'total_tokens': 1155, 'completion_time': 0.156577575, 'prompt_time': 0.444120837, 'queue_time': None, 'total_time': 0.600698412}, 'model_name': 'llama3-70b-8192', 'system_fingerprint': 'fp_87cbfbbc4d', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-269590d2-d614-4f96-972c-cdb145a60f97-0', tool_calls=[{'name': 'minus_tool', 'args': {'a': 3, 'b': 2}, 'id': 'call_7smc'}], usage_metadata={'input_tokens': 1106, 'output_tokens': 49, 'total_tokens': 1155})

In [146]:
result02 = chain.invoke({
    "input" : "what's 3 mins 2?",
    "agent_scratchpad" : format_to_tool_messages([(result01[0], observation),])
})

In [147]:
result02

AgentFinish(return_values={'output': "It seems like there was a mistake in the tool call. The minus_tool function was called, but the result is an addition. Anyway, since the result is provided, I'll respond directly. The correct result for 3 minus 2 is 1."}, log="It seems like there was a mistake in the tool call. The minus_tool function was called, but the result is an addition. Anyway, since the result is provided, I'll respond directly. The correct result for 3 minus 2 is 1.")

In [153]:
result02.return_values

{'output': "It seems like there was a mistake in the tool call. The minus_tool function was called, but the result is an addition. Anyway, since the result is provided, I'll respond directly. The correct result for 3 minus 2 is 1."}

In [154]:
chain.invoke({
    "input" : "hi",
    "agent_scratchpad" : format_to_tool_messages([(result01[0], observation),])
})

AgentFinish(return_values={'output': 'It seems like there was a mistake in the tool call. The minus_tool function is supposed to subtract two numbers, not add them. Could you please retry the tool call with the correct operation?'}, log='It seems like there was a mistake in the tool call. The minus_tool function is supposed to subtract two numbers, not add them. Could you please retry the tool call with the correct operation?')

In [177]:
from langchain.schema.agent import AgentFinish
# from langchain.agents.format_scratchpad.openai_functions import format_to_openai_functions

In [220]:
"""因為是使用format_tool_message，所以result要取第一個值。
目前可以使用format_tool_messages在def內的方法，拿出來跟lambda一起放會出錯，因為找不到intermediate_steps這個key。
現在要來想要怎麼加回去chain中
參考: https://learn.deeplearning.ai/courses/functions-tools-agents-langchain/lesson/7/conversational-agent
"""

def run_chain(user_input):
    intermediate_steps = []
    while True:
        print(f"This is intermediate steps: {intermediate_steps}")
        result = chain.invoke({
            "input" : user_input,
            "agent_scratchpad" : format_to_tool_messages(intermediate_steps)
        })
        if isinstance(result, AgentFinish):
            return result
        tool = {
            "plus_tool" : plus_tool,
            "minus_tool": minus_tool
        }[result[0].tool]
        observation = tool.run(result[0].tool_input)
        print(f"This is result: {result}")
        print(" ")
        print(f"This is observation: {observation}")
        intermediate_steps.append((result[0], observation))

In [196]:
run_chain("what is 3 mins 2?")

This is intermediate steps: []
This is result: [ToolAgentAction(tool='minus_tool', tool_input={'a': 3, 'b': 2}, log="\nInvoking: `minus_tool` with `{'a': 3, 'b': 2}`\n\n\n", message_log=[AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_wqdz', 'function': {'arguments': '{"a":3,"b":2}', 'name': 'minus_tool'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 49, 'prompt_tokens': 1106, 'total_tokens': 1155, 'completion_time': 0.160619205, 'prompt_time': 0.371819564, 'queue_time': None, 'total_time': 0.532438769}, 'model_name': 'llama3-70b-8192', 'system_fingerprint': 'fp_c1a4bcec29', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-68b27d93-f30d-4558-94bf-a3f9cf6ac35e-0', tool_calls=[{'name': 'minus_tool', 'args': {'a': 3, 'b': 2}, 'id': 'call_wqdz'}], usage_metadata={'input_tokens': 1106, 'output_tokens': 49, 'total_tokens': 1155})], tool_call_id='call_wqdz')]
 
This is observation: 3 - 2 is 1
This is intermediate steps: [(ToolAge

AgentFinish(return_values={'output': 'The answer is 1.'}, log='The answer is 1.')

In [237]:
# from langchain.schema.runnable import RunnablePassthrough
# agent_chain = RunnablePassthrough.assign(
#     agent_scratchpad = lambda x: print(type(x))
# ) | chain

# def run_agent(user_input):
#     intermediate_steps = []
#     while True:
#         print(f"This is intermediate steps: {intermediate_steps}")
#         result = agent_chain.invoke({ # 修改這裡
#             "input" : user_input,
#             "agent_scratchpad" : intermediate_steps # 修改這裡
#         })
#         if isinstance(result, AgentFinish):
#             return result
#         tool = {
#             "plus_tool" : plus_tool,
#             "minus_tool": minus_tool
#         }[result[0].tool]
#         observation = tool.run(result[0].tool_input)
#         print(f"This is observation: {observation}")
#         intermediate_steps.append((result[0], observation))

# run_agent("what is 3 minus 1?")

This is intermediate steps: []
<class 'dict'>


ValueError: variable agent_scratchpad should be a list of base messages, got None

In [221]:
from langchain.memory import ConversationBufferMemory
from langchain.agents import AgentExecutor
memory = ConversationBufferMemory(return_messages=True,memory_key="chat_history")

In [240]:
prompt = {
            "input": lambda x: x["input"],
            "agent_scratchpad": (
                lambda x: format_to_tool_messages(x["intermediate_steps"])
            ),
        } | ChatPromptTemplate.from_messages([
            ("user", "{input}"),
            MessagesPlaceholder(variable_name="agent_scratchpad"),
        ])

new_chain = prompt | model_with_tools | ToolsAgentOutputParser()
agent_executor = AgentExecutor(agent=new_chain, tools=tools, verbose=True, memory=memory)

In [242]:
agent_executor.invoke({"input": "what's 3 minus 2?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `minus_tool` with `{'a': 3, 'b': 2}`


[0m[33;1m[1;3m3 - 2 is 1[0m[32;1m[1;3mThe answer is 1.[0m

[1m> Finished chain.[0m


{'input': "what's 3 minus 2?",
 'chat_history': [HumanMessage(content="what's 3 minus 2?"),
  AIMessage(content='<tool-use>{"tool_calls": []}</tool-use>'),
  HumanMessage(content="what's 3 minus 2?"),
  AIMessage(content='The answer is 1.')],
 'output': 'The answer is 1.'}

In [251]:
agent_executor.invoke({"input": "my name is Leo, so what's my name?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mYour name is Leo.[0m

[1m> Finished chain.[0m


{'input': "my name is Leo, so what's my name?",
 'chat_history': [HumanMessage(content="what's 3 minus 2?"),
  AIMessage(content='<tool-use>{"tool_calls": []}</tool-use>'),
  HumanMessage(content="what's 3 minus 2?"),
  AIMessage(content='The answer is 1.'),
  HumanMessage(content='my name is leo'),
  AIMessage(content='<tool-use>{"tool_calls": []}</tool-use>'),
  HumanMessage(content="what's my name?"),
  AIMessage(content="I don't know your name."),
  HumanMessage(content='hi, my name is leo'),
  AIMessage(content="Hi Leo! It's nice to meet you."),
  HumanMessage(content="what's my name?"),
  AIMessage(content="I don't know your name."),
  HumanMessage(content="my name is Leo, so what's my name?"),
  AIMessage(content='Your name is Leo.')],
 'output': 'Your name is Leo.'}