In [None]:
from dotenv import load_dotenv
import os

load_dotenv()
os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# 失敗的話會列印出：'沒讀到 HUGGINGFACEHUB_API key'
os.environ["HUGGINGFACEHUB_API_TOKEN"]

In [2]:
from langchain.llms import HuggingFaceEndpoint
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.tools import tool
from langchain.agents import AgentType
from langchain.agents import initialize_agent
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
import requests
import pandas as pd

## Define Financial Statement Tools, Prompts, & Agent

In [3]:
@tool
def GetIncomeStatementItem(income_statement_item: str, ticker: int, year: int, season: int) -> float:
    '''
    A function that will get the income statement item of the ticker in the specific year and season.
    Returns -1 if something went wrong or the item is not found.
    
    If the return value is not -1, the result is correct. It's NOT an ID of any sort.
    Please return the income statement item's value and stop searching immediately.
    '''
    
    try:
        req = requests.post("https://mops.twse.com.tw/mops/web/ajax_t164sb04", data={
            "encodeURIComponent": 1,
            "step": 1,
            "firstin": 1,
            "off": 1,
            "queryName": ticker,
            "inpuType": ticker,
            "TYPEK": "all",
            "isnew": "false",
            "co_id": ticker,
            "year": year,
            "season": season
        })

        df = pd.read_html(req.text)[1]
        df = df.iloc[:, :2].dropna()
        display(df)
        for i, rows in df.iterrows():
            if income_statement_item in rows.iloc[0]:  # item found
                return float(rows.iloc[1])
        
        return -1
    except:
        return -1

    

@tool
def GetBalancedSheetItem(balanced_sheet_item: str, ticker: int, year: int, season: int) -> float:
    '''
    A function that will get the balanced item of the ticker in the specific year and season.
    Returns -1 if something went wrong. Please stop searching immediately and ask the user to modify their input.
    
    If the return value is not -1, the result is correct. It's NOT an ID of any sort.
    Please return the balanced sheet item's value and stop searching immediately.
    '''
    
    try:
        req = requests.post("https://mops.twse.com.tw/mops/web/ajax_t164sb03", data={
            "encodeURIComponent": 1,
            "step": 1,
            "firstin": 1,
            "off": 1,
            "queryName": ticker,
            "inpuType": ticker,
            "TYPEK": "all",
            "isnew": "false",
            "co_id": ticker,
            "year": year,
            "season": season
        })

        df = pd.read_html(req.text)[1]
        df = df.iloc[:, :2].dropna()
        display(df)
        for i, rows in df.iterrows():
            if balanced_sheet_item in rows.iloc[0]:  # item found
                return float(rows.iloc[1])
        
        return -1
    except:
        return -1

    
@tool
def GetYoYGrowth(previous_year_value:float, current_year_value:float) -> float:
    '''
    A function that will compute the YoY growth of a specific financial statement item.
    The formula is (current_year_value - previous_year_value) / previous_year_value.
    The functions reterns a floating number, not a percentage.
    The growth can be either negative or positive. The function is error-free.
    '''
    return (current_year_value - previous_year_value) / previous_year_value

In [4]:
user_template = """
Find the YoY growth rate of the financial statement item of the ticker from year1 to year2.

financial statement item: {financial_statement_item}
ticker: {ticker}
year1: {year1}
year2: {year2}
season: 4

Try to find the item in the income statement first. If not found, try to find it in the balanced sheet.
DO NOT translate the financial statement item into English, and DO NOT change the input years!


The input for the tools should be formatted as the following example.
```
"action_input": {{
    "balanced_sheet_item": <financial_statement_item, a string>,
    "ticker": <ticker, an integer>,
    "year": <year, an integer>,
    "season": <season, an integer>
}}
```

You must get the financial statement item values of the two years before invoking GetYoYGrowth.

Present your final answer as the following example.
```
{{
  "action": "Final Answer",
  "action_input": "The YoY growth rate of the cash item for ticker <ticker> from year <year1> to year <year2> in season 4 is approximately <yoy, in percentage>."
}}
```
"""


prompt = ChatPromptTemplate.from_messages([
    ("user", user_template),
    ("placeholder", "{agent_scratchpad}")
])

In [5]:
llm = HuggingFaceEndpoint(repo_id="mistralai/Mistral-7B-Instruct-v0.2", max_new_tokens=1024, temperature=0.1)

financial_statement_agent = initialize_agent(
    tools = [GetIncomeStatementItem, GetBalancedSheetItem, GetYoYGrowth],
    llm = llm,
    agent = AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,  # supports two argument tools
    verbose = True
)

financial_statement_agent_executor = prompt | financial_statement_agent

  warn_deprecated(


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\ducky\.cache\huggingface\token
Login successful


  warn_deprecated(


## Get Stock Information from Gemini
Gemini can search google for information, which makes things much easier :)

In [6]:
from langchain_google_genai import ChatGoogleGenerativeAI

In [7]:
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")

user_template = """
You're a professional business analyst.
You're required to answer the following question.

{ticker}.TW在moneydj上所述業務內容為何?

Please do a browser search on the website, "moneydj", before answering the question.
DO NOT use your own knowledge!
"""

prompt = ChatPromptTemplate.from_messages([
    ("user", user_template)
])


llm = ChatGoogleGenerativeAI(model="gemini-pro")
chain = prompt | llm
result = chain.invoke(3665)
print(result.content)

根據 moneydj 網站上的資料，3665.TW 的業務內容為：

* 智慧型醫療雲平台開發與服務
* 醫療設備研發與製造
* 醫療器材代理經銷
* 醫療軟體開發與服務
* 醫療資訊系統整合


### Oops...
Well, Gemini sucks :(

## Get Stock Information from MoneyDJ

In [8]:
from bs4 import BeautifulSoup

In [9]:
def get_moneydj_text(ticker):
    headers = {
        'User-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82"
    }
    parameters = {'q': f"{ticker}+moneydj"}

    res = requests.get("https://www.google.com/search", headers=headers, params=parameters)
    soup = BeautifulSoup(res.text, 'html.parser')
    
    search_div = soup.find(id="search")
    correct_link = ""
    links = search_div.find_all("a", href=True)
    for link in links:
        if "kmdj" in link["href"]:
            correct_link = link["href"]
            break
    
    
    moneydj_res = requests.get(correct_link, headers=headers)
    moneydj_soup = BeautifulSoup(moneydj_res.text, "html.parser")
    moneydj_div = moneydj_soup.find("div", {"class": "UserDefined"})
    return moneydj_div.text

In [10]:
print(get_moneydj_text(3665))
# print(get_moneydj_text(5288))
# print(get_moneydj_text(2330))



 (一) 公司簡介

1.沿革與背景

		貿聯控股公司創立於1996年，2000年成立貿聯控股(BIZLINK HOLDING INC.)，註冊於開曼群島，總部位於加州矽谷，為全球連接線束解決方案的領導廠商。產品應用在資訊科技用線、電腦週邊設備、數據中心用線、消費電子用線、電器用線、車用線、醫療設備用線、光通訊設備、太陽能連接裝置、工業設備用線、半導體設備等線材、連接器等領域。

2.營業項目與產品結構

		2022年產品營收佔比為：電源線及數據連接線組32.54%、線束48.55%、擴充基座及擴展延接裝置11.54%、其他7.37%。




			產品圖來源：公司網站

(二) 產品與競爭條件

1.產品與技術簡介




						產品

						用途



						連接器、線材、連接線、太陽能接線盒


							訊號傳輸：資訊產品之高頻線束、資料中心高速傳輸線、車用線束、電動車電池包連接線束、運動車主線束，醫療設備與感應器連結線束，醫療長距離訊號監測裝置線束。

							 

							電力傳輸：電器、電動手工具、太陽能電能高壓線束、電動車充電高壓線、商用擴充機座快速充電介面。




						光主/被動元件模組


							光被動元件模組是光傳輸設備的基礎器件，是光通信系統以及光通訊網路的核心。透過光傳輸技術，將訊號高速傳送。

							 

							應用：數據中心、電話及通訊業、電視傳播媒體業。





2.重要原物料

			產品為主要原料為線材、連接器、端子等。

3.主要生產據點

			生產基地位於大陸深圳、昆山、常州、廈門、墨西哥、馬來西亞、美國德州、斯洛伐克、塞爾維亞、新加坡。
			 

			2022年，在台首間新廠落腳台南，將定位是高附加價值，以研發為主，未來亦將成為公司的自動化基地，預計於2024年第2季完成。而新廠區將會定位為「campus(園區)」，而不單單只是「factory(工廠)」。

			 

			另外，公司也首度揮軍印尼，正在巴淡島興建新廠房，預計2024年第3季完工，將可支援新加坡當地開發後量產的訂單和其他區域轉單需求。
			 
(三)市場銷售及競爭

1.銷售狀況

		2022年產品銷售地區佔比為：亞洲39.3

## Import Classification Functionality

In [11]:
user_template = """
You are a professional business analyst. Please help me classify by industry the following text delimited by <text> tags.
The possible industries are semiconductors, industrial automation, electric vehicle, and AI.

<text>
{text}
</text>

The output format is as follows, in JSON.

{{"category": <INTEGER_category>}}

The <INTEGER_category> to choose from are semiconductors (category: 1), industrial automation (category: 2), eletric vehicle (category: 3), and AI (category: 4).
Please output the category number ranging from 1 to 4, rather than the category name.
Each news must belong to EXACTLY one category.
Please ONLY output the json and nothing else. You should strictly adhere to the output format!

Below is an example of a valid output:
{{"category": 4}}
"""

zeroshot_prompt = ChatPromptTemplate.from_messages([
    ("user", user_template)
])


user_failed_template = """
The output format is illegal! Please re-generate a correct output.

The output format is as follows, in JSON.

{{"category": <INTEGER_category>}}

The <INTEGER_category> to choose from are semiconductors (category: 1), industrial automation (category: 2), eletric vehicle (category: 3), and AI (category: 4).
Please output the category number ranging from 1 to 4, rather than the category name.
Each news must belong to EXACTLY one category.
Please ONLY output the json and nothing else. 
"""


zeroshot_failed_prompt = ChatPromptTemplate.from_messages([
        ("user", user_template),
        ("ai", "{model_output}"),
        ("user", user_failed_template)
    ])

In [12]:
class Category(BaseModel):
    categoy: int = Field(description="the category number that the piece of news belongs to")


def get_json(text: str) -> str:
    index_start = text.find('{"')
    index_end = text.find('}', index_start)
    return text[index_start : (index_end + 1)]
        

parser = JsonOutputParser(pydantic_object=Category)

## Let LLM Explain the Relationships Between Financial Statement and News

In [13]:
recent_news = [None]

with open(f"./News/2024/semiconductors/1.txt", 'r', encoding='UTf-8') as file:
    recent_news.append(file.read())

with open(f"./News/2024/industrial_automation/1.txt", 'r', encoding='UTf-8') as file:
    recent_news.append(file.read())

with open(f"./News/2024/ev/1.txt", 'r', encoding='UTf-8') as file:
    recent_news.append(file.read())

with open(f"./News/2024/AI/1.txt", 'r', encoding='UTf-8') as file:
    recent_news.append(file.read())

In [14]:
llm = HuggingFaceEndpoint(repo_id="mistralai/Mistral-7B-Instruct-v0.2", max_new_tokens=4096, temperature=0.1)

user_template = """
You are a professional business analyst.
Please help me analyze what's behind the change in the financial statement item according to \
the company's business and the news provided.

Financial Statement Item: {financial_statement_item}
Change from {year1} to {year2}: {growth}

<the company's business>
{business_text}
</the company's business>

<recent news in the industry>
{recent_news}
</recent news in the industry>
"""


explain_prompt = ChatPromptTemplate.from_messages([
        ("user", user_template)
    ])


explain_chain = explain_prompt | llm

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\ducky\.cache\huggingface\token
Login successful


## Merge Industrial News & Financial Statement Analysis

In [15]:
def demo_func(ticker, year):
    # Get sales YoY
    year2 = int(year)
    year1 = year2 - 1
    sales_result = financial_statement_agent_executor.invoke({
        "financial_statement_item": "營業收入",
        "ticker": ticker,
        "year1": year1,
        "year2": year2
    })
    
    # Get 公司業務內容 and industry
    moneydj_text = get_moneydj_text(ticker)
    category = 0
    try:
        llm_output_chain = zeroshot_prompt | llm
        llm_output = llm_output_chain.invoke({"text": moneydj_text})
        json_chain = get_json | parser
        answer = json_chain.invoke(llm_output)
        assert((1 <= answer["category"] and answer["category"] <= 4))
        category = answer["category"]
    except:
        llm_output_chain = zeroshot_failed_prompt | llm
        llm_output = llm_output_chain.invoke({"text": moneydj_text, "model_output": llm_output})
        json_chain = get_json | parser
        answer = json_chain.invoke(llm_output)
        assert((1 <= answer["category"] and answer["category"] <= 4))
        category = answer["category"]
    
    
    # Get industry news and let LLM explain
    explain_result = explain_chain.invoke({
        "financial_statement_item": "營業收入",
        "year1": year1,
        "year2": year2,
        "growth": sales_result["output"],
        "business_text": moneydj_text,
        "recent_news": recent_news[category]
    })
    
    return sales_result["output"], explain_result

In [16]:
import gradio as gr

In [17]:
demo = gr.Interface(
    demo_func,
    inputs=[
        gr.Textbox(label="Ticker"),
        gr.Textbox(label="Year")
    ],
    outputs=[
        gr.Textbox(label="Change in Sales"),
        gr.Textbox(label="Possible Reasons")
    ]
)

demo.launch(debug=True, share=True)

Running on local URL:  http://127.0.0.1:7860

Thanks for being a Gradio user! If you have questions or feedback, please join our Discord server and chat with us: https://discord.gg/feTf9x3ZSB
IMPORTANT: You are using gradio version 3.48.0, however version 4.29.0 is available, please upgrade.
--------
Running on public URL: https://b59701dfcd53edb79b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: The human is asking for the YoY growth rate of the income statement item "營業收入" (Revenue) for ticker 2330 from year 111 to year 112 in season 4. I will first try to find the income statement item for the given ticker, year, and season. If not found, I will try to find it in the balanced sheet.

Action:
```
{
  "action": "GetIncomeStatementItem",
  "action_input": {
    "income_statement_item": "營業收入",
    "ticker": 2330,
    "year": 111,
    "season": 4
  }
}
```
[0m

Unnamed: 0_level_0,民國111年第4季,民國111年第4季
Unnamed: 0_level_1,單位：新台幣仟元,單位：新台幣仟元
Unnamed: 0_level_2,會計項目,111年度
Unnamed: 0_level_3,Unnamed: 0_level_3,金額
0,營業收入合計,2263891000.0
1,營業成本合計,915536500.0
2,營業毛利（毛損）,1348355000.0
3,營業毛利（毛損）淨額,1348355000.0
5,推銷費用,9920446.0
6,管理費用,53524900.0
7,研究發展費用,163262200.0
8,營業費用合計,226707600.0
10,其他收益及費損淨額,-368403.0
11,營業利益（損失）,1121279000.0



Observation: [36;1m[1;3m2263891292.0[0m
Thought:[32;1m[1;3m I found the income statement item for the given ticker, year, and season. Now I will find the income statement item for the previous year.

Action:
```
{
  "action": "GetIncomeStatementItem",
  "action_input": {
    "income_statement_item": "營業收入",
    "ticker": 2330,
    "year": 110,
    "season": 4
  }
}
```

[0m

Unnamed: 0_level_0,民國110年第4季,民國110年第4季
Unnamed: 0_level_1,單位：新台幣仟元,單位：新台幣仟元
Unnamed: 0_level_2,會計項目,110年度
Unnamed: 0_level_3,Unnamed: 0_level_3,金額
0,營業收入合計,1587415000.0
1,營業成本合計,767877800.0
2,營業毛利（毛損）,819537300.0
3,營業毛利（毛損）淨額,819537300.0
5,推銷費用,7558591.0
6,管理費用,36929590.0
7,研究發展費用,124734800.0
8,營業費用合計,169222900.0
10,其他收益及費損淨額,-333435.0
11,營業利益（損失）,649980900.0



Observation: [36;1m[1;3m1587415037.0[0m
Thought:[32;1m[1;3m Now that I have the income statement item values for the two years, I can calculate the YoY growth rate using the GetYoYGrowth tool.

Action:
```
{
  "action": "GetYoYGrowth",
  "action_input": {
    "previous_year_value": 1587415037.0,
    "current_year_value": 2263891292.0
  }
}
```

[0m
Observation: [38;5;200m[1;3m0.42614958232879585[0m
Thought:[32;1m[1;3m The YoY growth rate of the income statement item "營業收入" (Revenue) for ticker 2330 from year 111 to year 112 in season 4 is approximately 42.61%.

Action:
```
{
  "action": "Final Answer",
  "action_input": "The YoY growth rate of the financial statement item 營業收入 (Revenue) for ticker 2330 from year 111 to year 112 in season 4 is approximately 42.61%."
}
```[0m

[1m> Finished chain.[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: The human is asking for the YoY growth rate of a specific financial statement item for a given ticker and

Unnamed: 0_level_0,民國111年第4季,民國111年第4季
Unnamed: 0_level_1,單位：新台幣仟元,單位：新台幣仟元
Unnamed: 0_level_2,會計項目,111年度
Unnamed: 0_level_3,Unnamed: 0_level_3,金額
0,營業收入合計,8037354.0
1,營業成本合計,6478423.0
2,營業毛利（毛損）,1558931.0
3,未實現銷貨（損）益,0.0
4,已實現銷貨（損）益,313.0
5,營業毛利（毛損）淨額,1559244.0
7,推銷費用,117340.0
8,管理費用,238445.0
9,研究發展費用,84123.0
10,預期信用減損損失（利益）,25932.0



Observation: [36;1m[1;3m8037354.0[0m
Thought:[32;1m[1;3m I have the income statement item value for the first year. Now I need to find the income statement item value for the second year.

Action:
```json
{
  "action": "GetIncomeStatementItem",
  "action_input": {
    "income_statement_item": "營業收入",
    "ticker": 5288,
    "year": 112,
    "season": 4
  }
}
```

[0m

Unnamed: 0_level_0,民國112年第4季,民國112年第4季
Unnamed: 0_level_1,單位：新台幣仟元,單位：新台幣仟元
Unnamed: 0_level_2,會計項目,112年度
Unnamed: 0_level_3,Unnamed: 0_level_3,金額
0,營業收入合計,7267327.0
1,營業成本合計,5744734.0
2,營業毛利（毛損）,1522593.0
3,未實現銷貨（損）益,6.0
4,已實現銷貨（損）益,0.0
5,營業毛利（毛損）淨額,1522587.0
7,推銷費用,98952.0
8,管理費用,244606.0
9,研究發展費用,85790.0
10,預期信用減損損失（利益）,13224.0



Observation: [36;1m[1;3m7267327.0[0m
Thought:[32;1m[1;3m Now that I have the income statement item values for both years, I can compute the YoY growth rate using the GetYoYGrowth tool.

Action:
```json
{
  "action": "GetYoYGrowth",
  "action_input": {
    "previous_year_value": 8037354.0,
    "current_year_value": 7267327.0
  }
}
```

[0m
Observation: [38;5;200m[1;3m-0.09580603268190004[0m
Thought:[32;1m[1;3m The YoY growth rate is negative, indicating a decrease in the financial statement item value from year1 to year2.

Action:
```json
{
  "action": "Final Answer",
  "action_input": "The YoY growth rate of the 營業收入 financial statement item for ticker 5288 from year 111 to year 112 in season 4 is approximately -9.58%."
}
```[0m

[1m> Finished chain.[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: The human is asking for the YoY growth rate of the income statement item "營業收入" (Revenue) for ticker 2330 from year 111 to year 112 in season 4. I will

Unnamed: 0_level_0,民國111年第4季,民國111年第4季
Unnamed: 0_level_1,單位：新台幣仟元,單位：新台幣仟元
Unnamed: 0_level_2,會計項目,111年度
Unnamed: 0_level_3,Unnamed: 0_level_3,金額
0,營業收入合計,2263891000.0
1,營業成本合計,915536500.0
2,營業毛利（毛損）,1348355000.0
3,營業毛利（毛損）淨額,1348355000.0
5,推銷費用,9920446.0
6,管理費用,53524900.0
7,研究發展費用,163262200.0
8,營業費用合計,226707600.0
10,其他收益及費損淨額,-368403.0
11,營業利益（損失）,1121279000.0



Observation: [36;1m[1;3m2263891292.0[0m
Thought:[32;1m[1;3m I found the income statement item for the given ticker, year, and season. Now I will find the income statement item for the previous year.

Action:
```
{
  "action": "GetIncomeStatementItem",
  "action_input": {
    "income_statement_item": "營業收入",
    "ticker": 2330,
    "year": 110,
    "season": 4
  }
}
```

[0m

Unnamed: 0_level_0,民國110年第4季,民國110年第4季
Unnamed: 0_level_1,單位：新台幣仟元,單位：新台幣仟元
Unnamed: 0_level_2,會計項目,110年度
Unnamed: 0_level_3,Unnamed: 0_level_3,金額
0,營業收入合計,1587415000.0
1,營業成本合計,767877800.0
2,營業毛利（毛損）,819537300.0
3,營業毛利（毛損）淨額,819537300.0
5,推銷費用,7558591.0
6,管理費用,36929590.0
7,研究發展費用,124734800.0
8,營業費用合計,169222900.0
10,其他收益及費損淨額,-333435.0
11,營業利益（損失）,649980900.0



Observation: [36;1m[1;3m1587415037.0[0m
Thought:[32;1m[1;3m Now that I have the income statement item values for the two years, I can calculate the YoY growth rate using the GetYoYGrowth tool.

Action:
```
{
  "action": "GetYoYGrowth",
  "action_input": {
    "previous_year_value": 1587415037.0,
    "current_year_value": 2263891292.0
  }
}
```

[0m
Observation: [38;5;200m[1;3m0.42614958232879585[0m
Thought:[32;1m[1;3m The YoY growth rate of the income statement item "營業收入" (Revenue) for ticker 2330 from year 111 to year 112 in season 4 is approximately 42.61%.

Action:
```
{
  "action": "Final Answer",
  "action_input": "The YoY growth rate of the financial statement item 營業收入 (Revenue) for ticker 2330 from year 111 to year 112 in season 4 is approximately 42.61%."
}
```[0m

[1m> Finished chain.[0m
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://b59701dfcd53edb79b.gradio.live


