# Token & Models

In [1]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()
api_key = os.getenv("API_KEY")
# Fail fast with a readable message: assigning None into os.environ would
# otherwise raise an opaque "str expected, not NoneType" TypeError.
if not api_key:
    raise RuntimeError("API_KEY is not set; define it in the environment or in a .env file.")
# Expose the key under the variable name the OpenAI client libraries look for.
os.environ["OPENAI_API_KEY"] = api_key

In [2]:
from langchain_openai import ChatOpenAI

# Two separately constructed chat clients configured with the same model name.
# `llm` is the one bound to tools later in this notebook; `model` is not
# referenced again in the cells visible here.
model = ChatOpenAI(model="gpt-4o-mini")
llm = ChatOpenAI(model="gpt-4o-mini")

# Tools Development

In [3]:
from langchain_core.tools import tool

## Website Info Retriever

In [5]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

@tool
def website_info_retriever(query: str) -> str:
    """Perform RAG retrieval on the website dataset."""
    # NOTE: with @tool the docstring above doubles as the tool description given
    # to the model, so it is kept verbatim; implementation notes live in comments.
    #
    # Args:
    #   query: free-text search query used for the similarity search.
    # Returns:
    #   One "link: <url>\n<page_content>\n" section per retrieved document,
    #   concatenated in retrieval order (up to 30 documents).
    vectorstore = Chroma(
        embedding_function=OpenAIEmbeddings(),
        collection_name="ncu_office_websites",
        persist_directory="./Parse Websites v2/ncu_office_websites"
    )
    website_retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 30})
    docs = website_retriever.invoke(query)

    sections = []
    for doc in docs:
        # A document stored without a "link" metadata entry yields None here; the
        # previous string concatenation then raised TypeError. Fall back to "".
        link = str(doc.metadata.get("link") or "")
        page_content = doc.page_content

        print("link: ", link)
        print("page_content: ", page_content)
        sections.append("link: " + link + "\n" + page_content + "\n")
    return "".join(sections)

# Smoke test: the (Chinese) query asks for the latest news from the Office of Student Affairs.
website_info_retriever.invoke("請幫我找學務處最新消息")

link:  https://osa.ncu.edu.tw/news_article.php?event_id=3101/news.php
page_content:  網站標題: 最新消息
簡介: 網頁內容主要介紹了國立中央大學學務處的各個組別及其功能，並發布了最新的活動消息。特別是提到了一場名為「宿舍導師講座_12/13(五)下一站去哪裡？」的活動，將於2024年12月13日舉行，時間為18:30至20:30，地點在女14舍藝文空間。講座由李健榮博士主講，將探討人生選擇及自主學習的影響，參加者可獲得生活知能2小時，並符合申請宿舍學習獎助學金的資格。聯絡人為胡硯芬，提供了電話與電子郵件聯絡方式。
---
link:  https://pdc.adm.ncu.edu.tw/#/index.asp
page_content:  網站標題: 最新消息
簡介: 國立中央大學的教務處網站提供關於教務相關的資訊，包括招生、註冊、課務及教學發展等各個組別的公告和活動。網站上有最新消息的更新，例如研究所甄試入學的注意事項、課程停修的申請時間、各種教學研討會及競賽的通知等。此外，教務處也提供相關服務及資源，例如選課系統和跨領域學習諮詢等。網站建議使用支持JavaScript的瀏覽器以獲得最佳瀏覽體驗。
---
link:  https://pdc.adm.ncu.edu.tw/#U/index.asp
page_content:  網站標題: 最新消息
簡介: 國立中央大學的教務處提供各類學術及行政服務，主要包括招生、註冊、課務和教學發展等功能。網站上有最新消息公告，例如研究所甄試的入學注意事項、課程停修申請及各種教學活動的報名信息。此外，教務處還提供跨領域學習諮詢服務及著作權法相關指引。對於使用者，網站建議使用支持JavaScript的瀏覽器以確保功能正常運作，並提供聯絡資訊及隱私政策聲明。
---
link:  https://pdc.adm.ncu.edu.tw/.//index.asp
page_content:  網站標題: 最新消息
簡介: 國立中央大學教務處的網頁提供了關於教務處各組別的資訊，包括招生組、註冊組、課務組、教學發展中心及中大出版中心。網頁上有最新消息更新，如研究所甄試錄取生的提前入學申請注意事項、課程停修申請的時間、各種講座和工作坊的報名資訊等。此外，還

'link: https://osa.ncu.edu.tw/news_article.php?event_id=3101/news.php\n網站標題: 最新消息\n簡介: 網頁內容主要介紹了國立中央大學學務處的各個組別及其功能，並發布了最新的活動消息。特別是提到了一場名為「宿舍導師講座_12/13(五)下一站去哪裡？」的活動，將於2024年12月13日舉行，時間為18:30至20:30，地點在女14舍藝文空間。講座由李健榮博士主講，將探討人生選擇及自主學習的影響，參加者可獲得生活知能2小時，並符合申請宿舍學習獎助學金的資格。聯絡人為胡硯芬，提供了電話與電子郵件聯絡方式。\n---\nlink: https://pdc.adm.ncu.edu.tw/#/index.asp\n網站標題: 最新消息\n簡介: 國立中央大學的教務處網站提供關於教務相關的資訊，包括招生、註冊、課務及教學發展等各個組別的公告和活動。網站上有最新消息的更新，例如研究所甄試入學的注意事項、課程停修的申請時間、各種教學研討會及競賽的通知等。此外，教務處也提供相關服務及資源，例如選課系統和跨領域學習諮詢等。網站建議使用支持JavaScript的瀏覽器以獲得最佳瀏覽體驗。\n---\nlink: https://pdc.adm.ncu.edu.tw/#U/index.asp\n網站標題: 最新消息\n簡介: 國立中央大學的教務處提供各類學術及行政服務，主要包括招生、註冊、課務和教學發展等功能。網站上有最新消息公告，例如研究所甄試的入學注意事項、課程停修申請及各種教學活動的報名信息。此外，教務處還提供跨領域學習諮詢服務及著作權法相關指引。對於使用者，網站建議使用支持JavaScript的瀏覽器以確保功能正常運作，並提供聯絡資訊及隱私政策聲明。\n---\nlink: https://pdc.adm.ncu.edu.tw/.//index.asp\n網站標題: 最新消息\n簡介: 國立中央大學教務處的網頁提供了關於教務處各組別的資訊，包括招生組、註冊組、課務組、教學發展中心及中大出版中心。網頁上有最新消息更新，如研究所甄試錄取生的提前入學申請注意事項、課程停修申請的時間、各種講座和工作坊的報名資訊等。此外，還有關於交換學生申請和學位證書頒發的公告。網站建議使用Chrome、Firefox或Microsof

## Website Crawler

In [20]:
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

@tool
def website_crawler(link: str, cur_title: str) -> str:
    """Takes url of a website and reads the HTML content of the website and then extracts all the links on that website."""
    # NOTE: with @tool the docstring above doubles as the tool description given
    # to the model, so it is kept verbatim; implementation notes live in comments.
    #
    # Args:
    #   link: text containing an http(s) URL. A "[title]: [url]" line (the format
    #         this tool itself emits) is accepted because the match stops at "]".
    #   cur_title: label of the page, used only in log/error messages.
    # Returns:
    #   Newline-separated "[title]: [url]" lines, one per <a> tag whose target
    #   answered HTTP 200, or an error-message string when the page itself
    #   cannot be fetched. (The return value has always been a str; the former
    #   `list[dict]` annotation was wrong.)
    extract_link = re.search(r'(https?://[^\]]+)', link)  # pull the URL out of the input text
    if extract_link is None:
        # Previously None was handed to requests.get and surfaced as a confusing
        # "Invalid URL 'None'" error; report the actual problem instead.
        message = f"無法獲取[{cur_title}]: [{link}] 。錯誤: 輸入中找不到 http(s) 連結"
        print(message)
        return message
    url = extract_link.group(1)

    try:  # send the HTTP request and fetch the page
        # SECURITY NOTE(review): verify=False disables TLS certificate checking.
        # Kept as in the original; confirm it is still needed for the target sites.
        response = requests.get(url, verify=False, timeout=1)
        response.encoding = response.apparent_encoding
    except requests.exceptions.RequestException as e:
        message = f"無法獲取[{cur_title}]: [{url}] 。錯誤: {e}"
        print(message)
        return message

    if response.status_code != 200:  # only parse successful responses
        message = f"無法獲取[{cur_title}]: [{url}] 。HTTP 狀態碼: {response.status_code}"
        print(message)
        return message

    soup = BeautifulSoup(response.text, 'html.parser')
    websites = []
    # Only <a> tags are inspected; links inside iframes are a known gap.
    for anchor in soup.find_all('a'):
        title = anchor.get_text(strip=True)  # the anchor text serves as the title
        if title == '':  # untitled anchors are skipped
            continue

        href = (anchor.get('href') or '').strip()
        if not href or href.startswith('#'):
            # No target, or a fragment that only points back into this page.
            continue

        # Resolve relative references with the standard resolver, against the URL
        # the response was actually served from (after redirects). The previous
        # hand-rolled suffix stripping produced bogus URLs for pages with a query
        # string and for hrefs that merely contained the substring "http".
        final_url = urljoin(response.url, href)
        if not final_url.startswith(('http://', 'https://')):
            continue  # mailto:, javascript:, tel: ... cannot be fetched

        try:  # keep the link only if its target is reachable
            test_response = requests.get(final_url, verify=False, timeout=1)
        except requests.exceptions.RequestException as e:
            print(f"無法獲取[{title}]: [{final_url}] ，不加入資料中。錯誤: {e}")
            continue

        if test_response.status_code == 200:
            print(f"成功獲取[{title}]: [{final_url}] ，加入資料中。")
            websites.append({'title': title, 'link': final_url})
        else:
            print(f"無法獲取[{title}]: [{final_url}] ，不加入資料中。HTTP 狀態碼: {test_response.status_code}")

    result = "\n".join([f"[{item['title']}]: [{item['link']}]" for item in websites])
    print(result)
    return result

In [None]:
# Crawl the Office of Student Affairs news page; the tool returns its findings as a single string.
websites = website_crawler.invoke({"link": "https://osa.ncu.edu.tw/news.php", "cur_title": "學務處最新消息"})

## Website Reader

In [13]:
from bs4 import BeautifulSoup
import requests
import ssl
import certifi


@tool
def website_reader(url: str) -> str:
    """Read the content of a website."""
    # NOTE: with @tool the docstring above doubles as the tool description given
    # to the model, so it is kept verbatim; implementation notes live in comments.
    #
    # Args:
    #   url: address of the page to fetch.
    # Returns:
    #   The page's visible text with blank lines removed, or an error-message
    #   string when the request fails or the server answers with a non-200 status.
    try:
        # requests has no default timeout, so an unresponsive server would block
        # the agent forever without one.
        # SECURITY NOTE(review): verify=False disables TLS certificate checking.
        # Kept as in the original; confirm it is still needed for the target sites.
        response = requests.get(url, verify=False, timeout=10)
        response.encoding = response.apparent_encoding
    except requests.exceptions.RequestException as e:
        message = f"無法獲取網頁。錯誤: {e}"
        print(message)
        return message

    # Same success criterion as the crawler and the PDF reader: do not hand the
    # text of an error page back as if it were the requested content.
    if response.status_code != 200:
        message = f"無法獲取網頁。HTTP 狀態碼: {response.status_code}"
        print(message)
        return message

    soup = BeautifulSoup(response.text, 'html.parser')
    content = soup.get_text()

    # Drop lines that are empty or whitespace-only; remaining lines are kept as-is.
    return "\n".join(line for line in content.split("\n") if line.strip())

In [None]:
# Smoke test: read the text of the Office of Student Affairs home page.
content = website_reader.invoke("https://osa.ncu.edu.tw")
print(content)

## PDF Reader

In [None]:
import pdfplumber
from io import BytesIO

@tool
def pdf_reader(url: str) -> str:
    """Read the content of a PDF file."""
    # NOTE: with @tool the docstring above doubles as the tool description given
    # to the model, so it is kept verbatim; implementation notes live in comments.
    #
    # Args:
    #   url: address of the PDF to download.
    # Returns:
    #   The extracted text of all pages, one page per newline-separated chunk, or
    #   an error-message string when the download fails. Every path returns a
    #   str; the non-200 path used to fall through and return None.
    try:
        # requests has no default timeout; bound the wait. The body is binary, so
        # no text-encoding detection is performed on it.
        response = requests.get(url, timeout=30)
    except requests.exceptions.RequestException as e:
        message = f"無法獲取網頁。錯誤: {e}"
        print(message)
        return message

    # Make sure the download succeeded before parsing.
    if response.status_code != 200:
        message = f"下載失敗，HTTP 狀態碼: {response.status_code}"
        print(message)
        return message

    # Wrap the downloaded bytes in an in-memory binary stream for pdfplumber.
    with pdfplumber.open(BytesIO(response.content)) as pdf:
        # extract_text() may yield None for a page without a text layer; treat
        # that as empty instead of failing on str + None.
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    # Separate pages with a newline so the last word of one page does not fuse
    # with the first word of the next.
    return "\n".join(page_texts)
In [None]:
# Smoke test: download one PDF and print the extracted text.
pdf_text = pdf_reader.invoke("https://pdc.adm.ncu.edu.tw/postM/post/reg/1131024_1.pdf")
print(pdf_text)

## Selenium Controller

In [None]:
from utils.selenium_controller import SeleniumController
import time

# Single controller instance shared by every Selenium tool defined below; each
# tool forwards its user_id argument to it.
selenium_controller = SeleniumController()

# Thin tool wrapper: forwards to selenium_controller.create_browser.
# The controller's result is not returned, so the tool yields None.
@tool
def create_browser(user_id: str):
    """Create a new browser session."""
    selenium_controller.create_browser(user_id)

# Thin tool wrapper: forwards to selenium_controller.screen_shot.
# The controller's result is not returned, so the tool yields None.
# NOTE(review): whether the parent directory of file_path must already exist
# depends on the controller implementation — confirm.
@tool
def screen_shot(user_id: str, file_path: str):
    """Take a screenshot of the current page and save it to the file path."""
    selenium_controller.screen_shot(user_id, file_path)

# Thin tool wrapper: forwards to selenium_controller.navigate.
# The controller's result is not returned, so the tool yields None.
@tool
def navigate(user_id: str, url: str):
    """Navigate to the specified URL."""
    selenium_controller.navigate(user_id, url)

# Thin tool wrapper: returns whatever selenium_controller.get_content produces
# for this user_id (annotated as str).
@tool
def get_html_content(user_id: str) -> str:
    """Get the HTML content of the current page."""
    return selenium_controller.get_content(user_id)

# Thin tool wrapper: forwards to selenium_controller.click_button_with_text.
# The controller's result is not returned, so the tool yields None.
@tool
def click_button_with_text(user_id: str, text: str):
    """Click the specified button based on the text."""
    selenium_controller.click_button_with_text(user_id, text)

# Thin tool wrapper: forwards to selenium_controller.click_input_with_id.
# The controller's result is not returned, so the tool yields None.
# The parameter name `id` shadows the builtin inside this function; it is part
# of the tool's argument schema, so renaming it would change the interface.
@tool
def click_input_with_id(user_id: str, id: str):
    """Click the specified input based on the id."""
    selenium_controller.click_input_with_id(user_id, id)

# Thin tool wrapper: forwards to selenium_controller.click_span_with_aria_label.
# The controller's result is not returned, so the tool yields None.
# NOTE(review): `index` is accepted (and exposed in the tool schema) but never
# passed to the controller, so it currently has no effect. Whether the
# controller method accepts an index is not visible here — confirm and either
# forward it or drop the parameter.
@tool
def click_span_with_aria_label(user_id: str, arialabel: str, index: int = 0):
    """Click the specified span based on the aria-label."""
    selenium_controller.click_span_with_aria_label(user_id, arialabel)

# Thin tool wrapper: forwards to selenium_controller.click_element.
# The controller's result is not returned, so the tool yields None.
@tool
def click_element(user_id: str, xpath: str):
    """Click the specified element based on the xpath."""
    selenium_controller.click_element(user_id, xpath)

# Thin tool wrapper: forwards to selenium_controller.input_text.
# The controller's result is not returned, so the tool yields None.
@tool
def input_text(user_id: str, xpath: str, text: str):
    """Input text into the specified input field based on the xpath."""
    selenium_controller.input_text(user_id, xpath, text)

# Thin tool wrapper: forwards to selenium_controller.upload_file.
# The controller's result is not returned, so the tool yields None.
@tool
def upload_file(user_id: str, xpath: str, file_path: str):
    """Upload a file to the specified input field based on the xpath."""
    selenium_controller.upload_file(user_id, xpath, file_path)

# Thin tool wrapper: forwards to selenium_controller.select_dropdown_option.
# The controller's result is not returned, so the tool yields None.
@tool
def select_dropdown_option(user_id: str, xpath: str, option: str):
    """Select an option from the dropdown menu based on the xpath."""
    selenium_controller.select_dropdown_option(user_id, xpath, option)

In [None]:
import shutil
import os

# Empty the screenshots directory (the directory itself is kept) so that
# screenshots from earlier runs are not mistaken for new ones.
screenshots_dir = "screenshots"
if os.path.exists(screenshots_dir):
    for filename in os.listdir(screenshots_dir):
        file_path = os.path.join(screenshots_dir, filename)
        try:
            # Files and symlinks are unlinked; sub-directories are removed recursively.
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            # Best effort: report the entry that could not be removed and carry on.
            print(f'Failed to delete {file_path}. Reason: {e}')
# Printed unconditionally — also when the directory did not exist or when some
# deletions failed.
print("Cleared screenshots directory")

In [None]:
# Open a browser session for the demo user id used in the following cells.
create_browser.invoke("user1")

In [None]:
# Open the portal login page, pause for a fixed half second (presumably to let
# the page render — not a guaranteed wait), then save a screenshot.
navigate.invoke({"user_id": "user1", "url": "https://portal.ncu.edu.tw/login"})
time.sleep(0.5)
screen_shot.invoke({"user_id": "user1", "file_path": "screenshots/portal.png"})

In [None]:
# Dump the content of the page currently open in user1's session.
website_content = get_html_content.invoke("user1")
print(website_content)

In [None]:
# Remove the global controller name. The Selenium tools look up
# `selenium_controller` when they are called, so after this cell they raise
# NameError until the cell that creates the controller is run again.
# NOTE(review): presumably meant to release the browser(s) via the controller's
# finalizer — confirm; an explicit close call would be more reliable.
del selenium_controller

## Save Pipeline Instruction

In [None]:
@tool
def save_pipeline_instruction(instruction: str, file_name: str) -> str:
    """Save the pipeline instruction as txt file."""
    # NOTE: with @tool the docstring above doubles as the tool description given
    # to the model, so it is kept verbatim; implementation notes live in comments.
    #
    # Args:
    #   instruction: text to store.
    #   file_name: base name of the target file; ".txt" is appended and the file
    #              is placed under "pipelines/".
    # Returns:
    #   The instruction text, unchanged.
    #
    # NOTE(review): file_name is used in the path as-is, so a value containing
    # ".." or path separators can write outside "pipelines/". Restrict it if the
    # caller is not trusted.
    file_path = "pipelines/" + file_name + ".txt"
    # open(..., "w") does not create missing directories; without this the first
    # save on a fresh checkout fails with FileNotFoundError.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    with open(file_path, "w", encoding='utf-8') as file:
        file.write(instruction)
    return instruction

## Pipeline Instruction Extractor

In [None]:
@tool
def pipeline_instruction_extractor(file_name: str) -> str:
    """Extract instructions from a pipeline given by its name."""
    # Docstring kept verbatim: @tool uses it as the tool description.
    # Reads "pipelines/<file_name>.txt" as UTF-8 and returns its full text.
    pipeline_path = "pipelines/" + file_name + ".txt"
    with open(pipeline_path, "r", encoding='utf-8') as handle:
        return handle.read()

In [None]:
# Smoke test: requires pipelines/Leave_Request_Process.txt to exist.
pipeline_instruction_extractor.invoke("Leave_Request_Process")

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By

# Start a Chrome WebDriver session.
driver = webdriver.Chrome()

try:
    # Open the target page.
    url = "https://portal.ncu.edu.tw/login"  # replace with the target site URL
    driver.get(url)

    # Select every element in the document.
    all_elements = driver.find_elements(By.XPATH, "//*")

    # Walk all elements and print each distinct, non-empty text once. The printed
    # number is the element's 1-based position in the full element list.
    unique_elements = set()
    for index, element in enumerate(all_elements):
        text = element.text.strip()  # trim surrounding whitespace
        if text and text not in unique_elements:
            unique_elements.add(text)
            print(f"元素 {index + 1}: {text}")
finally:
    # Always close the browser: without the finally, an exception in any step
    # above would leave the Chrome process running.
    driver.quit()

# Call Tools Using ToolNode

## Define Tools

In [None]:
from langchain_core.messages import AIMessage
from langchain_core.tools import tool
from langgraph.prebuilt import ToolNode

In [None]:
@tool
def get_weather(location: str):
    """Call to get the current weather."""
    # Docstring kept verbatim: @tool uses it as the tool description.
    # Canned demo data: San Francisco (case-insensitive "sf" / "san francisco")
    # gets the foggy report, every other location the sunny one.
    is_san_francisco = location.lower() in ("sf", "san francisco")
    return "It's 60 degrees and foggy." if is_san_francisco else "It's 90 degrees and sunny."


# Demo tool without arguments; always returns the same comma-separated string.
@tool
def get_coolest_cities():
    """Get a list of coolest cities"""
    return "nyc, sf"

In [None]:
# The demo tools, and a ToolNode built from them; both names are reused by the
# cells that follow.
tools = [get_weather, get_coolest_cities]
tool_node = ToolNode(tools)

## Manually call Tool Node

In [None]:
# Hand-built AI message carrying one tool call, so the ToolNode can be
# exercised without involving a model.
message_with_single_tool_call = AIMessage(
    content="",
    tool_calls=[
        {
            "name": "get_weather",
            "args": {"location": "sf"},
            "id": "tool_call_id",
            "type": "tool_call",
        }
    ],
)

# The node receives a state dict whose "messages" list ends with the AI message.
tool_node.invoke({"messages": [message_with_single_tool_call]})

In [None]:
# Same as the single-call example, but the AI message requests two tools at
# once; each call carries its own id.
message_with_multiple_tool_calls = AIMessage(
    content="",
    tool_calls=[
        {
            "name": "get_coolest_cities",
            "args": {},
            "id": "tool_call_id_1",
            "type": "tool_call",
        },
        {
            "name": "get_weather",
            "args": {"location": "sf"},
            "id": "tool_call_id_2",
            "type": "tool_call",
        },
    ],
)

tool_node.invoke({"messages": [message_with_multiple_tool_calls]})

## Using with Chat Models

In [None]:
from typing import Literal
from langgraph.graph import StateGraph, MessagesState
from langgraph.prebuilt import ToolNode

# Bind the demo tools to the chat model created at the top of the notebook so
# that its replies can contain tool calls.
model_with_tools = llm.bind_tools(tools)

In [None]:
# Show the tool calls the model produces for a weather question.
model_with_tools.invoke("what's the weather in sf?").tool_calls

In [None]:
# Feed the model's reply straight into the ToolNode so the requested tools are executed.
tool_node.invoke({"messages": [model_with_tools.invoke("what's the weather in sf?")]})

## ReAct Agent

In [None]:
from typing import Literal
from langgraph.graph import StateGraph, MessagesState, START, END


def should_continue(state: MessagesState):
    """Choose the next step after the agent node.

    Returns "tools" when the most recent message carries tool calls,
    otherwise END.
    """
    newest_message = state["messages"][-1]
    return "tools" if newest_message.tool_calls else END


def call_model(state: MessagesState):
    """Invoke the tool-bound model on the message history.

    Returns a state update whose "messages" list holds only the model's reply.
    """
    reply = model_with_tools.invoke(state["messages"])
    return {"messages": [reply]}


# Build the agent loop: START -> agent -> (tools -> agent)* -> END.
workflow = StateGraph(MessagesState)

# Define the two nodes we will cycle between
workflow.add_node("agent", call_model)
workflow.add_node("tools", tool_node)

# The agent runs first; should_continue then routes either to the tools node or
# to END, and tool results always go back to the agent.
workflow.add_edge(START, "agent")
workflow.add_conditional_edges("agent", should_continue, ["tools", END])
workflow.add_edge("tools", "agent")

app = workflow.compile()

In [None]:
from IPython.display import Image, display

# Render the compiled graph as a Mermaid PNG and show it inline.
display(Image(app.get_graph(xray=2).draw_mermaid_png()))

In [None]:
# example with a single tool call
# With stream_mode="values" each chunk is indexed by "messages"; only the
# newest message of every chunk is printed.
for chunk in app.stream(
    {"messages": [("human", "what's the weather in sf?")]}, stream_mode="values"
):
    chunk["messages"][-1].pretty_print()