In [11]:
# import libraries
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from IPython.display import display

# from selenium.webdriver.chrome.options import Options
# set up browser options (not supported by Edge)
# browser_options = Options()
# browser_options.add_argument("--headless")  # enable headless mode

### Preprocessing Method

- search_code: search Morningstar for the stock code the user enters
- _query_search: parse the search response for `search_code` into a table
- beautify_table: display a table with aligned headers and cells
- get_module: get the module of the company the user picks from the `search_code` results (usually 'xtai')

In [2]:
def search_code(code: str, timeout: float = 10.0) -> pd.DataFrame:
    """Function: Search the stock code from Morningstar

    Args:
        code: Stock code (or free-text query) to look up.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The DataFrame built by ``_query_search`` from the search-result page.

    Raises:
        requests.HTTPError: If the search page does not answer with HTTP 200.
        requests.RequestException: On connection problems or when the
            timeout expires.
    """

    # pass the query through `params` so that requests URL-encodes it;
    # a code containing a space, '&' or '#' would otherwise corrupt the URL
    response = requests.get(
        'https://www.morningstar.com/search',
        params={'query': code},
        timeout=timeout,
    )

    # fail with a clear error instead of reaching `return` with nothing to return
    if response.status_code != 200:
        raise requests.HTTPError(
            f'Morningstar search for {code!r} returned HTTP {response.status_code}',
            response=response,
        )

    # the response is ok, go ahead and parse it
    return _query_search(response)


def _query_search(response: requests.models.Response) -> pd.DataFrame:
    """Function: Extract company names and modules from a search response

    Parses ``response.content`` as HTML and collects the text of every matching
    name link into the 'Company' column and the text of every matching
    exchange span into the 'Module' column.
    """

    # CSS classes of the elements that carry the data
    name_classes = 'mdc-link mdc-security-module__name mds-link mds-link--no-underline mdc-link--no-underline'
    exchange_classes = "mdc-security-module__exchange"

    # parse the html body of the response
    page = BeautifulSoup(response.content, 'html.parser')

    # fill both columns in document order
    columns = {'Company': [], 'Module': []}
    for anchor in page.find_all('a', class_=name_classes):
        columns['Company'].append(anchor.get_text())
    for span in page.find_all('span', class_=exchange_classes):
        columns['Module'].append(span.get_text())

    return pd.DataFrame(columns)


def beautify_table(companys: pd.DataFrame) -> None:
    """Function: Display the table with aligned headings and data cells"""

    # CSS rules: centre the column headings, left-align the data cells
    alignment_rules = [
        {'selector': '.col_heading', 'props': [('text-align', 'center')]},
        {'selector': '.data', 'props': [('text-align', 'left')]},
    ]

    # set_table_styles hands back the Styler, so it can be displayed directly
    display(companys.style.set_table_styles(alignment_rules))


def get_module(companys: pd.DataFrame) -> str:
    """Function: Get the module

    Prompts the user for a row index and returns the value found in the
    second column (position 1) of that row, converted to lower case.

    Args:
        companys: Table of search results to pick from.

    Returns:
        The selected module in lower case.

    Raises:
        ValueError: If the input is not an integer, or if there is no module
            at the requested position of ``companys``.
    """

    raw_index = input("Enter the company index: ")

    # only the integer conversion can legitimately mean "Not a number";
    # anything else (e.g. KeyboardInterrupt) is left to propagate untouched
    try:
        stock = int(raw_index)
    except ValueError as err:
        raise ValueError("Not a number") from err

    # get the stock module; a bad position is reported as such, still as a
    # ValueError so callers that caught the old exception type keep working
    try:
        module = companys.iloc[stock, 1]
    except IndexError as err:
        raise ValueError(
            f"No module at company index {stock} (table has {len(companys)} row(s))"
        ) from err

    return module.lower()

### Get Information

In [3]:
def get_valuation(code: str, module:str, driver: webdriver.edge.webdriver.WebDriver, group_size: int = 4) -> pd.DataFrame:
    """Function: Get the valuation key statistics from Morningstar

    Opens the valuation page of the stock, reads the panel headers and the
    name/value data points, prints each panel header followed by its slice of
    the data points, and returns all data points as one table.

    Args:
        code: Stock code used in the page URL.
        module: Module (lower-case exchange code) used in the page URL.
        driver: Selenium driver used to load and query the page.
        group_size: Number of consecutive data points displayed under each
            panel header. Defaults to 4, the value that was hard-coded before.

    Returns:
        A DataFrame with the columns 'Key' and 'Value' holding every data
        point found on the page.

    Raises:
        ValueError: If ``group_size`` is smaller than 1.
    """

    if group_size < 1:
        raise ValueError("group_size must be a positive integer")

    # set the url and get into it
    url = f'https://www.morningstar.com/stocks/{module}/{code}/valuation'
    driver.get(url)

    # get key statistics
    # find_element_by_class_name cannot be used here, so query by CSS selector
    header_elements = driver.find_elements("css selector", ".sal-panel-header")
    name_elements = driver.find_elements("css selector", ".dp-name")
    value_elements = driver.find_elements("css selector", ".dp-value")

    panel_titles = [element.text for element in header_elements]
    key_names = [element.text for element in name_elements]
    key_values = [element.text for element in value_elements]

    # show each panel header followed by its own group of data points
    for position, title in enumerate(panel_titles):
        rows = slice(position * group_size, (position + 1) * group_size)
        print(title)
        beautify_table(pd.DataFrame({'Key': key_names[rows], 'Value': key_values[rows]}))

    return pd.DataFrame({'Key': key_names, 'Value': key_values})

In [4]:
# set up the browser (an Edge WebDriver session; `driver` is reused by the cells below)
# NOTE(review): no driver.quit() is visible in this notebook — confirm the session is closed somewhere
driver = webdriver.Edge()

# get the stock code (blocks until the user types a value)
stock_code = input("Enter the stock code: ")

# do the search and show the matching companies so the user can pick a row
companys = search_code(stock_code)
beautify_table(companys)

# get the module: prompts for a row index of the table shown above
module = get_module(companys)

Unnamed: 0,Company,Module
0,Taiwan Semiconductor Manufacturing Co Ltd,XTAI
1,Advanced Petrochemical Co,XSAU
2,Forside Co Ltd,XTKS


In [5]:
# key_statistics = get_valuation(stock_code, module, driver)
# key_statistics = beautify_table(key_statistics)     # note: beautify_table returns None, so this assignment would replace key_statistics with None

In [16]:
# get Sector, Industry, Investment Style, Beta

# Overview, Key Ratios and Trading Information all use the same class on the same page
# Find the target elements by the given class, then pick the matching value by the element's index

def get_quote(code: str, module:str, driver: webdriver.edge.webdriver.WebDriver) -> pd.DataFrame:
    """Function: Open the Morningstar quote page of a stock and click its Overview button

    Loads the quote page, waits (up to 10 seconds per lookup) until the
    Overview, Key Ratios and Trading Information buttons are present in the
    page, then clicks the Overview button.

    NOTE(review): the signature promises a DataFrame, but the body has no
    return statement, so the function currently returns None. The function
    looks unfinished — confirm what it is meant to return.
    """

    # set the url
    url = f'https://www.morningstar.com/stocks/{module}/{code}/quote'

    # get into the url
    driver.get(url)

    # # get all information keys
    # info_keys = driver.find_elements("css selector", ".stock__quote-content-item-label")
    # # [:2] is Sector and Industry
    # industries = driver.find_elements("css selector", ".mdc-data-point.mdc-data-point--string")
    # # style only have one element
    # target_span = driver.find_elements("css selector", ".mdc-data-point.mdc-data-point--style-box")
    # # get the style
    # style = ''.join(span.text for span in target_span if span.text != '')

    # info_key = [info_key.text for info_key in info_keys]
    # print(info_key)

    # wait for the target elements to appear (each lookup times out after 10 seconds)
    wait = WebDriverWait(driver, 10)
    overview_button = wait.until(EC.presence_of_element_located((By.XPATH, "//button[@data-testid='stock__quote-button-overview']")))
    # NOTE(review): the next two buttons are located but never used afterwards
    keyratios_button = wait.until(EC.presence_of_element_located((By.XPATH, "//button[@data-testid='stock__quote-button-keyRatios']")))
    tradinginfo_button = wait.until(EC.presence_of_element_located((By.XPATH, "//button[@data-testid='stock__quote-button-tradingInformation']")))

    # click the Overview button
    overview_button.click()

    # wait for the XHR load to finish
    # NOTE(review): this waits for the same XPath as overview_button above, which was
    # already present before the click, and the result is never used — the locator of
    # the loaded content is probably what was intended here; confirm
    overview_content = wait.until(EC.presence_of_element_located((By.XPATH, "//button[@data-testid='stock__quote-button-overview']")))



In [17]:
basic_info = get_quote(stock_code, module, driver)

Overview
