In [1]:
import os
import random
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup

# 公開資訊觀測站

## 上市 / 上櫃 / 興櫃 累計與當月營業收入統計表

In [16]:
def getMonthlyRevenue(year, month, timeout=30):
    """Scrape the monthly revenue tables for one month and save them as CSV.

    One static HTML page is downloaded per market category ('sii', 'otc',
    'rotc'). Every company row found is collected and the combined table is
    written to ./Data/monthly_revenue_<year>_<month>.csv. A category whose
    request fails is reported with print() and skipped; the remaining
    categories are still processed.

    Args:
        year: Year as it appears in the page URL (inserted via str(year)).
            The notebook calls this with 113, presumably a Republic of China
            calendar year -- TODO confirm.
        month: Month as it appears in the page URL (inserted via str(month)).
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        tuple: (rows, columns) shape of the DataFrame that was saved.

    Raises:
        OSError: If the output directory or the CSV file cannot be written.
    """
    companyMarketCategories = ['sii', 'otc', 'rotc']

    columns = ['公司代號',
               '公司名稱',
               '當月營收',
               '上月營收',
               '去年當月營收',
               '上月比較增減(%)',
               '去年同月增減(%)',
               '當月累計營收',
               '去年累計營收',
               '前期比較增減(%)',
               '備註']

    outputDirectory = './Data'

    # Collect plain dicts and build the DataFrame once at the end; appending
    # to a DataFrame row by row copies the data on every insertion.
    companyRecords = []

    for companyMarketCategory in companyMarketCategories:
        url = 'https://mops.twse.com.tw/nas/t21/' + companyMarketCategory + '/t21sc03_' + str(year) + '_' + str(month) + '_0.html'
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, timeout=timeout)
            # Surface HTTP errors (raised as a RequestException subclass)
            # instead of silently parsing an error page.
            response.raise_for_status()
            response.encoding = 'big5'

            htmlTextParsed = BeautifulSoup(response.text, 'html.parser')
            allCompaniesByIndustrialCategoryTables = htmlTextParsed.find_all(name='table', attrs={'border': 5})

            # the last table is summation
            for industrialCategoryTable in allCompaniesByIndustrialCategoryTables[:-1]:
                companyRows = industrialCategoryTable.find_all(name='tr', attrs={'align': 'right'})
                # the last row is summation
                for singleCompanyRow in companyRows[:-1]:
                    cells = singleCompanyRow.find_all('td')
                    # zip() pairs cells with the known headers; cells beyond
                    # the header list are ignored instead of raising IndexError.
                    companyRecords.append(
                        {columnName: cell.text.strip() for columnName, cell in zip(columns, cells)}
                    )

        except requests.RequestException as errorMessage:
            print(errorMessage)

        # pause for website access limitation
        time.sleep(random.randint(1, 5))

    # Headers with no matching cell in a row are left empty (NaN).
    monthlyRevenueDataFrame = pd.DataFrame(companyRecords, columns=columns)

    # Make sure the target folder exists so the scraped data is not lost.
    os.makedirs(outputDirectory, exist_ok=True)
    monthlyRevenueDataFrame.to_csv(
        os.path.join(outputDirectory, 'monthly_revenue_' + str(year) + '_' + str(month) + '.csv'),
        index=False,
    )
    return monthlyRevenueDataFrame.shape

In [17]:
# Scrape the monthly revenue tables for year 113, month 7 and print the
# (rows, columns) shape returned by getMonthlyRevenue.
# NOTE(review): 113 is presumably a Republic of China calendar year (2024) -- confirm.
monthlyRevenueDataFrameShape = getMonthlyRevenue(113,7)
print(monthlyRevenueDataFrameShape)

(1867, 11)


## 上市 / 上櫃 / 興櫃 財務分析資料查詢彙總表

In [20]:
def getAnnualFinantialAnalysis(year, timeout=30):
    """Scrape the annual financial-analysis summary table and save it as CSV.

    One POST request is sent per market category ('sii', 'otc', 'rotc') to
    the ajax_t51sb02 endpoint. Every 'even'/'odd' row of the 'hasBorder'
    table in the response is collected and the combined table is written to
    ./Data/annual_financial_analysis_<year>.csv. A category whose request
    fails, or whose response contains no result table, is reported with
    print() and skipped.

    Args:
        year: Year value sent in the 'year' form field. The notebook calls
            this with 112, presumably a Republic of China calendar year --
            TODO confirm.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        tuple: (rows, columns) shape of the DataFrame that was saved.

    Raises:
        OSError: If the output directory or the CSV file cannot be written.
    """
    url = 'https://mops.twse.com.tw/mops/web/ajax_t51sb02'

    companyMarketCategories = ['sii', 'otc', 'rotc']

    columns = ['公司代號','公司簡稱','負債佔資產比率(%)','長期資金佔不動產、廠房及設備比率(%)','流動比率(%)','速動比率(%)','利息保障倍數(%)','應收款項週轉率(次)','平均收現日數','存貨週轉率(次)','平均銷貨日數','不動產、廠房及設備週轉率(次)','總資產週轉率(次)','資產報酬率(%)','權益報酬率(%)','稅前純益佔實收資本比率(%)','純益率(%)','每股盈餘(元)','現金流量比率(%)','現金流量允當比率(%)','現金再投資比率(%)']

    outputDirectory = './Data'

    # Collect plain dicts and build the DataFrame once at the end; appending
    # to a DataFrame row by row copies the data on every insertion.
    companyRecords = []

    for companyMarketCategory in companyMarketCategories:
        # A fresh payload per request avoids mutating shared state in the loop.
        dataForPostRequest = {
            'encodeURIComponent': 1,
            'step': 1,
            'TYPEK': companyMarketCategory,
            'year': year,
            'firstin': 1,
            'off': 1,
            'ifrs': 'Y',
        }
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.post(url, data=dataForPostRequest, timeout=timeout)
            # Surface HTTP errors (raised as a RequestException subclass)
            # instead of silently parsing an error page.
            response.raise_for_status()

            htmlTextParsed = BeautifulSoup(response.text, 'html.parser')
            allCompaniesTable = htmlTextParsed.find(name='table', attrs={'class': 'hasBorder'})

            if allCompaniesTable is None:
                # find() returns None when the response has no result table;
                # calling find_all on it would raise an uncaught AttributeError
                # and discard everything scraped so far.
                print('No result table found for TYPEK=' + companyMarketCategory + ', year=' + str(year))
            else:
                for singleCompanyRow in allCompaniesTable.find_all('tr', attrs={'class': ['even', 'odd']}):
                    cells = singleCompanyRow.find_all('td')
                    # zip() pairs cells with the known headers; cells beyond
                    # the header list are ignored instead of raising IndexError.
                    companyRecords.append(
                        {columnName: cell.text.strip() for columnName, cell in zip(columns, cells)}
                    )

        except requests.RequestException as errorMessage:
            print(errorMessage)

        # pause for website access limitation
        time.sleep(random.randint(1, 5))

    # Headers with no matching cell in a row are left empty (NaN).
    annualFinantialAnalysisDataFrame = pd.DataFrame(companyRecords, columns=columns)

    # Make sure the target folder exists so the scraped data is not lost.
    os.makedirs(outputDirectory, exist_ok=True)
    annualFinantialAnalysisDataFrame.to_csv(
        os.path.join(outputDirectory, 'annual_financial_analysis_' + str(year) + '.csv'),
        index=False,
    )
    return annualFinantialAnalysisDataFrame.shape

In [21]:
# Scrape the annual financial-analysis table for year 112 and print the
# (rows, columns) shape returned by getAnnualFinantialAnalysis.
# NOTE(review): 112 is presumably a Republic of China calendar year (2023) -- confirm.
annualFinantialAnalysisDataFrameShape = getAnnualFinantialAnalysis(112)
print(annualFinantialAnalysisDataFrameShape)

(2107, 21)


## 上市 / 上櫃 / 興櫃 綜合損益表

In [24]:
def getQuarterlyIncomeStatement(year, season, timeout=30):
    """Scrape the quarterly income-statement tables and save them as CSV.

    One POST request is sent per market category ('sii', 'otc', 'rotc') to
    the ajax_t163sb04 endpoint. Each 'hasBorder' table in the response
    carries its own header row, so the column names are read from the
    table's <th> cells and a separate DataFrame is built per table. All
    tables are then concatenated (columns missing from a table are left
    empty) and written to
    ./Data/quarterly_income_statement_<year>_<season>.csv. A category whose
    request fails is reported with print() and skipped.

    Args:
        year: Year value sent (as a string) in the 'year' form field. The
            notebook calls this with 113, presumably a Republic of China
            calendar year -- TODO confirm.
        season: Quarter number; sent as '0' + str(season), e.g. 1 -> '01'.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        tuple: (rows, columns) shape of the DataFrame that was saved;
        (0, 0) if no table was found at all.

    Raises:
        OSError: If the output directory or the CSV file cannot be written.
    """
    url = 'https://mops.twse.com.tw/mops/web/ajax_t163sb04'

    companyMarketCategories = ['sii', 'otc', 'rotc']

    outputDirectory = './Data'

    # One DataFrame per HTML table; they are concatenated once at the end
    # instead of re-copying the accumulated frame after every table.
    perTableDataFrames = []

    for companyMarketCategory in companyMarketCategories:
        # A fresh payload per request avoids mutating shared state in the loop.
        dataForPostRequest = {
            'encodeURIComponent': 1,
            'step': 1,
            'firstin': 1,
            'off': 1,
            'isQuery': 'Y',
            'TYPEK': companyMarketCategory,
            'year': str(year),
            'season': '0' + str(season),
        }

        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.post(url, data=dataForPostRequest, timeout=timeout)
            # Surface HTTP errors (raised as a RequestException subclass)
            # instead of silently parsing an error page.
            response.raise_for_status()

            htmlTextParsed = BeautifulSoup(response.text, 'html.parser')
            allCompaniesByIndustrialCategoryTables = htmlTextParsed.find_all(name='table', attrs={'class': 'hasBorder'})

            for industrialCategoryTable in allCompaniesByIndustrialCategoryTables:
                columns = [headerCell.text.strip() for headerCell in industrialCategoryTable.find_all('th')]
                if not columns:
                    # A table without header cells has nothing to name its data by.
                    continue

                companyRows = industrialCategoryTable.find_all(name='tr', attrs={'class': ['even', 'odd']})
                # zip() pairs cells with this table's headers; cells beyond
                # the header list are ignored instead of raising IndexError.
                tableRecords = [
                    {columnName: cell.text.strip()
                     for columnName, cell in zip(columns, singleCompanyRow.find_all('td'))}
                    for singleCompanyRow in companyRows
                ]
                perTableDataFrames.append(pd.DataFrame(tableRecords, columns=columns))

        except requests.RequestException as errorMessage:
            print(errorMessage)

        # pause for website access limitation
        time.sleep(random.randint(1, 5))

    if perTableDataFrames:
        quarterlyIncomeStatementDataFrame = pd.concat(perTableDataFrames, ignore_index=True)
    else:
        # pd.concat raises on an empty list; fall back to an empty frame.
        quarterlyIncomeStatementDataFrame = pd.DataFrame()

    # Make sure the target folder exists so the scraped data is not lost.
    os.makedirs(outputDirectory, exist_ok=True)
    quarterlyIncomeStatementDataFrame.to_csv(
        os.path.join(outputDirectory, 'quarterly_income_statement_' + str(year) + '_' + str(season) + '.csv'),
        index=False,
    )
    return quarterlyIncomeStatementDataFrame.shape

In [25]:
# Scrape the income-statement tables for year 113, season 1 and print the
# (rows, columns) shape returned by getQuarterlyIncomeStatement.
# NOTE(review): 113 is presumably a Republic of China calendar year (2024) -- confirm.
quarterlyIncomeStatementDataFrameShape = getQuarterlyIncomeStatement(113, 1)
print(quarterlyIncomeStatementDataFrameShape)

(1879, 57)
