In [1]:
# libraries
import os
import requests
import threading
import pandas as pd
from bs4 import BeautifulSoup as bs4
from requests.adapters import HTTPAdapter

# from Project
from GetSheetStatement import Constractv2


class BalanceSheet(object):
    """Download the balance-sheet HTML tables for one company and period.

    The request target, headers, form payload template and output-path
    template all come from ``Constractv2``; this class only fills in the
    company id, year and season.
    """

    def __init__(self, sheet: str, sid: str, year: int, season: int):
        """Prepare the request parameters and the output file path.

        Args:
            sheet: Key into ``Constractv2.FINANCIAL_STATMENT`` selecting the
                statement endpoint.
            sid: Company id sent as the ``co_id`` form field.
            year: Year sent as the ``year`` form field (converted with
                ``int``). 1911 is added to it for the file name only
                (e.g. 109 -> 2020) -- presumably an ROC-calendar year;
                confirm against the endpoint.
            season: Season number; sent as ``"0<season>"``.
        """
        self.sheet = Constractv2.FINANCIAL_STATMENT[sheet]
        self.url = Constractv2.MOPS_URL.format(sheet=self.sheet)
        self.header = Constractv2.HEADERS
        # Work on a private copy: writing into the shared template would leak
        # this instance's co_id/year/season into every other instance (and
        # into Constractv2.PAYLOAD itself).
        self.payload = dict(Constractv2.PAYLOAD)
        self.payload["co_id"] = sid
        self.payload["year"] = int(year)
        self.payload["season"] = f"0{season}"
        self.file_path = Constractv2.FILE_PATH.format(
            sheet="Balance", sid=self.payload["co_id"], year=self.payload["year"] + 1911, season=self.payload["season"])

        print(f"INIT Thread: Balance_Sheet\nURL: {self.url}\nFILE_PATH: {self.file_path}\n")

    def run(self):
        """POST the prepared payload and return the matching tables.

        Returns:
            The result of ``soup.select('table.hasBorder')`` when the server
            answers with HTTP 200; ``None`` when the request fails or the
            status code is anything else (a message is printed in both
            failure cases).
        """
        # The context manager closes the session's connections on exit,
        # including on the error paths.
        with requests.Session() as web_ss:
            # Retry failed HTTPS connections up to 3 times.
            web_ss.mount("https://", HTTPAdapter(max_retries=3))
            try:
                res = web_ss.post(url=self.url, headers=self.header, data=self.payload, timeout=3)
                if res.status_code == 200:
                    soup = bs4(res.text, "html.parser")
                    return soup.select('table.hasBorder')
                # Previously a non-200 answer returned None without a trace.
                print(f"Get HTML FAIL: HTTP {res.status_code}", end="\n\n")
            except Exception as e:
                # Best-effort fetch: report the problem and fall through to None.
                print(f"Get HTML FAIL: {e}", end="\n\n")
        return None

            
# Request parameters for this run; `sid` and `season` are reused when the
# result row is assembled.
sheet, sid, year, season = 'Balance', '2330', 109, 1

# Build the fetcher (prints the resolved URL and file path), then download
# the tables that the parsing cells read from `result`.
BSS = BalanceSheet(sheet=sheet, sid=sid, year=year, season=season)
result = BSS.run()

INIT Thread: Balance_Sheet
URL: https://mops.twse.com.tw/mops/web/ajax_t164sb03
FILE_PATH: ./Balance/2330-2020-01.json



In [4]:
# Collect the cells of the first returned table, split by CSS class.
even = result[0].select('td.even')
odd = result[0].select('td.odd')

# Keep every 7th cell (offsets 0, 7, 14, ...) as an item name.
# NOTE(review): this assumes each table row contributes exactly 7 cells, with
# the name first -- confirm against the page markup.
# The *_odd list is built from the `even` cells and vice versa; the names are
# kept as they are in case other cells refer to them.
columns_odd = [cell.text.strip() for cell in even[::7]]
columns_even = [cell.text.strip() for cell in odd[::7]]

# Interleave the two name lists pairwise; zip() stops at the shorter list.
columns = [name for pair in zip(columns_odd, columns_even) for name in pair]

print(columns, len(columns))

['流動資產', '現金及約當現金', '透過損益按公允價值衡量之金融資產－流動', '透過其他綜合損益按公允價值衡量之金融資產－流動', '按攤銷後成本衡量之金融資產－流動', '避險之金融資產－流動', '應收帳款－關係人淨額', '應收帳款淨額', '其他應收款－關係人淨額', '其他流動資產', '存貨', '非流動資產', '流動資產合計', '透過其他綜合損益按公允價值衡量之金融資產－非流動', '按攤銷後成本衡量之金融資產－非流動', '採用權益法之投資', '不動產、廠房及設備', '其他非流動資產', '使用權資產', '資產總額', '無形資產', '短期借款', '遞延所得稅資產', '透過損益按公允價值衡量之金融負債－流動', '非流動資產合計', '應付帳款', '流動負債', '其他應付款', '應付短期票券', '本期所得稅負債', '避險之金融負債－流動', '其他流動負債', '應付帳款－關係人', '非流動負債', '流動負債合計', '遞延所得稅負債', '應付公司債', '其他非流動負債', '租賃負債－非流動', '負債總額', '非流動負債合計', '股本', '歸屬於母公司業主之權益', '股本合計', '普通股股本', '資本公積－發行溢價', '資本公積', '資本公積－採用權益法認列關聯企業及合資股權淨值之變動數', '資本公積-認列對子公司所有權權益變動數', '資本公積－合併溢額', '資本公積－受贈資產', '保留盈餘', '資本公積合計', '特別盈餘公積', '法定盈餘公積', '保留盈餘合計', '未分配盈餘（或待彌補虧損）', '權益總額', '其他權益', '母公司暨子公司所持有之母公司庫藏股股數（單位：股）'] 60


In [8]:
# Values: the cell right after each item name (offset 1 within every group of
# 7 cells), with thousands separators removed. Values stay as strings.
# NOTE(review): assumes 7 cells per table row -- confirm against the markup.
content_odd = [even[i + 1].text.strip().replace(',', '') for i in range(0, len(even), 7)]
content_even = [odd[i + 1].text.strip().replace(',', '') for i in range(0, len(odd), 7)]

# Interleave in the same pairwise order used for `columns`, so that each value
# lines up with its item name.
content = [value for pair in zip(content_odd, content_even) for value in pair]

# Guard against stale notebook state (e.g. `columns` left over from a
# different fetch): every item name needs exactly one value.
assert len(content) == len(columns), (
    f"{len(columns)} item names but {len(content)} values; re-run the cells in order"
)

Newcolumns = ['sID', 'Quarter', *columns]
Newcontent = [sid, season, *content]

# Single-row frame, indexed 1, holding this company/quarter.
df = pd.DataFrame(columns=Newcolumns)
df.loc[1] = Newcontent

# Create the target directory first; to_json does not create missing parent
# directories, so a fresh checkout would otherwise fail here.
out_dir = os.path.dirname(BSS.file_path)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)
df.to_json(BSS.file_path)

df

Unnamed: 0,sID,Quarter,流動資產,現金及約當現金,透過損益按公允價值衡量之金融資產－流動,透過其他綜合損益按公允價值衡量之金融資產－流動,按攤銷後成本衡量之金融資產－流動,避險之金融資產－流動,應收帳款－關係人淨額,應收帳款淨額,...,資本公積－受贈資產,保留盈餘,資本公積合計,特別盈餘公積,法定盈餘公積,保留盈餘合計,未分配盈餘（或待彌補虧損）,權益總額,其他權益,母公司暨子公司所持有之母公司庫藏股股數（單位：股）
1,2330,1,,430777229,1254253,129219277,302393,0,427888,145992744,...,33336,,56339785,27568179,311146899,1385495748,1046780670,1677028531,,0
