In [2]:
import requests
from requests.adapters import HTTPAdapter
from bs4 import BeautifulSoup as bs4
import re
import pandas as pd
import numpy as np
import json

class MOPS_ALL_2019(object):
    
    def __init__(self, sid, year, season, rid):
        self.sid=sid
        self.year=year
        self.season=season
        self.rid=rid
        keyword={'sid':self.sid, 'y':self.year, 's':self.season, 'rid':self.rid}
        self.header= {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'}
        self.url='https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID={sid}&SYEAR={y}&SSEASON={s}&REPORT_ID={rid}'.format(**keyword)
        self.get_tbl()
        
    def fetchall(self):
        self.fetch_BS()
        self.fetch_PLS()
        self.fetch_CFS()
        #self.fetch_SES()
        self.jsondata = {'data': [self.Balance, self.ProfitLoss , self.CashFlows]}
        
        return self.jsondata        
    
    def get_tbl(self):
        web_ss = requests.session()
        ss_adapter = HTTPAdapter(max_retries=3)
        web_ss.mount("https://", adapter=ss_adapter)
        res = web_ss.get(url=self.url, headers=self.header, timeout=5)
        res.encoding = 'big5'
        try:
            if res.status_code == 200:
                #soup = bs4(res.text, "lxml")
                soup = bs4(res.text, "html.parser")
                self.tables = soup.select('table')
        except Exception as e:
            print(e)
            
        return self.tables
            
            
    def fetch_BS(self, Ntable=0):
        start= len(stock.tables[Ntable].select('th'))
        k=start-3          
        code = self.tables[Ntable].select('td')
        
        code_list=[]

        for _code in code:
            try:
                if (_code['style'])!='':
                    code_list.append(_code.text)
            except:
                pass
        #print(code_list)

        cht = self.tables[Ntable].select('span.zh')
        en = self.tables[Ntable].select('span.en')
        amt = self.tables[Ntable].select('td.amt')

        #print(code)
        #print(len(cht[start:]), len(amt)/k)
        #df_cht, df_eng = {} , {}

        Balance_list=[]
        j=0
        for i, _amt in enumerate(amt):

            if i%k==0:
                #print(i, _amt.text)
                #balance_list[code_list[j]]= [{'cht':cht[6+j].text.strip(),'value':_amt.text.strip().replace(',','')}]
                Balance_list.append({'code':code_list[j], 'en':en[start+j].text.strip(), 'cht':cht[start+j].text.strip()
                                      ,'value':_amt.text.strip().replace(',','')})
                j+=1
        if len(code_list)!=len(Balance_list):
            print('length is not matched--BS!!:')      
        #print(self.Balance)
        self.Balance={'Balance':Balance_list}
        return self.Balance
        
    def fetch_PLS(self, Ntable=1):
        start= len(stock.tables[Ntable].select('th'))
        k=start-3
        #print(start, k)
        code = self.tables[Ntable].select('td')
        code_list=[]

        for _code in code:
            try:
                if (_code['style'])!='':
                    code_list.append(_code.text)

            except:
                pass
        #print(code_list, len(code_list))

        cht = self.tables[Ntable].select('span.zh')
        en = self.tables[Ntable].select('span.en')
        amt = self.tables[Ntable].select('td.amt')
        

        ProfitLoss_list=[]
        j=0
        for i, _amt in enumerate(amt):
            if i%k==0:
                ProfitLoss_list.append({'code':code_list[j], 'en':en[start+j].text.strip(), 'cht':cht[start+j].text.strip(),
                                      'value':_amt.text.strip().replace(',','')})
                j+=1
                #print(i, _amt.text)
        
        if len(code_list)!=len(ProfitLoss_list):
            print('length is not matched--PLS!!:')
           
           
        self.ProfitLoss={'ProfitLoss':ProfitLoss_list}

        return self.ProfitLoss
        
    def fetch_CFS(self, Ntable = 2):
        start= len(stock.tables[Ntable].select('th'))
        k=start-3
        
        code = self.tables[Ntable].select('td')
        code_list=[]

        for _code in code:
            try:
                if (_code['style'])!='':
                    code_list.append(_code.text)
            except:
                pass
        #print(code_list)

        cht = self.tables[Ntable].select('span.zh')
        en = self.tables[Ntable].select('span.en')
        amt = self.tables[Ntable].select('td.amt')

        #print(code)

        #print(len(cht[start:]), len(amt)/2)

        CashFlows_list=[]
        j=0
        for i, _amt in enumerate(amt):

            if i%k==0:
                CashFlows_list.append({'code':code_list[j], 'en':en[start+j].text.strip(), 'cht':cht[start+j].text.strip(),
                                      'value':_amt.text.strip().replace(',','')})
                j+=1
        
           
        if len(code_list)!=len(CashFlows_list):
            print('length is not matched--CFS!!:') 
        self.CashFlows={'CashFlows':CashFlows_list}  
        
        
        return self.CashFlows
        
        
    def fetch_SES(self, Ntable = 3):
        #start= len(stock.tables[Ntable].select('th'))
        #k=start-3

        Etable = self.tables[Ntable]
        #print(Etable)

        td=Etable.select('td')

        CI=[]
        for _td in td:
            if re.findall('^[A-Z][0-9]', _td.text.strip()) !=[]:
                CI.append(re.findall('^[A-Z][0-9]', _td.text.strip()))
                #print(code_index[0])


        th=Etable.select('th')
        cht = Etable.select('span.zh')
        en = Etable.select('span.en')
        amt = Etable.select('td.amt')

        #code4=[]
        self.code4={}

        j=1
        for _th in th:
            if re.findall('^3.+', _th.text.strip()) !=[]:
                self.code4[_th.text.strip()]={'cName':{'cht': cht[j].text.strip(),'en':en[j].text.strip()}}
                j+=1
        #print(code4)
        #print(code4.keys())

        #index=[]
        self.index2={}
        for _cht, _en, _CI in zip(cht[j:], en[j:], CI):
            #index.append({'code_index':_CI[0],'cht_index':_cht.text.strip(), 'en_index': _en.text.strip()})
            self.index2[_CI[0]]={'iName':{'cht': _cht.text.strip(),'en': _en.text.strip()}}
            #print(code_index)
            #print(cht_index)
            #print(en_index)               
        #print(index2)
        #print(index2.keys())

        k1= len(self.code4)
        k2= len(self.index2.keys())
 
        #for _amt in amt:
        #    print(_amt.text)
        amt_row= [[] for x in range(k2)]

        j=0
        for i, _amt in enumerate(amt):
            #print(i, _amt.text)
            if i==0 or i % k1!= 0:
                amt_row[j].append(_amt.text.strip().replace(',','').replace('(','-').replace(')',''))
                #print(amt_row[j])
            else:
                j+=1
                if j<k2:
                    amt_row[j].append(_amt.text.strip().replace(',','').replace('(','-').replace(')',''))
                else:
                    break
                    
        #print(amt_row)

        self.df=pd.DataFrame(columns=self.code4.keys())
        self.df=pd.DataFrame(np.array(amt_row),columns=self.code4.keys())
        self.df['items']=self.index2.keys()
        self.df.set_index('items', inplace=True)
        
        self.Account={'code4':self.code4, 'index2':self.index2}
        return self.df

# Fetch DATA

In [None]:
import os
import time
import random
from requests.exceptions import ConnectionError
from codes import codes


def dlcheck(path):
    filename=os.listdir(path)
    code_cap=[]
    for _file in filename:
        code_cap.append(_file[0:4])
    return code_cap

year='2019'
season='4'


#path='AllRpt'
path=year+'Q'+season
           




if not os.path.exists(path):
    os.mkdir(path)
code_cap=dlcheck(path)

csvpath=year+'Q'+season+'SES'
if not os.path.exists(csvpath):
    os.mkdir(csvpath)
    
code_cap_csv=dlcheck(csvpath)


for code,v in codes.items():
    if v.type=="股票" and v.market=="上市":
        try:
            #fetch BS, PLS and CFS
            if code not in code_cap:
                rid='C'
                stock=MOPS_ALL_2019(code,year,season,rid)
                print('get data from:' ,stock.url)
                
                if stock.tables == []:
                    rid = 'A'
                    print('get data again (rid=A) from:' ,stock.url)
                    stock=MOPS_ALL_2019(code,year,season,rid)        
                if stock.tables != []:
                    filename=path+'/'+code+'-'+year+'-'+'Q'+season+'.json'
                    data = stock.fetchall()
                    with open(filename, 'w', encoding='utf8') as f:
                        json.dump(data, f)
                else:
                    print('error-nodata:', code) 
                    with open('error.log', 'a+') as f:   
                        f.write('nodata:'+ code+':' + stock.url+'\n')
            # fetch SES
            if code not in code_cap_csv:
                rid='C'
                stock=MOPS_ALL_2019(code,year,season,rid)
                
                if stock.tables == []:
                    rid = 'A'
                    print('get data again (rid=A) from:' ,stock.url)
                    stock=MOPS_ALL_2019(code,year,season,rid)
                    
                
                if stock.tables != []:
                    df = stock.fetch_SES()
                    csvname=csvpath+'/'+code+'-'+year+'-'+'Q'+season+'.csv'
                    df.to_csv(csvname)

                    jsonname=csvpath+'/'+code+'-'+year+'-'+'Q'+season+'.json'
                    with open(jsonname, 'w', encoding='utf8') as f:
                        json.dump(stock.Account, f)
                else:
                    print('error-nodata:', code) 
                    with open('error.log', 'a+') as f:   
                        f.write('nodata:'+ code+':' + stock.url+'\n')
                        
                        
                time.sleep(random.uniform(2,5))
                
        except ConnectionError:
            code_cap=dlcheck(path)
            time.sleep(120)
            continue
print('download finished!')

get data again (rid=A) from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1413&SYEAR=2019&SSEASON=4&REPORT_ID=C
get data again (rid=A) from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1414&SYEAR=2019&SSEASON=4&REPORT_ID=C
get data again (rid=A) from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1423&SYEAR=2019&SSEASON=4&REPORT_ID=C
get data again (rid=A) from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1438&SYEAR=2019&SSEASON=4&REPORT_ID=C
get data again (rid=A) from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1456&SYEAR=2019&SSEASON=4&REPORT_ID=C
get data again (rid=A) from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1516&SYEAR=2019&SSEASON=4&REPORT_ID=C
get data again (rid=A) from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1527&SYEAR=2019&SSEASON=4&REPORT_ID=C
get data again (rid=A) from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1540&SYEAR=2019&SSEASON=4&REPO

# Check ALL

In [213]:
#path='AllRpt'
year='2019'
season='4'
rid='C'
code='1101'

#path='AllRpt'
path=year+'Q'+season

if not os.path.exists(path):
    os.mkdir(path)
code_cap=dlcheck(path)


stock=MOPS_ALL_2019(code,year,season,rid)
filename=path+'/'+code+'-'+year+'-'+'Q'+season+'.json'
print('get data from:' ,stock.url)
data = stock.fetchall()
with open(filename, 'w', encoding='utf8') as f:
    json.dump(data, f)

#stock.tables[1]

data=json.load(open(filename))['data']
#print(data[1]['ProfitLoss'])
df=pd.DataFrame(data[1]['ProfitLoss'])
df

get data from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1101&SYEAR=2019&SSEASON=4&REPORT_ID=C


Unnamed: 0,code,en,cht,value
0,,Operating revenue,營業收入,
1,4000,Total operating revenue,營業收入合計,122783014
2,,Operating costs,營業成本,
3,5000,Total operating costs,營業成本合計,86872759
4,5900,Gross profit (loss) from operations,營業毛利（毛損）,35910255
...,...,...,...,...
56,9710,Basic earnings (loss) per share from continuin...,繼續營業單位淨利（淨損）,4.43
57,9750,Total basic earnings per share,基本每股盈餘合計,4.43
58,,Diluted earnings per share,稀釋每股盈餘,
59,9810,Diluted earnings (loss) per share from continu...,繼續營業單位淨利（淨損）,4.25


# Equity Check

In [10]:
import os

year='2019'
season='3'
rid='C'
code='1101'

csvpath=year+'Q'+season+'SES'

if not os.path.exists(csvpath):
    os.mkdir(csvpath)

stock=MOPS_ALL_2019(code,year,season,rid)

csvname=csvpath+'/'+code+'-'+year+'-'+'Q'+season+'.csv'
print('get data from:' ,stock.url)


df = stock.fetch_SES()
df.to_csv(csvname)


jsonname=csvpath+'/'+code+'-'+year+'-'+'Q'+season+'.json'
with open(jsonname, 'w', encoding='utf8') as f:
    json.dump(stock.Account, f)

df

get data from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1101&SYEAR=2019&SSEASON=3&REPORT_ID=C


Unnamed: 0_level_0,3110,3120,3130,3140,3150,3190,3100,3200,3310,3320,...,3300,3410,3420,3425,3450,3400,3500,31XX,36XX,3XXX
items,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A1,51080599.0,2000000.0,,,,,53080599.0,47836241.0,14784534.0,13049062.0,...,61588761.0,-5037221.0,24074566.0,0.0,1109.0,19038454.0,-2545.0,181541510,15837946.0,197379456
A3,0.0,0.0,,,,,0.0,,0.0,0.0,...,0.0,,,,,,,0,0.0,0
A4,0.0,0.0,,,,,0.0,,0.0,0.0,...,0.0,,,,,,,0,0.0,0
A5,51080599.0,2000000.0,,,,,53080599.0,47836241.0,14784534.0,13049062.0,...,61588761.0,-5037221.0,24074566.0,0.0,1109.0,19038454.0,-2545.0,181541510,15837946.0,197379456
B1,,,,,,,,,2118082.0,,...,0.0,,,,,,,0,,0
B3,,,,,,,,,,,...,,,,,,,,0,,0
B5,,,,,,,,,,,...,-16856367.0,,,,,,,-16856367,,-16856367
B7,,,,,,,,,,,...,-18219.0,,,,,,,-18219,,-18219
B9,3575593.0,,,,,,3575593.0,,,,...,-3575593.0,,,,,,,0,,0
C1,,,,,,,,,,,...,,,,,,,,0,,0
