In [170]:
import requests
from requests.adapters import HTTPAdapter
from bs4 import BeautifulSoup as bs4
import re
import pandas as pd
import numpy as np
import json

class MOPS_ALL_2019(object):
    """Fetch and parse one company's report page from mops.twse.com.tw.

    The constructor builds the ``t164sb01`` URL from the company id, year,
    season and report id and immediately downloads the page (see
    :meth:`get_tbl`).  The ``fetch_*`` methods then turn the downloaded
    ``<table>`` elements into plain Python structures.

    Attributes set by the class:
        sid, year, season, rid: the constructor arguments, stored as given.
        header: HTTP headers (a browser ``User-Agent``) sent with the request.
        url: the fully formatted report URL.
        tables: list of ``<table>`` elements; empty when nothing was parsed.
        Balance, ProfitLoss, CashFlows: results of the matching ``fetch_*``.
        jsondata: result of :meth:`fetchall`.
        df: result of :meth:`fetch_SES`.
    """

    def __init__(self, sid, year, season, rid):
        """Store the query parameters, build the URL and download the page.

        Args:
            sid: company id, substituted for ``CO_ID`` in the URL.
            year: substituted for ``SYEAR``.
            season: substituted for ``SSEASON``.
            rid: substituted for ``REPORT_ID``.
        """
        self.sid=sid
        self.year=year
        self.season=season
        self.rid=rid
        keyword={'sid':self.sid, 'y':self.year, 's':self.season, 'rid':self.rid}
        self.header= {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'}
        self.url='https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID={sid}&SYEAR={y}&SSEASON={s}&REPORT_ID={rid}'.format(**keyword)
        self.get_tbl()

    def fetchall(self):
        """Parse tables 0, 1 and 2 and bundle the three results.

        Returns:
            ``{'data': [self.Balance, self.ProfitLoss, self.CashFlows]}``,
            also stored on ``self.jsondata``.
        """
        self.fetch_BS()
        self.fetch_PLS()
        self.fetch_CFS()
        # fetch_SES() is intentionally not part of the bundle.
        self.jsondata = {'data': [self.Balance, self.ProfitLoss , self.CashFlows]}

        return self.jsondata

    def get_tbl(self):
        """Download ``self.url`` and collect every ``<table>`` element.

        The response is decoded as ``big5`` and parsed with ``html.parser``.
        ``self.tables`` is always assigned: it stays an empty list when the
        status code is not 200 or when parsing fails, so callers can test
        ``tables != []`` safely.

        Exceptions raised by the HTTP request itself are not caught here;
        they propagate to the caller.

        Returns:
            The list stored in ``self.tables``.
        """
        # Defined up front so a non-200 answer yields "no tables" instead of
        # an AttributeError on the return statement below.
        self.tables = []

        # The context manager closes the session (and its pooled
        # connections) once the single request has completed.
        with requests.Session() as web_ss:
            ss_adapter = HTTPAdapter(max_retries=3)
            web_ss.mount("https://", adapter=ss_adapter)
            res = web_ss.get(url=self.url, headers=self.header)

        res.encoding = 'big5'
        try:
            if res.status_code == 200:
                soup = bs4(res.text, "html.parser")
                self.tables = soup.select('table')
        except Exception as e:
            # Best effort: a page that cannot be parsed is reported and
            # treated like a page without tables.
            print(e)

        return self.tables

    def _parse_statement(self, Ntable, label):
        """Extract ``code / en / cht / value`` rows from ``self.tables[Ntable]``.

        Shared implementation of :meth:`fetch_BS`, :meth:`fetch_PLS` and
        :meth:`fetch_CFS`.

        Args:
            Ntable: index into ``self.tables`` of the table to parse.
            label: short tag used only in the length-mismatch message.

        Returns:
            A list of dicts with the keys ``code``, ``en``, ``cht`` and
            ``value`` (all strings; thousands separators removed from
            ``value``).

        Raises:
            IndexError: when the table has fewer codes or labels than
                amount rows.
            ZeroDivisionError: when table 0 has exactly three ``<th>``
                cells, since the stride ``k`` is then zero.
        """
        # The header width is always taken from the first table of *this*
        # instance (the original code read it from a global named `stock`).
        start = len(self.tables[0].select('th'))
        # Stride between the amount cells that start consecutive rows.
        # presumably three header cells are not amount columns - TODO confirm
        k = start - 3

        table = self.tables[Ntable]

        # Account codes are the <td> cells that carry a non-empty `style`
        # attribute; cells without that attribute are skipped.
        code_list = [td.text for td in table.select('td') if td.get('style')]

        cht = table.select('span.zh')
        en = table.select('span.en')
        amt = table.select('td.amt')

        rows = []
        j = 0
        for i, _amt in enumerate(amt):
            # Only the first amount cell of each row is kept.
            if i % k == 0:
                rows.append({
                    'code': code_list[j],
                    # The first `start` spans belong to the header row.
                    'en': en[start + j].text.strip(),
                    'cht': cht[start + j].text.strip(),
                    'value': _amt.text.strip().replace(',', ''),
                })
                j += 1

        if len(code_list) != len(rows):
            print('length is not matched--' + label + '!!:')

        return rows

    def fetch_BS(self, Ntable=0):
        """Parse table ``Ntable`` into ``self.Balance``.

        Returns:
            ``{'Balance': [row, ...]}`` with rows as produced by
            :meth:`_parse_statement`.
        """
        self.Balance = {'Balance': self._parse_statement(Ntable, 'BS')}
        return self.Balance

    def fetch_PLS(self, Ntable=1):
        """Parse table ``Ntable`` into ``self.ProfitLoss``.

        Returns:
            ``{'ProfitLoss': [row, ...]}`` with rows as produced by
            :meth:`_parse_statement`.
        """
        self.ProfitLoss = {'ProfitLoss': self._parse_statement(Ntable, 'PLS')}
        return self.ProfitLoss

    def fetch_CFS(self, Ntable = 2):
        """Parse table ``Ntable`` into ``self.CashFlows``.

        Returns:
            ``{'CashFlows': [row, ...]}`` with rows as produced by
            :meth:`_parse_statement`.
        """
        self.CashFlows = {'CashFlows': self._parse_statement(Ntable, 'CFS')}
        return self.CashFlows

    def fetch_SES(self, Ntable = 3, start=5, k=2):
        """Parse table ``Ntable`` into a DataFrame stored on ``self.df``.

        Columns are the ``<th>`` texts that start with ``3``; the index
        (named ``items``) is built from ``<td>`` texts that start with an
        upper-case letter followed by a digit.  Amounts have commas removed
        and parentheses rewritten as a leading minus sign.

        Args:
            Ntable: index into ``self.tables`` of the table to parse.
            start: accepted for signature compatibility; not used.
            k: accepted for signature compatibility; not used.

        Returns:
            The DataFrame stored in ``self.df``.

        NOTE(review): this method is not called by ``fetchall``.  The row
        buffer is sized by the number of columns (``k1``) while the row
        counter is bounded by the number of row labels (``k2``); confirm
        that this is intended before relying on the result.
        """
        Etable = self.tables[Ntable]

        td=Etable.select('td')

        # Row codes: leading "<letter><digit>" of each matching <td>.
        CI=[]
        for _td in td:
            if re.findall('^[A-Z][0-9]', _td.text.strip()) !=[]:
                CI.append(re.findall('^[A-Z][0-9]', _td.text.strip()))

        th=Etable.select('th')
        cht = Etable.select('span.zh')
        en = Etable.select('span.en')
        amt = Etable.select('td.amt')

        # Column codes mapped to their Chinese/English captions.  The span
        # index starts at 1, i.e. the first span pair is skipped.
        code4={}

        j=1
        for _th in th:
            if re.findall('^3.+', _th.text.strip()) !=[]:
                code4[_th.text.strip()]={'cName':{'cht': cht[j].text.strip(),'en':en[j].text.strip()}}
                j+=1

        # Remaining spans are paired with the row codes collected above.
        index2={}
        for _cht, _en, _CI in zip(cht[j:], en[j:], CI):
            index2[_CI[0]]={'iName':{'cht': _cht.text.strip(),'en': _en.text.strip()}}

        k1= len(code4)
        k2= len(index2.keys())

        amt_row= [[] for x in range(k1)]
        j=0
        for i, _amt in enumerate(amt):
            # Every k1-th amount cell (except the very first) opens a new row.
            if i==0 or i % k1!= 0:
                amt_row[j].append(_amt.text.strip().replace(',','').replace('(','-').replace(')',''))
            else:
                j+=1
                if j>k2:
                    break
                else:
                    amt_row[j].append(_amt.text.strip().replace(',','').replace('(','-').replace(')',''))

        self.df=pd.DataFrame(columns=code4.keys())

        self.df=pd.DataFrame(np.array(amt_row),
                           columns=code4.keys())
        self.df['items']=index2.keys()
        self.df.set_index('items', inplace=True)
        return self.df

In [None]:
import os
import time
import random
from codes import codes


def dlcheck(path):
    """Return the first four characters of every entry name found in *path*.

    The driver below names its output files ``<code>-<year>-Q<season>.json``,
    so for those files the four-character prefix is the code that has
    already been downloaded.

    Args:
        path: directory to list.

    Returns:
        A list of strings, one per directory entry, in ``os.listdir`` order.
    """
    return [entry[:4] for entry in os.listdir(path)]



# Download the selected report for every listed stock that does not yet
# have an output file in `path`.
path='AllRpt'
year='2019'
season='4'
rid='C'

# Make sure the output directory exists before it is listed or written to.
os.makedirs(path, exist_ok=True)

# Codes that already have a file; a set keeps the per-code lookup cheap.
code_cap=set(dlcheck(path))

for code,v in codes.items():
    if not (v.type=="股票" and v.market=="上市"):
        continue
    if code in code_cap:
        continue

    try:
        # NOTE: the name `stock` is kept at module level on purpose; other
        # cells refer to it.
        stock=MOPS_ALL_2019(code,year,season,rid)
        filename=path+'/'+code+'-'+year+'-'+'Q'+season+'.json'
        print('get data from:' ,stock.url)
        if stock.tables != []:
            data = stock.fetchall()
            with open(filename, 'w', encoding='utf8') as f:
                json.dump(data, f)
        else:
            print('error-nodata:', code)
            with open('error.log', 'a+', encoding='utf8') as f:
                f.write('nodata:'+ code+':' + stock.url+'\n')

        # Random pause between requests.
        time.sleep(random.uniform(2,5))

    except (ConnectionError, requests.exceptions.ConnectionError):
        # requests raises its own ConnectionError, which is not a subclass
        # of the builtin one, so both are listed.  Back off, refresh the
        # list of finished codes and move on to the next code.
        code_cap=set(dlcheck(path))
        time.sleep(120)
        continue

get data from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1413&SYEAR=2019&SSEASON=4&REPORT_ID=C
error-nodata: 1413
get data from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1414&SYEAR=2019&SSEASON=4&REPORT_ID=C
error-nodata: 1414
get data from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1423&SYEAR=2019&SSEASON=4&REPORT_ID=C
error-nodata: 1423
get data from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1438&SYEAR=2019&SSEASON=4&REPORT_ID=C
error-nodata: 1438
get data from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1456&SYEAR=2019&SSEASON=4&REPORT_ID=C
error-nodata: 1456
get data from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1471&SYEAR=2019&SSEASON=4&REPORT_ID=C
get data from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1472&SYEAR=2019&SSEASON=4&REPORT_ID=C
get data from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1473&SYEAR=2019&SSEASON=4&REPORT_ID=C
get data 

get data from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1712&SYEAR=2019&SSEASON=4&REPORT_ID=C
get data from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1713&SYEAR=2019&SSEASON=4&REPORT_ID=C
error-nodata: 1713
get data from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1714&SYEAR=2019&SSEASON=4&REPORT_ID=C
get data from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1717&SYEAR=2019&SSEASON=4&REPORT_ID=C
get data from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1718&SYEAR=2019&SSEASON=4&REPORT_ID=C
get data from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1720&SYEAR=2019&SSEASON=4&REPORT_ID=C
get data from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1721&SYEAR=2019&SSEASON=4&REPORT_ID=C
get data from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1722&SYEAR=2019&SSEASON=4&REPORT_ID=C
get data from: https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1723&SYEAR=

# Check

In [None]:
# Spot check: download one company, write its JSON file, then read the file
# back and show the ProfitLoss rows as a DataFrame.
path='AllRpt'
year='2019'
season='4'
rid='C'
code='1203'

stock=MOPS_ALL_2019(code,year,season,rid)
filename=path+'/'+code+'-'+year+'-'+'Q'+season+'.json'
print('get data from:' ,stock.url)
data = stock.fetchall()
with open(filename, 'w', encoding='utf8') as f:
    json.dump(data, f)

# Read back the file that was just written; the context manager closes the
# handle, and the encoding matches the one used for writing.
with open(filename, encoding='utf8') as f:
    data=json.load(f)['data']

# data[1] is the ProfitLoss entry of the list built by fetchall().
df=pd.DataFrame(data[1]['ProfitLoss'])
df
