In [None]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import time
import re
import random
import tqdm
from tqdm.contrib import tzip

In [None]:
# 전처리를 위한 함수 구현
def replacer(text, input):
    """Return ``text`` with every substring listed in ``input`` removed.

    The substrings are stripped one after another, in the order given, so an
    earlier removal can change what a later entry matches.
    """
    cleaned = text
    for unwanted in input:
        cleaned = cleaned.replace(unwanted, "")
    return cleaned

In [None]:
class KIND_REPORTS:
    """Scraper for disclosure listings and report bodies on kind.krx.co.kr.

    The class-level attributes below are templates shared by every instance:
    the search endpoint, the browser-like request headers and the default
    form fields posted to the search endpoint.
    """

    # Endpoint that receives the search form (POST).
    __kind_url = "https://kind.krx.co.kr/disclosure/details.do"

    # Request headers sent with every search.
    # NOTE(review): 'authority', 'method', 'path' and 'scheme' look like HTTP/2
    # pseudo-headers copied from browser dev tools; here they are sent as
    # ordinary headers - confirm whether the server needs them.
    __kind_header = {
        'authority': 'kind.krx.co.kr',
        'method': 'POST',
        'path': '/disclosure/details.do',
        'scheme': 'https',
        'accept': 'text/html, */*; q=0.01',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
        'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'origin': 'https://kind.krx.co.kr',
        'referer': 'https://kind.krx.co.kr/disclosure/details.do?method=searchDetailsMain',
        # Written as a plain single-quoted literal: the former nested
        # triple-quote form produced a malformed value (closing quote lost).
        'sec-ch-ua': '"Google Chrome";v="105", "Not)A;Brand";v="8", "Chromium";v="105"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': "Windows",
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
        'x-requested-with': 'XMLHttpRequest'
    }

    # Default search form fields. The methods of this class fill in
    # reportNm / reportNmTemp, fromDate / toDate, marketType, submitOblgNm
    # and pageIndex before posting.
    __kind_params ={
        'method': 'searchDetailsSub',
        'currentPageSize': '100',
        'pageIndex': '1',
        'orderMode': '1',
        'orderStat': 'D',
        'forward': 'details_sub',
        'disclosureType01': '',
        'disclosureType02': '',
        'disclosureType03': '',
        'disclosureType04': '',
        'disclosureType05': '',
        'disclosureType06': '',
        'disclosureType07': '',
        'disclosureType08': '',
        'disclosureType09': '',
        'disclosureType10': '',
        'disclosureType11': '',
        'disclosureType13': '',
        'disclosureType14': '',
        'disclosureType20': '',
        'pDisclosureType01': '',
        'pDisclosureType02': '',
        'pDisclosureType03': '',
        'pDisclosureType04': '',
        'pDisclosureType05': '',
        'pDisclosureType06': '',
        'pDisclosureType07': '',
        'pDisclosureType08': '',
        'pDisclosureType09': '',
        'pDisclosureType10': '',
        'pDisclosureType11': '',
        'pDisclosureType13': '',
        'pDisclosureType14': '',
        'pDisclosureType20': '',
        'searchCodeType': '',
        'repIsuSrtCd': '',
        'allRepIsuSrtCd': '',
        'oldSearchCorpName': '',
        'disclosureType': '',
        'disTypevalue': '',
        'reportNm': '',
        'reportCd': '',
        'searchCorpName': '',
        'business': '',
        'marketType': '',
        'settlementMonth': '',
        'securities': '',
        'submitOblgNm': '',
        'enterprise': '',
        'fromDate': '2',
        'toDate': '',
        'reportNmTemp': '',
        'reportNmPop': '',
        'bfrDsclsType': 'on'
    }

    def __init__(self):
        self.__sleep_min = 0.2
        self.__sleep_max = 0.5
        self.report_list = []
        self.report_content = []
        pass
    
    def __isResultExist(self, what_result):
        if len(what_result) != 0:
            print("Warning! Previous "+ str(what_result)+" remains")
            answer = input("Do you want to continue and replace the result? [y/n] :")
            if answer == 'y':
                pass
            else:
                raise Exception("Process closed")
        else:
            pass

    def set_sleep(self, min=0.2, max=0.5):
        self.__sleep_min = min
        self.__sleep_max = max
    
    def get_list(self, report, start, end, market_type, submit_by=""):
        '''
        Search KIND for disclosures and store the hits in ``self.report_list``.

        Parameters
        ----------
        report : str
            Search keyword, posted as the report name.
        start, end : str
            Search period, posted unchanged as ``fromDate`` / ``toDate``.
        market_type : str
            '1' for KOSPI, '2' for KOSDAQ. Also remembered in
            ``self.market_type`` for the ``read_*`` methods.
        submit_by : str, optional
            Submitter filter; the shorthand 'krx' is expanded to
            "유가증권시장본부".

        Result
        ------
        ``self.report_list`` becomes a DataFrame with the columns
        공시일, 회사명, 코드, 보고서명, 정정신고. Rows whose report name
        contains "자회사" are dropped, and missing company names are looked
        up again one by one.

        Raises
        ------
        Exception
            If a previous result exists and the user declines to replace it.
        ValueError
            If the number of result pages cannot be read from the response.
        '''
        self.__isResultExist(self.report_list)
        self.market_type = market_type

        # Work on a private copy: the class-level template is shared by every
        # call and instance, so mutating it would leak pageIndex and filters
        # into later searches.
        params = dict(self.__kind_params)
        params['reportNm'] = report
        params['fromDate'] = start
        params['toDate'] = end
        params['marketType'] = market_type
        params['reportNmTemp'] = report

        if submit_by == 'krx':
            submit_by = "유가증권시장본부"
        params['submitOblgNm'] = submit_by
        res = requests.post(self.__kind_url, headers=self.__kind_header, data=params)
        page_html = str(BeautifulSoup(res.text, 'html.parser'))

        # The page count is the text between '</strong>/' and the next
        # non-breaking space.
        marker = '</strong>/'
        end_page_loc = page_html.find(marker)
        if end_page_loc == -1:
            raise ValueError("Could not find the page count in the KIND search response")
        page_length = int(page_html[end_page_loc+len(marker):end_page_loc+page_html[end_page_loc:].find('\xa0')])

        report_date = []
        company_name = []
        report_code = []
        report_name = []
        isCorrection = []

        for i in tqdm.tqdm(range(page_length)):
            time.sleep(random.uniform(self.__sleep_min, self.__sleep_max))
            params['pageIndex'] = str(i+1)
            res = requests.post(self.__kind_url, headers=self.__kind_header, data=params)
            bs = BeautifulSoup(res.text, 'html.parser')

            for tag in bs.find_all("a"):
                href = tag.get('href')
                if href == "#companysum":
                    company_name.append(tag.get('title'))

                elif href == "#viewer":
                    # First number in the onclick handler is the report code;
                    # its leading 8 digits are used as the disclosure date.
                    tmp_code = re.findall(r'\d+', str(tag.get('onclick')))[0]
                    report_code.append(tmp_code)
                    report_date.append(tmp_code[:4]+'-'+tmp_code[4:6]+'-'+tmp_code[6:8])
                    report_name.append(tag.get('title'))
                    isCorrection.append('YES' if '정정' in str(tag) else 'NO')

        # Lists of unequal length are padded with None by the constructor,
        # which is how missing company names show up below.
        self.report_list = pd.DataFrame([report_date,company_name,report_code,report_name,isCorrection]).T
        self.report_list.columns = ['공시일','회사명','코드','보고서명','정정신고']
        # Drop reports whose name mentions a subsidiary ("자회사").
        self.report_list = self.report_list.loc[~self.report_list["보고서명"].str.contains("자회사"), :].reset_index(drop=True)

        # Fill in company names that came back as None.
        missing = self.report_list['회사명'].isnull()
        if missing.sum() >= 1:
            df_null = self.report_list.loc[missing]
            index_list = df_null.index
            date_list = df_null['공시일'].values
            report_l = df_null['보고서명'].values

            find_name = []
            for date, r in zip(date_list, report_l):
                name_tag = self.__find_none(report = r, start=date,end = date, market_type=market_type)
                find_name.append(name_tag)

            for indx, name in zip(index_list, find_name):
                # Single .loc call: chained `df[col].loc[i] = ...` may write to
                # a temporary copy and silently leave the frame unchanged.
                self.report_list.loc[indx, '회사명'] = name
        print()
        print('Jobs Done')
        print('check the result with .report_list')
    
    # v6에서 추가
    def __find_none(self, report, start, end, market_type, submit_by=""):
        """Re-run a narrow search and return the text of the first link found.

        Posts the search form with the given report name and period and
        returns the text of the first ``<a>`` element of the response with all
        spaces removed; ``get_list`` uses this value as the company name.
        Returns ``None`` when the response contains no ``<a>`` element.

        Parameters mirror ``get_list``: ``report`` is the search keyword,
        ``start`` / ``end`` the period, ``market_type`` the market code and
        ``submit_by`` the submitter filter ('krx' expands to "유가증권시장본부").
        """
        if len(report) > 6:
            # Shorten long names: added to work around searches failing when
            # the keyword contains spaces.
            report = report[:5]

        # Private copy so the shared class-level template is never mutated.
        params = dict(self.__kind_params)
        params['reportNm'] = report
        params['fromDate'] = start
        params['toDate'] = end
        params['marketType'] = market_type
        params['reportNmTemp'] = report

        if submit_by == 'krx':
            submit_by = "유가증권시장본부"
        params['submitOblgNm'] = submit_by
        res = requests.post(self.__kind_url, headers=self.__kind_header, data=params)
        bs = BeautifulSoup(res.text, 'html.parser')

        tag = bs.a
        if tag is None:
            # Nothing to read a name from; leave the name missing instead of
            # failing with AttributeError.
            return None
        return tag.text.replace(' ', '')

    def __engine_read_report(self, doc):
        """Download one disclosure through the KIND viewer and extract its text.

        Parameters
        ----------
        doc : str
            Report code used as ``acptno`` in the viewer URL.

        Returns
        -------
        tuple of four items ``(text_only_list, after_list, doc_code, company_code)``
            * ``text_only_list`` - extracted text. When ``self.read_what`` is
              'sales' it is a one-element list holding the texts of all
              ``<span>`` elements; otherwise it is a flat list of cleaned
              ``<td>`` texts, or (for pages containing '▶ 업종코드 :') the
              pieces split out of two lines of the page text.
            * ``after_list`` - the remaining 6+-digit numbers found in the
              viewer's ``<option>`` elements after the first one, or '없음'
              when there are none.
            * ``doc_code`` - the first such number, whose document was read.
            * ``company_code`` - first 6-digit run in the first ``<h1>``.

        The sentinel ``('No', 'No', 'No', 'No')`` is returned when the selected
        option is marked "[정정]" (correction), when no option is selected at
        all, or when a '▶ 업종코드 :' page has too few pieces to parse.
        """
        skip = ('No', 'No', 'No', 'No')

        url = "https://kind.krx.co.kr/common/disclsviewer.do?method=search&acptno=" + doc + "&docno=&viewerhost=&viewerport="
        res = requests.get(url)
        bs = BeautifulSoup(res.text, 'html.parser')

        # Company code: first 6-digit run inside the first <h1>.
        h1_tag = bs.find_all("h1")[0]
        company_code = re.findall(r"\d{6,6}", str(h1_tag))[0]

        option_tag = bs.find_all("option")

        for tag in option_tag:
            select_str = str(tag)
            if not select_str.startswith('<option selected="selected"'):
                continue

            if "[정정]" in select_str:
                # Corrections are not read on their own; they are reported
                # through after_list of the disclosure they belong to.
                return skip

            # Every 6+-digit number across all options; the first one is the
            # document that gets read, the rest are handed back as after_list.
            new_doc = re.findall(r'\d{6,}', str(option_tag))
            doc_code = new_doc[0]
            after_list = new_doc[1:]

            url = "https://kind.krx.co.kr/common/disclsviewer.do?method=searchContents&docNo=" + str(doc_code)
            res = requests.get(url)
            viewer_html = str(BeautifulSoup(res.text, 'html.parser'))

            # Raw string: '\.' is an invalid escape in a normal literal.
            inner_url = re.findall(r'https://[a-z0-9/.]+\.htm', viewer_html)[0]
            res = requests.get(inner_url)
            html = res.content.decode('utf-8','replace')
            body = BeautifulSoup(html, 'html.parser')

            text_only_list = []

            if self.read_what == 'sales':
                text_only_list.append([span.get_text() for span in body.find_all('span')])
            elif '▶ 업종코드 :' not in str(body):
                for cell in body.find_all('td'):
                    text = cell.get_text()
                    text = text.replace('\r\n', ', ')
                    text = replacer(text, [", \n","\r",'\n'])
                    text_only_list.append(text)
            else:
                # '변경상장(상호변경)' notices published by 유가증권시장본부.
                text = replacer(body.get_text(), [", \n","\r",'\n','   → ','- '])
                text = text.split('    ')

                # Index 5 is read below, so at least six pieces are needed
                # (the former `<= 4` check let a 5-piece page raise IndexError).
                if len(text) <= 5:
                    return skip

                text_only_list = text_only_list + replacer(text[3], ['(영문명',')','보통주','우선주']).split(': ')
                text_only_list = text_only_list + replacer(text[5], ['(영문명',')','보통주','우선주']).split(': ')

            if len(after_list) <= 0:
                after_list = '없음'

            return text_only_list, after_list, doc_code, company_code

        # No selected <option> was found: report "skip" explicitly rather than
        # returning None, which callers cannot unpack.
        return skip
    
    # v7 추가
    def __doc_filter(self, doc_list):    # TODO: rework the filter so that it looks up the company code
        '''Keep only the listed disclosures whose selected viewer entry is not a correction.

        For every row of ``doc_list`` the viewer page is fetched; the row is
        kept once for each selected ``<option>`` that is not marked "[정정]".
        Returns a new DataFrame with the columns 공시일, 회사명, 코드,
        보고서명, 정정신고.
        '''
        self.__isResultExist(self.report_content)
        self.read_what = 'filter'

        rows = tzip(
            doc_list['코드'].values,
            doc_list['회사명'].values,
            doc_list['공시일'].values,
            doc_list['보고서명'].values,
            doc_list['정정신고'].values,
        )

        kept = []
        for code, name, date, report, rereport in rows:
            time.sleep(random.uniform(self.__sleep_min, self.__sleep_max))
            url = "https://kind.krx.co.kr/common/disclsviewer.do?method=search&acptno=" + code + "&docno=&viewerhost=&viewerport="
            page = BeautifulSoup(requests.get(url).text, 'html.parser')

            for option in page.find_all("option"):
                option_html = str(option)
                if not option_html.startswith('<option selected="selected"'):
                    continue
                if "[정정]" in option_html:
                    # Corrections are skipped; they are covered through the
                    # disclosure they amend.
                    continue
                kept.append([date, name, code, report, rereport])

        return pd.DataFrame(kept, columns=["공시일","회사명","코드","보고서명","정정신고"])
    
    def read_business_change(self, doc_list):
        '''업종변경'''
        self.__isResultExist(self.report_content)
        self.read_what = 'business'

        code_ar = doc_list['코드'].values
        name_ar = doc_list["회사명"].values
        announce_date = doc_list["공시일"].values
        report_name = doc_list["보고서명"].values

        preprocessed_data = []
        err_lst = []

        for code, name, date, report_n in tzip(code_ar, name_ar, announce_date,report_name):
            time.sleep(random.uniform(self.__sleep_min, self.__sleep_max))
            text_only_list, correct_list, now_doc, company_code= self.__engine_read_report(code) # 쿼리 날려요 두번 날려요

            if (text_only_list == "No") or (text_only_list == []): # 정정공시일 경우에는 No을 리턴해서 SKIP
                continue
            
            list_of_corp = []
                
            # 예외는 여기서 처리
            if (pd.to_datetime(date) >= pd.to_datetime("2017-06-30")) & (pd.to_datetime(date) <= pd.to_datetime("2017-07-03")):
                if self.market_type == '2':
                    split_list = text_only_list[0].split('         ')
                    bef = ''.join([split_list[5] , split_list[6]]).replace('\xa0\xa0 ', '')
                    aft = ''.join([split_list[8] , split_list[9]]).replace('\xa0\xa0 ', '')
                    change_day = split_list[11].replace("4. 변경일 : ", '')
                    reason = split_list[12].replace("5. 변경사유 : ", '')
                    list_of_corp.extend([date, change_day, name, company_code, bef, aft, reason,"없음",correct_list, now_doc])
                elif self.market_type == '1':
                    split_list = text_only_list[3].split('         ')
                    if "변경일" in split_list[10]:
                        bef = ''.join([split_list[4] , split_list[5]]).replace('\xa0', '')
                        aft = ''.join([split_list[8] , split_list[9]]).replace('\xa0', '')
                        change_day = split_list[10].replace("4. 변경일 :", '')
                        reason = split_list[11].replace("5. 변경사유 :", '')
                    else:
                        bef = ''.join([split_list[4] , split_list[5]]).replace('\xa0', '')
                        aft = ''.join([split_list[8] , split_list[9]]).replace('\xa0', '')
                        change_day = split_list[11].replace("4. 변경일 :", '')
                        reason = split_list[12].replace("5. 변경사유 :", '')
                    list_of_corp.extend([date, change_day, name, company_code, bef, aft, reason,"-",correct_list, now_doc])
            else:
                if self.market_type == '1': # 코스피
                    list_of_corp.extend([date, text_only_list[14], text_only_list[1], company_code, text_only_list[5], text_only_list[10], text_only_list[16],text_only_list[20],correct_list, now_doc])     
                elif self.market_type == '2': # 코스닥
                    list_of_corp.extend([date, text_only_list[12], text_only_list[1], company_code, text_only_list[4], text_only_list[7], text_only_list[10],text_only_list[14],correct_list, now_doc])

            preprocessed_data.append(list_of_corp) 
        
        self.report_content = pd.DataFrame(preprocessed_data, columns=["공시일","변경일","회사명","기업공시코드","변경전","변경후","변경사유","기타","정정공시코드","현재공시코드"])
        print()
        print('Jobs Done')
        print('check the result with .report_content')

    def read_company_name_change(self, doc_list):
        '''상호변경 -- Not Detail ver. (상호변경 횟수만 카운트)'''
        self.__isResultExist(self.report_content)
        self.read_what = 'name'

        code_ar = doc_list['코드'].values
        name_ar = doc_list["회사명"].values
        announce_date = doc_list["공시일"].values
        report_name = doc_list["보고서명"].values

        preprocessed_data = []
        err_lst = []

        for code, name, date, report_n in tzip(code_ar, name_ar, announce_date,report_name):
            time.sleep(random.uniform(self.__sleep_min, self.__sleep_max))
            text_only_list, correct_list, now_doc, company_code= self.__engine_read_report(code) # 쿼리 날려요 두번 날려요

            if (text_only_list == "No") or (text_only_list == []): # 정정공시일 경우에는 No을 리턴해서 SKIP
                continue
            
            list_of_corp = []
            
            if self.market_type =='1':
                list_of_corp.extend([name, company_code, date, report_n, correct_list, now_doc])
                preprocessed_data.append(list_of_corp)
            elif self.market_type =='2':
                if "변경상장" in str(report_n):
                    list_of_corp.extend([name, company_code, date, report_n, correct_list, now_doc])
                    preprocessed_data.append(list_of_corp)
        
        self.report_content = pd.DataFrame(preprocessed_data, columns=["회사명","기업코드","공시일","보고서명","정정공시코드","현재공시코드"])
        print()
        print('Jobs Done')
        print('check the result with .report_content')
#    def read_company_name_change(self, doc_list):
#        '''상호변경 - 미완성인 정확한 버전'''
#        self.__isResultExist(self.report_content)   #상호변경은 정정공시의 정보가 쓸모없다 판단해서 최초 공시만 읽는다(정정무시)
#        self.read_what = 'business'
#        doc_list = doc_list['코드']
#
#        tmp_date = []
#        tmp_kr_before = []
#        tmp_kr_after = []
#        tmp_us_before = []
#        tmp_us_after = []
#        correct_code = [] # 정정을 담는다
#        now_doc_list = []
#        company_code_list = []
#
#        for i in tqdm.tqdm(range(len(doc_list))):
#            time.sleep(random.uniform(self.__sleep_min, self.__sleep_max))
#            doc = doc_list.copy()[i]
#            text_only_list, correct_list, now_doc, company_code = self.__engine_read_report(doc) # 쿼리 날려요 두번 날려요
#            
#            if text_only_list == 'No': # 정정공시일 경우에는 None을 리턴해서 SKIP
#                continue
#            
#            print(text_only_list)
#
#            if len(text_only_list) == 4: # 유가증권시장본부가 공시하는 '변경상장(상호변경)'
#                tmp_date.append(doc[0:4]+'-'+doc[4:6]+'-'+doc[6:8])
#                tmp_kr_before.append(text_only_list[0])
#                tmp_kr_after.append(text_only_list[2])
#                tmp_us_before.append(text_only_list[1])
#                tmp_us_after.append(text_only_list[3])
#                correct_code.append(correct_list)
#                now_doc_list.append(now_doc)
#                company_code_list.append(company_code)
#
#            elif len(text_only_list) == 23: # 코스닥시장본부에서 공시하는 '변경상장(상호변경)'
#                tmp_date.append(doc[0:4]+'-'+doc[4:6]+'-'+doc[6:8])
#                tmp_kr_before.append(text_only_list[1])
#                tmp_kr_after.append(text_only_list[7])
#                tmp_us_before.append('-')
#                tmp_us_after.append(text_only_list[9])
#                correct_code.append(correct_list)
#                now_doc_list.append(now_doc)
#                company_code_list.append(company_code)
#
#            elif len(text_only_list) == 21: # 각 회사에서 공시하는 '상호변경안내'
#                tmp_date.append(doc[0:4]+'-'+doc[4:6]+'-'+doc[6:8])
#                tmp_kr_before.append(text_only_list[3])
#                tmp_kr_after.append(text_only_list[8])
#                tmp_us_before.append(text_only_list[5])
#                tmp_us_after.append(text_only_list[10])
#                correct_code.append(correct_list)
#                now_doc_list.append(now_doc)
#                company_code_list.append(company_code)
#
#        result_dict = {
#                '공시일': tmp_date,
#                '변경전(한글)': tmp_kr_before,
#                '변경후(한글)': tmp_kr_after,
#                '변경전(영문)': tmp_us_before,
#                '변경후(영문)': tmp_us_after,
#                '정정공시코드': correct_code,
#                '현재공시코드': now_doc_list,
#                '기업공시코드': company_code_list
#            }
#
#        self.report_content = pd.DataFrame(result_dict)
#        self.report_content['변경전(한글)'] = self.report_content['변경전(한글)'].apply(lambda x: replacer(x, ['주식회사 ', '(주)']))
#        self.report_content['변경후(한글)'] = self.report_content['변경후(한글)'].apply(lambda x: replacer(x, ['주식회사 ', '(주)']))
#        print()
#        print('Jobs Done')
#        print('check the result with .report_content')

    def read_sales_report(self, doc_list): 
        '''단일판매ㆍ공급계약체결'''
        self.__isResultExist(self.report_content)
        self.read_what = 'sales' 
         
        code_ar = doc_list['코드'].values
        name_ar = doc_list['회사명'].values
        report_title = doc_list["보고서명"].values
        correct_ar = doc_list['정정신고'].values
        announce_date = doc_list['공시일'].values

        preprocess_Data = []
        
        for code, name, correct, date,title in tzip(code_ar, name_ar, correct_ar, announce_date, report_title): 
            if "기타" in str(title):
                continue
            time.sleep(random.uniform(self.__sleep_min, self.__sleep_max))
            text, after_list, now_code, company_code = self.__engine_read_report(code) # 쿼리 날려요 두번 날려요
            if text == 'No': # 정정공시일 경우에는 None을 리턴해서 SKIP
                continue
            
            text = text[0]
            list_of_corp = []

            if self.market_type == '1': # 코스피
                list_of_corp.extend([name, company_code, date, correct, text[6],text[9],text[11],text[13], text[30], '-', '-', after_list, now_code])
                    # 정정인경우
                #elif correct == "YES":
                #    if '금액' in text[13]:
                #        list_of_corp.extend([name, date, correct, '-','-','-','-', text[6], text[2], text[8]], '-', now_code)
                #    else:
                #        list_of_corp.extend([name, date, correct, '-','-','-','-', text[6], text[2], text[8]], '-', now_code)
                preprocess_Data.append(list_of_corp)

            elif self.market_type =='2':   # 코스닥
                # v7에서 예전 코스닥 공시 읽는 문제를 해결하기 위해서 추가
                if  pd.to_datetime(date) < pd.to_datetime("2018-07-29"):
                    list_of_corp.extend([name, company_code, date, correct, text[4], text[7], text[9], text[11], text[33], '-','-',after_list, now_code])
                else:
                    if text[9] == '-':
                        continue
                    else:
                        list_of_corp.extend([name,company_code, date, correct, text[4],text[9],text[15],text[17], text[45], '-', '-', after_list, now_code])
                # 정정인경우
                #elif correct == 'YES': 
                #    if '금액' in text[8]:
                #        list_of_corp.extend([name, date, correct, '-','-','-','-', text[6], text[2], text[8]], '-', now_code)
                #    else:
                #        list_of_corp.extend([name, date, correct, '-','-','-','-', text[6], text[2], text[8]], '-', now_code)     
                preprocess_Data.append(list_of_corp)

        df = pd.DataFrame(preprocess_Data, columns=['회사명',"기업공시코드",'공시일','정정공시존재여부','계약명','계약금액','최근매출액','매출액 대비(%)','계약(수주)일자', '정정공시일','정정공시사유', "정정공시코드","현재공시코드"])
        self.report_content = df 
        print()
        print('Jobs Done')
        print('check the result with .report_content')
    
    def read_right_issue(self, doc_list):   # 아직 미구현임!!!
        '''(v7수정사항) 좀 더 정확한 "추가상장 유상증자" 검색어를 입력해야 한다'''
        self.__isResultExist(self.report_content)
        self.read_what = 'right_issue'
        
        code_ar = doc_list['코드'].values
        name_ar = doc_list["회사명"].values
        announce_date = doc_list["공시일"].values

        preprocessed_data = []

        for code, name, date in tzip(code_ar, name_ar, announce_date):
            time.sleep(random.uniform(self.__sleep_min, self.__sleep_max))
            text_only_list, after_list, now_code, company_code = self.__engine_read_report(code) # 쿼리 날려요 두번 날려요
            list_of_corp = []

            if text_only_list == 'No': # 정정공시일 경우에는 None을 리턴해서 SKIP
                continue
            
            if self.market_type == '1': #코스피
                continue
            else:   #코스닥인 경우
                list_of_corp.extend([name,company_code, date, text_only_list[-4], text_only_list[-3], text_only_list[-2]])

            preprocessed_data.append(list_of_corp)

        self.report_content = pd.DataFrame(preprocessed_data, columns=['회사명','기업공시코드','공시일','발행방법','발행주식수(주)','발행가(원)'])
        print()
        print('Jobs Done')
        print('check the result with .report_content')
    
    def read_cb_issue(self, doc_list):   # v7추가: 전환사채 발행
        ''' "전환사채권발행결정"으로 검색한다 '''
        self.__isResultExist(self.report_content)
        self.read_what = 'cb_issue'
        
        code_ar = doc_list['코드'].values
        name_ar = doc_list["회사명"].values
        announce_date = doc_list["공시일"].values
        report_name = doc_list["보고서명"].values

        preprocessed_data = []

        for code, name, date, report_n in tzip(code_ar, name_ar, announce_date,report_name):
            if ("기타" in str(report_n)) or ("철회" in str(report_n)):
                continue
            time.sleep(random.uniform(self.__sleep_min, self.__sleep_max))
            text_only_list, after_list, now_code,company_code = self.__engine_read_report(code) # 쿼리 날려요 두번 날려요

            if (text_only_list == "No") or (text_only_list == []):
                continue
        
            list_of_corp = []

            if pd.to_datetime(date) > pd.to_datetime("2020-07-05"):
                if "공 시 대 리 인" in text_only_list[25]: # +14를 해주면 된다
                    list_of_corp.extend([name,company_code, date, text_only_list[29+14], text_only_list[31+14],text_only_list[63+14], text_only_list[59+14], text_only_list[61+14], text_only_list[69+14], code])
                
                elif "답변입니다" in text_only_list[25]:
                    list_of_corp.extend([name,company_code, date, text_only_list[29+1], text_only_list[31+1],text_only_list[63+1], text_only_list[59+1], text_only_list[61+1], text_only_list[69+1], code]) 
                else:
                    list_of_corp.extend([name,company_code, date, text_only_list[29], text_only_list[31],text_only_list[63], text_only_list[59], text_only_list[61], text_only_list[69], code]) 
                preprocessed_data.append(list_of_corp)
            elif pd.to_datetime(date) > pd.to_datetime("2019-12-07"):
                if "공 시 대 리 인" in text_only_list[25]: # +14를 해주면 된다
                    list_of_corp.extend([name,company_code, date, text_only_list[29+14], text_only_list[31+14],text_only_list[61+14], text_only_list[57+14], text_only_list[59+14], text_only_list[67+14], code])
                elif "답변입니다" in text_only_list[25]:
                    list_of_corp.extend([name,company_code, date, text_only_list[29+1], text_only_list[31+1],text_only_list[61+1], text_only_list[57+1], text_only_list[59+1], text_only_list[67+1], code])                                         
                else:
                    list_of_corp.extend([name,company_code, date, text_only_list[29], text_only_list[31],text_only_list[61], text_only_list[57], text_only_list[59],text_only_list[67], code])
                preprocessed_data.append(list_of_corp)
            else: 
                if "공 시 대 리 인" in text_only_list[25]: # +14를 해주면 된다
                    print("여기!")
                    list_of_corp.extend([name,company_code, date, text_only_list[29+14], text_only_list[31+14],text_only_list[57+14], text_only_list[53+14], text_only_list[55+14], text_only_list[63+14], code])
                elif "답변입니다" in text_only_list[25]:
                    list_of_corp.extend([name,company_code, date, text_only_list[29+1], text_only_list[31+1],text_only_list[57+1], text_only_list[53+1], text_only_list[55+1], text_only_list[63+1], code])      
                else:
                    list_of_corp.extend([name,company_code, date, text_only_list[29], text_only_list[31],text_only_list[57], text_only_list[53], text_only_list[55], text_only_list[63], code])
                preprocessed_data.append(list_of_corp)
        
        self.report_content = pd.DataFrame(preprocessed_data, columns=["회사명","기업공시코드","공시일","종류","총액(원)","사채만기일","표면이자율(%)","만기이자율(%)","사채발행방법","공시코드"])
        print()
        print('Jobs Done')
        print('check the result with .report_content')
    
    # v7에서 추가 / 최대주주변경
    def read_shareholder_change(self, doc_list):
        ''' "최대주주변경" 으로 검색!!'''
        doc = doc_list.loc[doc_list['보고서명'] == "최대주주변경", :]
        new_doc = self.__doc_filter(doc_list=doc)
        self.report_content = new_doc
        print()
        print('Jobs Done')
        print('check the result with .report_content')
    
    def read_bw_issue(self, doc_list):   # v8추가: 신주인수권부사채 발행
        ''' "신주인수권부사채권발행결정"으로 검색한다 '''
        self.__isResultExist(self.report_content)
        self.read_what = 'bw_issue'
        
        code_ar = doc_list['코드'].values
        name_ar = doc_list["회사명"].values
        announce_date = doc_list["공시일"].values
        report_name = doc_list["보고서명"].values

        preprocessed_data = []

        for code, name, date, report_n in tzip(code_ar, name_ar, announce_date,report_name):
            if "기타" in str(report_n):
                continue
            time.sleep(random.uniform(self.__sleep_min, self.__sleep_max))
            text_only_list, after_list, now_code, company_code = self.__engine_read_report(code) # 쿼리 날려요 두번 날려        

            if text_only_list == "No":
                continue
            
            list_of_corp = []
            
            # 대표이사 확인서가 들어간 경우가 2012년에 있는데 이 경우는 +2를 해주면 된다. 근데 귀찮으니 이 경우만 직접 수정하도록 하자 (대표이사등의 확인서가 text_only_list[26]에 담겨있다)

            if pd.to_datetime(date) > pd.to_datetime("2020-07-05"):
                if "공 시 대 리 인" in text_only_list[25]: # +14를 해주면 된다
                    list_of_corp.extend([name, company_code,date, text_only_list[29+14], text_only_list[31+14],text_only_list[63+14], text_only_list[59+14], text_only_list[61+14], text_only_list[69+14], code])
                
                elif "답변입니다" in text_only_list[25]:
                    list_of_corp.extend([name,company_code, date, text_only_list[29+1], text_only_list[31+1],text_only_list[63+1], text_only_list[59+1], text_only_list[61+1], text_only_list[69+1], code]) 
                else:
                    list_of_corp.extend([name, company_code,date, text_only_list[29], text_only_list[31],text_only_list[63], text_only_list[59], text_only_list[61], text_only_list[69], code]) 
                preprocessed_data.append(list_of_corp)
            elif pd.to_datetime(date) > pd.to_datetime("2019-12-07"):
                if "공 시 대 리 인" in text_only_list[25]: # +14를 해주면 된다
                    list_of_corp.extend([name,company_code, date, text_only_list[29+14], text_only_list[31+14],text_only_list[61+14], text_only_list[57+14], text_only_list[59+14], text_only_list[67+14], code])
                elif "답변입니다" in text_only_list[25]:
                    list_of_corp.extend([name,company_code, date, text_only_list[29+1], text_only_list[31+1],text_only_list[61+1], text_only_list[57+1], text_only_list[59+1], text_only_list[67+1], code])                                         
                else:
                    list_of_corp.extend([name,company_code, date, text_only_list[29], text_only_list[31],text_only_list[61], text_only_list[57], text_only_list[59],text_only_list[67], code])
                preprocessed_data.append(list_of_corp)
            else: 
                if "공 시 대 리 인" in text_only_list[25]: # +14를 해주면 된다
                    print("여기!")
                    list_of_corp.extend([name,company_code, date, text_only_list[29+14], text_only_list[31+14],text_only_list[57+14], text_only_list[53+14], text_only_list[55+14], text_only_list[63+14], code])
                elif "답변입니다" in text_only_list[25]:
                    list_of_corp.extend([name, company_code,date, text_only_list[29+1], text_only_list[31+1],text_only_list[57+1], text_only_list[53+1], text_only_list[55+1], text_only_list[63+1], code])      
                else:
                    list_of_corp.extend([name, company_code,date, text_only_list[29], text_only_list[31],text_only_list[57], text_only_list[53], text_only_list[55], text_only_list[63], code])
                preprocessed_data.append(list_of_corp)
        
        self.report_content = pd.DataFrame(preprocessed_data, columns=["회사명","기업공시코드","공시일","종류","총액(원)","사채만기일","표면이자율(%)","만기이자율(%)","사채발행방법","공시코드"])
        print()
        print('Jobs Done')
        print('check the result with .report_content')

    def read_investment_caution(self, doc_list): # added in v8
        '''Parse reports found with the search keyword "투자주의환기종목지정" (KOSDAQ only).

        Rows whose report title contains "기타" or "해제" are skipped before any
        request is sent. For every remaining row the report is fetched through
        the report engine and two positional fields are extracted; the position
        depends on whether the disclosure date is on/after 2012-04-01.

        Args:
            doc_list: DataFrame providing the columns '코드', "회사명", "공시일"
                and "보고서명".

        Side effects:
            Sets ``self.read_what`` to 'caution_issue' and stores the parsed
            table in ``self.report_content``. The table is always created,
            even when no report could be parsed (it is then empty).
        '''
        self.__isResultExist(self.report_content)
        self.read_what = 'caution_issue'

        code_ar = doc_list['코드'].values
        name_ar = doc_list["회사명"].values
        announce_date = doc_list["공시일"].values
        report_name = doc_list["보고서명"].values

        columns = ["회사명","기업공시코드","공시일","지정사유","근거규정","추가지정인지여부"]
        preprocessed_data = []

        for code, name, date, report_n in tzip(code_ar, name_ar, announce_date, report_name):
            title = str(report_n)  # guard against non-string titles (e.g. NaN)
            if ("기타" in title) or ("해제" in title):
                continue
            time.sleep(random.uniform(self.__sleep_min, self.__sleep_max))
            # The engine sends the queries (two requests per report, per the original note).
            text_only_list, after_list, now_code, company_code = self.__engine_read_report(code)

            if text_only_list == "No":
                continue

            # Flag additional designations ("추가" in the report title).
            add = "Yes" if "추가" in title else "No"

            # The two extracted fields sit one position later in reports dated before 2012-04-01.
            if pd.to_datetime(date) >= pd.to_datetime("2012-04-01"):
                reason, rule = text_only_list[3], text_only_list[7]
            else:
                reason, rule = text_only_list[4], text_only_list[8]
            preprocessed_data.append([name, company_code, date, reason, rule, add])

        # Build the frame once, after the loop: doing it inside the loop left
        # `df` undefined whenever no row was parsed and rebuilt it on every row.
        df = pd.DataFrame(preprocessed_data, columns=columns)
        df['지정사유'] = df["지정사유"].str.replace(',','')

        self.report_content = df
        print()
        print('Jobs Done')
        print('check the result with .report_content')

    def read_unfaithful(self, doc_list): # added in v8
        '''Parse reports found with the search keyword "불성실공시법인지정".

        !!!IMPORTANT!!! (original author note) Two KOSDAQ disclosures dated
        2013-03-28 are problematic, so that period has to be removed.

        Args:
            doc_list: DataFrame providing the columns '코드', "회사명", "공시일"
                and "보고서명".

        Side effects:
            Sets ``self.read_what`` to 'unfaithful_issue' and stores the parsed
            table in ``self.report_content``.
        '''
        self.__isResultExist(self.report_content)
        self.read_what = 'unfaithful_issue'
        for excluded in ("관리종목지정", "매매거래정지"):
            doc_list = doc_list.loc[~ doc_list["보고서명"].str.contains(excluded), :]

        codes = doc_list['코드'].values
        names = doc_list["회사명"].values
        dates = doc_list["공시일"].values
        titles = doc_list["보고서명"].values

        rows = []

        for code, name, date, report_n in tzip(codes, names, dates, titles):
            title = str(report_n)
            if "주권" in title or "예고" in title or title.startswith("기타"):
                continue
            time.sleep(random.uniform(self.__sleep_min, self.__sleep_max))

            # The engine sends the queries (two requests per report, per the original note).
            text_only_list, after_list, now_code, company_code = self.__engine_read_report(code)
            if text_only_list == "No":
                continue

            # Positions of: type, imposed points, cumulative points, fine, content.
            if self.market_type == '1':
                picks = (3, 10, 14, 16, 5)
            elif self.market_type == '2':
                # KOSDAQ case. Original note: the penalty points are those imposed
                # over the last year (including the current ones) -- whether KOSPI
                # differs still needs checking.
                if pd.to_datetime(date) >= pd.to_datetime("2015-09-10"):
                    picks = (2, 14, 24, 16, 4)
                else:
                    # Original note: the fine seems to sit at "points + 2".
                    picks = (2, 14, 18, 16, 4)
            else:
                picks = None

            if picks is None:
                # Unknown market type: an empty row is recorded, as before.
                rows.append([])
            else:
                rows.append([name, company_code, date, *[text_only_list[i] for i in picks], after_list, now_code])

        self.report_content = pd.DataFrame(rows, columns=["회사명","기업공시코드","공시일","불성실공시유형","부과벌점","누계벌점","과징금","불성실공시내용","정정공시코드","현재공시코드"])
        print()
        print('Jobs Done')
        print('check the result with .report_content')

    def read_requests_issue(self, doc_list):
        '''Parse reports found with the search keyword "조회공시요구".

        Reports whose title contains "답변" are dropped first. For each
        remaining report one positional text field is extracted; its position
        depends on the market type, the report title and the disclosure date.
        Rows are only produced for market types '1' and '2'.

        Args:
            doc_list: DataFrame providing the columns '코드', "회사명", "공시일"
                and "보고서명".

        Side effects:
            Sets ``self.read_what`` to 'requests' and stores the parsed table
            in ``self.report_content``.
        '''
        self.__isResultExist(self.report_content)
        self.read_what = 'requests'

        is_answer = doc_list["보고서명"].str.contains("답변")
        doc_list = doc_list.loc[~ is_answer, :]

        codes = doc_list['코드'].values
        names = doc_list["회사명"].values
        dates = doc_list["공시일"].values
        titles = doc_list["보고서명"].values

        rows = []

        for code, name, date, report_n in tzip(codes, names, dates, titles):
            time.sleep(random.uniform(self.__sleep_min, self.__sleep_max))
            # The engine sends the queries (two requests per report, per the original note).
            text_only_list, correct_list, now_doc, company_code = self.__engine_read_report(code)

            # The engine returns "No" for a correction disclosure; skip those and empty results.
            if (text_only_list == "No") or (text_only_list == []):
                continue

            market = self.market_type
            if market == '2':
                content_idx = 3
                if "현저한시황변동" in report_n:
                    # Reports dated after 2019-07-31 carry the content one position earlier.
                    if not (pd.to_datetime(date) <= pd.to_datetime("2019-07-31")):
                        content_idx = 2
            elif market == '1':
                content_idx = 2
                if "시황변동" in report_n:
                    if pd.to_datetime(date) >= pd.to_datetime("2022-10-01"):
                        content_idx = 6
                elif "외부감사" in report_n:
                    content_idx = 3
            else:
                # Any other market type yields no row.
                continue

            rows.append([name, company_code, date, report_n, text_only_list[content_idx], now_doc])

        self.report_content = pd.DataFrame(rows, columns=["회사명","기업코드","공시일","보고서명","조회공시요구내용","현재공시코드"])
        print()
        print("Jobs Done")
        
    def read_get_stock(self, doc_list):
        '''Parse reports found with the search keyword "타법인주식및출자증권취득결정".

        Reports whose title contains "종속", "취소", "철회", "해제" or "기타" are
        dropped first. Rows are only produced for market types '1' and '2'.

        Args:
            doc_list: DataFrame providing the columns '코드', "회사명", "공시일"
                and "보고서명".

        Side effects:
            Sets ``self.read_what`` to 'get' and stores the parsed table in
            ``self.report_content``.
        '''
        self.__isResultExist(self.report_content)
        self.read_what = 'get'
        for excluded in ("종속", "취소", "철회", "해제", "기타"):
            doc_list = doc_list.loc[~doc_list["보고서명"].str.contains(excluded), :]

        codes = doc_list['코드'].values
        names = doc_list["회사명"].values
        dates = doc_list["공시일"].values
        titles = doc_list["보고서명"].values

        rows = []

        for code, name, date, report_n in tzip(codes, names, dates, titles):
            time.sleep(random.uniform(self.__sleep_min, self.__sleep_max))
            # The engine sends the queries (two requests per report, per the original note).
            text_only_list, correct_list, now_doc, company_code = self.__engine_read_report(code)

            # The engine returns "No" for a correction disclosure; skip those and empty results.
            if (text_only_list == "No") or (text_only_list == []):
                continue

            market = self.market_type
            if market == '1':
                # Issuer name is combined with the field two positions later, in parentheses.
                issuer = text_only_list[2] + "(" + text_only_list[4] + ")"
                detail_idx = (17, 19, 23, 32, 34, 36)
            elif market == '2':
                # "자율공시" reports have every detail field two positions earlier.
                if "자율공시" in report_n:
                    detail_idx = (15, 17, 21, 30, 32, 34)
                else:
                    detail_idx = (17, 19, 23, 32, 34, 36)
                issuer = text_only_list[2]
            else:
                # Any other market type yields no row.
                continue

            rows.append([name, company_code, date, issuer, *[text_only_list[i] for i in detail_idx]])

        self.report_content = pd.DataFrame(rows, columns=["회사명","기업코드","공시일","발행회사명(국적)", "취득주식수(주)","취득금액(원)","자기자본대비(%)","취득방법","취득목적","취득예정일자"])
        print()
        print("Jobs Done")
        
    def read_lend_money(self, doc_list):
        '''Parse reports found with the search keyword "특수관계인에대한자금대여".

        Args:
            doc_list: DataFrame providing the columns '코드', "회사명", "공시일"
                and "보고서명".

        Side effects:
            Sets ``self.read_what`` to 'lend' and stores the parsed table in
            ``self.report_content``.
        '''
        self.__isResultExist(self.report_content)
        self.read_what = 'lend'

        codes = doc_list['코드'].values
        names = doc_list["회사명"].values
        dates = doc_list["공시일"].values
        titles = doc_list["보고서명"].values

        # Fixed positions of the five extracted fields inside the report text.
        field_idx = (7, 11, 13, 18, 22)
        rows = []

        for code, name, date, report_n in tzip(codes, names, dates, titles):
            time.sleep(random.uniform(self.__sleep_min, self.__sleep_max))
            # The engine sends the queries (two requests per report, per the original note).
            text_only_list, correct_list, now_doc, company_code = self.__engine_read_report(code)

            # The engine returns "No" for a correction disclosure; skip those and empty results.
            if (text_only_list == "No") or (text_only_list == []):
                continue

            rows.append([name, company_code, date, *[text_only_list[i] for i in field_idx]])

        self.report_content = pd.DataFrame(rows, columns=["회사명","기업코드","공시일","관련법규","거래상대방","회사와의 관계","거래금액(백만 원)","이자율(%)"])

    def read_sell_stock(self, doc_list):
        '''Parse reports found with the search keyword "자기주식처분결정".

        Reports whose title contains "기타", "철회" or "주요사항" are dropped
        first. Six positional fields are extracted from each report; their
        positions depend on whether the disclosure date is on/after 2014-01-01.

        Args:
            doc_list: DataFrame providing the columns '코드', "회사명", "공시일"
                and "보고서명".

        Side effects:
            Sets ``self.read_what`` to 'sell' and stores the parsed table in
            ``self.report_content``. The table is always created, even when no
            report could be parsed (it is then empty).
        '''
        self.__isResultExist(self.report_content)
        self.read_what = 'sell'
        doc_list = doc_list.loc[~ doc_list["보고서명"].str.contains("기타"), :]
        doc_list = doc_list.loc[~ doc_list["보고서명"].str.contains("철회"), :]
        doc_list = doc_list.loc[~ doc_list["보고서명"].str.contains("주요사항"), :]

        code_ar = doc_list['코드'].values
        name_ar = doc_list["회사명"].values
        announce_date = doc_list["공시일"].values
        report_name = doc_list["보고서명"].values

        preprocessed_data = []

        for code, name, date, report_n in tzip(code_ar, name_ar, announce_date,report_name):
            time.sleep(random.uniform(self.__sleep_min, self.__sleep_max))
            # The engine sends the queries (two requests per report, per the original note).
            text_only_list, correct_list, now_doc, company_code = self.__engine_read_report(code)

            # The engine returns "No" for a correction disclosure; skip those and empty results.
            if (text_only_list == "No") or (text_only_list == []):
                continue

            # Reports dated from 2014-01-01 on carry the same fields 30 positions later.
            if pd.to_datetime(date) >= pd.to_datetime("2014-01-01"):
                field_idx = (37, 39, 42, 44, 46, 48)
            else:
                field_idx = (7, 9, 12, 14, 16, 18)
            preprocessed_data.append([name, company_code, date] + [text_only_list[i] for i in field_idx])

        # Build the result once, after the loop: assigning it inside the loop
        # left report_content unset whenever no report was parsed and rebuilt
        # the whole frame on every row.
        self.report_content = pd.DataFrame(preprocessed_data, columns=["회사명","기업코드","공시일","처분예정금액(보통주식)(원)","처분예정금액(기타주식)(원)","처분시작일","처분종료일","처분목적","처분방법"])

    def read_change_sell_stock(self, doc_list):
        '''Parse reports found with the search keyword "최대주주변경을수반하는주식".

        Original note: conclusion of a share transfer agreement accompanied by
        a change of the largest shareholder.
        NOTE(review): the output columns are named "차입금액총액(원)" and
        "담보설정금액(원)" -- confirm which report type this parser targets.

        Reports whose title contains "기타", "해제", "취소" or "연장" are dropped
        first (a filter on "양수도" was disabled by the original author).

        Args:
            doc_list: DataFrame providing the columns '코드', "회사명", "공시일"
                and "보고서명".

        Side effects:
            Sets ``self.read_what`` to 'change' and stores the parsed table in
            ``self.report_content``.
        '''
        self.__isResultExist(self.report_content)
        self.read_what = 'change'
        for excluded in ("기타", "해제", "취소", "연장"):
            doc_list = doc_list.loc[~ doc_list["보고서명"].str.contains(excluded), :]

        codes = doc_list['코드'].values
        names = doc_list["회사명"].values
        dates = doc_list["공시일"].values
        titles = doc_list["보고서명"].values

        rows = []

        for code, name, date, report_n in tzip(codes, names, dates, titles):
            time.sleep(random.uniform(self.__sleep_min, self.__sleep_max))
            # The engine sends the queries (two requests per report, per the original note).
            text_only_list, correct_list, now_doc, company_code = self.__engine_read_report(code)

            # The engine returns "No" for a correction disclosure; skip those and empty results.
            if (text_only_list == "No") or (text_only_list == []):
                continue

            first_amount = text_only_list[14]
            second_amount = text_only_list[16]
            rows.append([name, company_code, date, first_amount, second_amount, correct_list, now_doc])

        self.report_content = pd.DataFrame(rows, columns=["회사명","기업코드","공시일","차입금액총액(원)","담보설정금액(원)","정정공시코드","현재공시코드"])

    def read_investment_warning(self, doc_list):
        '''Collect "투자주의" reports together with their disclosure codes.

        Reports whose title contains "ELW" or "예고" are dropped first. The
        report text itself is only used to decide whether a row is skipped;
        no field is extracted from it.

        Args:
            doc_list: DataFrame providing the columns '코드', "회사명", "공시일"
                and "보고서명".

        Side effects:
            Sets ``self.read_what`` to 'warning' and stores the resulting table
            in ``self.report_content``.
        '''
        self.__isResultExist(self.report_content)
        self.read_what = 'warning'

        for excluded in ("ELW", "예고"):
            doc_list = doc_list.loc[~doc_list["보고서명"].str.contains(excluded)]

        codes = doc_list['코드'].values
        names = doc_list["회사명"].values
        dates = doc_list["공시일"].values
        titles = doc_list["보고서명"].values

        rows = []

        for code, name, date, report_n in tzip(codes, names, dates, titles):
            time.sleep(random.uniform(self.__sleep_min, self.__sleep_max))
            # The engine sends the queries (two requests per report, per the original note).
            text_only_list, correct_list, now_doc, company_code = self.__engine_read_report(code)

            # The engine returns "No" for a correction disclosure; skip those and empty results.
            if (text_only_list == "No") or (text_only_list == []):
                continue

            rows.append([name, company_code, date, report_n, correct_list, now_doc])

        self.report_content = pd.DataFrame(rows, columns=["회사명","기업코드","공시일","보고서명","정정공시코드","현재공시코드"])
        print()
        print('Jobs Done')
        print('check the result with .report_content') 

    def read_investment_yellocard(self, doc_list):
        '''Filter a list searched with "투자경고종목지정" or "투자위험종목지정".

        Original note: designation escalates in the order
        투자주의 -> 투자경고 -> 투자위험.

        Reports whose title contains "해제", "예고" or "지정중" are dropped. No
        report is downloaded; the result is just the company name, disclosure
        date and report title of the remaining rows, re-indexed from 0.

        Args:
            doc_list: DataFrame providing the columns "회사명", "공시일" and
                "보고서명".

        Side effects:
            Sets ``self.read_what`` to 'yello_card' and stores the filtered
            table in ``self.report_content``.
        '''
        self.__isResultExist(self.report_content)
        self.read_what = 'yello_card'

        for excluded in ("해제", "예고", "지정중"):
            doc_list = doc_list.loc[~doc_list["보고서명"].str.contains(excluded)]

        kept_columns = [doc_list[col] for col in ("회사명", "공시일", "보고서명")]
        self.report_content = pd.concat(kept_columns, axis=1).reset_index(drop=True)

    def read_management_report(self, doc_list):
        '''Filter a list searched with the keyword "관리종목지정".

        Reports whose title contains any of the excluded keywords below are
        dropped. No report is downloaded; the result is just the company name,
        disclosure date and report title of the remaining rows, re-indexed
        from 0.

        Args:
            doc_list: DataFrame providing the columns "회사명", "공시일" and
                "보고서명".

        Side effects:
            Sets ``self.read_what`` to 'manage' and stores the filtered table
            in ``self.report_content``.
        '''
        self.__isResultExist(self.report_content)
        self.read_what = 'manage'

        excluded_keywords = (
            "기타시장안내",
            "해제",
            "신주인수권",
            "주권매매거래정지",
            "내부결산시점",
            "조회공시요구",
            "소속부변경",
            "기타 경영사항",
            "ETF",
            "투자유의안내",
            "사유변경",
            "우선주",
        )
        for excluded in excluded_keywords:
            doc_list = doc_list.loc[~doc_list["보고서명"].str.contains(excluded)]

        kept_columns = [doc_list[col] for col in ("회사명", "공시일", "보고서명")]
        self.report_content = pd.concat(kept_columns, axis=1).reset_index(drop=True)

    #def read_embezzle(self, doc_list):   # '횡령ㆍ배임사실확인' 로 검색해야함
    #    self.__isResultExist(self.report_content)
    #    doc_list = doc_list['코드']
    #    content = []
    #
    #    for i in tqdm.tqdm(range(len(doc_list))):
    #        time.sleep(random.uniform(self.__sleep_min, self.__sleep_max))
    #        doc = doc_list.copy()[i]
    #        text_only_list = self.__engine_read_report(doc) # 쿼리 날려요 두번 날려요
    #        content.append(text_only_list)
    #        
    #    df = pd.DataFrame(content)
    #    df.to_csv("sample.csv", encoding='utf-8-sig')
    #    return df
    #    
    #    def test(self, doc_list):
    #        doc_list = doc_list['코드']
    #        for i in tqdm.tqdm(range(len(doc_list))):
    #            time.sleep(random.uniform(self.__sleep_min, self.__sleep_max))
    #            doc = doc_list.copy()[i]
    #            self.__engine_read_report(doc, read_sales=True)
    #        
    #        print("Done")

In [None]:
# Search keywords: [단일판매ㆍ공급계약체결 / 업종변경 / 상호변경 / 추가상장 유상증자 /전환사채권발행결정 / 신주인수권부사채권발행결정/투자주의환기종목지정 / 최대주주변경 / 불성실공시법인지정/조회공시요구/타법인주식및출자증권취득결정]
test20 = KIND_REPORTS()

# Fetch the list of '관리종목지정' disclosures for 2022 (market_type '1').
test20.get_list('관리종목지정', start='2022-01-01', end='2022-12-31', market_type='1')

In [None]:
# Filter the fetched list; the result lands in test20.report_content.
test20.read_management_report(test20.report_list)

In [None]:
# Inspect the unfiltered disclosure list.
test20.report_list

In [None]:
# Distinct report titles that survived the filter.
test20.report_content["보고서명"].unique()

In [None]:
# Raise the display limit so up to 63 rows are shown untruncated.
pd.set_option('display.max_rows', 63)

# Show the filtered result.
test20.report_content
#test20.report_content.loc[test20.report_content["보고서명"].str.contains("외부감사")]

# 데이터 저장

* QS-Q는 분기별로 (`freq="QS"`: 분기 시작일, `freq="Q"`: 분기 말일)
* YS-Y는 연도별로 (`freq="YS"`: 연초, `freq="Y"`: 연말)

* cb발행결정 코스닥 2016년에서 오류떴음 -> 수정한듯! 파일 오늘 밤에 돌리고 자자
* 상호변경 코스피 2012년에서 오류

In [None]:
# One query window per calendar year: "YS" yields Jan 1 and "Y" yields Dec 31.
# The start index previously used "Y" as well, which made every window a
# single day (Dec 31 to Dec 31).
# NOTE: recent pandas releases spell the year-end alias "YE".
start_index = pd.date_range(start="2012-01-01",end="2022-12-31",freq="YS")
end_index = pd.date_range(start="2012-01-01",end="2022-12-31",freq="Y")

for start_idx,end_idx in zip(start_index, end_index):
    # Keep only the YYYY-MM-DD part of each timestamp.
    start_idx,end_idx = str(start_idx)[:10], str(end_idx)[:10]
    save_kind = KIND_REPORTS()
    save_kind.get_list("투자경고종목지정",start=start_idx, end=end_idx, market_type='2')
    save_kind.read_investment_warning(save_kind.report_list)       # original note: "340 is Hansol?"
    save_kind.report_content.to_csv("투자경고종목지정-{}_{}-{}.csv".format("코스닥", start_idx, end_idx))

In [None]:
# Year-start ("YS") and year-end ("Y") stamps pair up into one window per calendar year.
start_index = pd.date_range(start="2012-01-01", end="2022-12-31", freq="YS")
end_index = pd.date_range(start="2012-01-01", end="2022-12-31", freq="Y")

# Market label used in the file name -> market_type code passed to get_list.
markets = {"코스피": "1", "코스닥": "2"}

for kor, num in markets.items():
    for year_start, year_end in zip(start_index, end_index):
        # Keep only the YYYY-MM-DD part of each timestamp.
        start_idx = str(year_start)[:10]
        end_idx = str(year_end)[:10]

        save_kind = KIND_REPORTS()
        save_kind.get_list("관리종목", start=start_idx, end=end_idx, market_type=num)
        save_kind.read_management_report(save_kind.report_list)  # original note: "340 is Hansol?"

        out_name = "관리종목_{}_{}-{}.csv".format(kor, start_idx, end_idx)
        save_kind.report_content.to_csv(out_name)

# 데이터 확인

In [None]:
pd.set_option('display.max_rows', 500)

name = 'BW(완)'
market = '코스닥'
year = "2017"

# Path layout: <name>/<market>/<report>_<market>_<year>-01-01-<year>-12-31.csv
# The template has six placeholders; it was previously given only five
# arguments (the market folder was missing), which raised IndexError.
csv_path = "{}/{}/{}_{}_{}-01-01-{}-12-31.csv".format(name, market, "신주인수권부사채발행결정", market, year, year)
df = pd.read_csv(csv_path, index_col=0)
print(df.shape) 

df

In [None]:
# Allow up to 500 rows to be displayed untruncated.
pd.set_option('display.max_rows', 500)

# Load the saved 2016 file; the first CSV column is used as the index.
df = pd.read_csv("CB(완)/코스피/전환사채권발행결정_코스피_2016-01-01-2016-12-31.csv", index_col=0)

# Shape of the rows from slice position 24 onward.
df[24:].shape

In [None]:
# Load the saved 2016 file; the first CSV column is used as the index.
df2 = pd.read_csv("CB(완)/코스닥/전환사채권발행결정_코스닥_2016-01-01-2016-12-31.csv", index_col=0)

# Shape of the rows from slice position 176 onward.
df2[176:].shape