In [46]:
from io import StringIO

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait, Select

In [109]:
# Load the company list and keep the stock codes as text.
# Without an explicit dtype pandas infers an integer column for all-digit codes, which
# silently drops leading zeros (e.g. '005930' would become 5930) and no longer matches
# the string codes typed into the search form later in this notebook.
kospi_df = pd.read_csv('kospi_company_info.csv', dtype={'stock_code': str})
kospi_codes = kospi_df['stock_code']

In [None]:
# Start a Chrome session and pair it with an explicit wait capped at 15 seconds.
# NOTE(review): a later cell also assigns `driver = webdriver.Chrome()`; running both
# rebinds `driver` without quitting this session — confirm whether this cell is still needed.
driver = webdriver.Chrome()
wait = WebDriverWait(driver, timeout=15)

In [122]:
# Stock code to search for; kept as a string because it is typed into the
# search form's 'AKCKwd' input with send_keys().
code = "271980"

In [132]:
# Launch Chrome, open the KIND "search disclosures by company" page, and remember the
# handle of the window we started in so a window opened later can be told apart from it.
driver = webdriver.Chrome()
wait = WebDriverWait(driver, timeout=15)
driver.get(
    'https://kind.krx.co.kr/disclosure/searchdisclosurebycorp.do'
    '?method=searchDisclosureByCorpMain'
)
original_window_handle = driver.current_window_handle


In [133]:
# Fill in the KIND search form (company code, start date, report title), run the
# search, and click the first matching disclosure link.

# input the company stock code
company_element = wait.until(EC.presence_of_element_located((By.ID, 'AKCKwd')))
company_element.clear()
company_element.send_keys(code)

# input the start date
date_element = wait.until(EC.element_to_be_clickable((By.ID, 'fromDate')))
date_element.clear()
date_element.send_keys('2025-01-01')

# input the target document title (Disclosure of Corporate Governance Report)
# Clear the field first, like the two inputs above, so that re-running this cell does
# not append the title to whatever was typed on the previous run.
report_element = wait.until(EC.presence_of_element_located((By.ID, 'reportNmTemp')))
report_element.clear()
report_element.send_keys('기업지배구조 보고서 공시')

# click on the search button
search_element = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'a.btn-sprite.search-btn')))
search_element.click()

# wait for the search results to load
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'tbody tr')))

# open the first matching search result, which is expected to be the most recent report
disclosure_link = wait.until(
    EC.element_to_be_clickable((By.XPATH, "//a[contains(text(), '기업지배구조 보고서 공시')]"))
)
disclosure_link.click()

In [134]:
# Move the driver to the window opened by clicking the disclosure link.
# Wait until a second window handle exists; reading the handle list immediately after
# the click can still return only the original window.
wait.until(
    lambda d: len(d.window_handles) > 1,
    message='no additional browser window was opened',
)

# Switch to the first handle that is not the window the search was run from.
# The handle saved when the search page was opened is `original_window_handle`
# (the previous `original_window` name was never defined in this notebook).
all_window_handles = driver.window_handles
for handle in all_window_handles:
    if handle != original_window_handle:
        driver.switch_to.window(handle)
        break
print(f'New window: {driver.title}')

New window: 대한민국 대표 기업공시채널 KIND


In [None]:
# 8/19 TODO:
# - Fix how the code below extracts the table.
# - Build a nested dict keyed by corp code that holds the compliance-rate number
#   (e.g. 26.7) together with the DataFrame that gets built.
# NOTE(review): the extraction cells further down only locate `table-group` elements
# after switching into the "docViewFrm" iframe — confirm whether this lookup needs the
# same switch.

# CSS selector for the compliance-status element, waited on for up to 20 seconds.
report = 'table-group[aclass="DisclosureOfComplianceStatusWithKeyIndicatorsOfCorporateGovernanceAbstract"]'
report_locator = (By.CSS_SELECTOR, report)
report_number = WebDriverWait(driver, timeout=20).until(
    EC.visibility_of_element_located(report_locator)
)

In [137]:
# First attempt at extracting the voting-results table from the report window.
# The code switches into the "docViewFrm" iframe, grabs the table's outer HTML, parses
# it with pandas, and relabels the columns using the <th> texts found in the table.
try:
    # A timeout message is supplied so that a failed wait says which step failed
    # instead of raising with an empty "Message:".
    iframe = wait.until(
        EC.presence_of_element_located((By.ID, "docViewFrm")),
        message='iframe "docViewFrm" was not found',
    )
    driver.switch_to.frame(iframe)

    # Wait for the target table to load, then locate it.
    css_selector = 'table-group[aclass="krx-cg_VotingResultsOfTheGeneralMeetingOfShareholdersAbstract"] table.fact-table'
    fact_table_element = wait.until(
        EC.presence_of_element_located((By.CSS_SELECTOR, css_selector)),
        message=f'no element matched {css_selector!r}',
    )

    # Convert the Selenium web element to a string for BeautifulSoup and pandas.
    table_html_string = fact_table_element.get_attribute('outerHTML')

    # Use BeautifulSoup to parse for headers.
    soup = BeautifulSoup(table_html_string, 'html.parser')
    scraped_headers = [th.get_text(strip=True) for th in soup.find_all('th')]

    # Use pandas to read the table from the HTML.
    # Wrapped in StringIO because passing literal HTML to read_html is deprecated.
    dfs = pd.read_html(StringIO(table_html_string), header=None)

    if dfs:
        # Define final headers with the first two columns.
        final_headers = ['총회', '의안'] + scraped_headers[1:]

        # Drop the first parsed row and renumber; done as one chained expression so the
        # index is reset on a new frame rather than in place on a slice.
        df = dfs[0].iloc[1:].reset_index(drop=True)

        # Rename columns with the new headers.
        if len(final_headers) == len(df.columns):
            df.columns = final_headers
            print("Successfully extracted and processed table.")
        else:
            print(f"Error: The number of columns ({len(df.columns)}) does not match the number of headers ({len(final_headers)}).")
    else:
        print("No tables found in the HTML.")

except Exception as e:
    # Report what actually went wrong; the bare message previously printed here hid
    # the exception type and text.
    print(f"Error extracting table: {type(e).__name__}: {e}")

finally:
    # Switch back to the main document from the iframe, regardless of success or failure.
    driver.switch_to.default_content()

Error extracting table.


In [138]:
# Second attempt at extracting the voting-results table. Compared with the cell above
# it waits for visibility (not just presence), takes the headers from the first header
# row only, and falls back to the first data row as header when the counts disagree.
try:
    # 1. Wait for and switch to the iframe.
    # A timeout message is supplied so a failed wait names the step that failed.
    iframe = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.ID, "docViewFrm")),
        message='iframe "docViewFrm" was not found',
    )
    driver.switch_to.frame(iframe)

    # 2. Wait for the target table to be visible.
    css_selector = 'table-group[aclass="krx-cg_VotingResultsOfTheGeneralMeetingOfShareholdersAbstract"] table.fact-table'
    fact_table_element = WebDriverWait(driver, 20).until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, css_selector)),
        message=f'no visible element matched {css_selector!r}',
    )

    # 3. Get the HTML of the table.
    table_html_string = fact_table_element.get_attribute('outerHTML')

    # 4. Use BeautifulSoup to parse the headers from the first <tr>.
    # This is more precise than finding all 'th' tags which might be duplicated in a two-row header.
    # Prefer <thead>, then <tbody>, then the table itself, and tolerate a table that
    # has no <tr> at all instead of failing on a None lookup.
    soup = BeautifulSoup(table_html_string, 'html.parser')
    header_section = soup.find('thead') or soup.find('tbody') or soup
    header_row = header_section.find('tr')
    scraped_headers = (
        [th.get_text(strip=True) for th in header_row.find_all('th')]
        if header_row is not None
        else []
    )

    # 5. Use pandas to read the table.
    # Wrapped in StringIO because passing literal HTML to read_html is deprecated.
    dfs = pd.read_html(StringIO(table_html_string), header=None)

    if dfs:
        df = dfs[0]

        # 6. Correct the number of columns and rename.
        num_columns = len(df.columns)
        if len(scraped_headers) != num_columns:
            print("Warning: Header count and column count do not match. Attempting to adjust...")
            # Typically caused by colspan/rowspan in the header; fall back to using the
            # first parsed row as the column labels and drop it from the data.
            df.columns = df.iloc[0].values
            df = df.iloc[1:]
        else:
            df.columns = scraped_headers

        print(f"Successfully extracted and processed table with {len(df.columns)} columns and {len(df)} rows.")
        print(df.head()) # Print the first few rows to verify.
    else:
        print("No tables found in the HTML.")

except Exception as e:
    print(f"An error occurred: {e}")

finally:
    # Switch back to the main document from the iframe.
    driver.switch_to.default_content()


An error occurred: Message: 
Stacktrace:
	GetHandleVerifier [0x0x7ff7aac6e415+77285]
	GetHandleVerifier [0x0x7ff7aac6e470+77376]
	(No symbol) [0x0x7ff7aaa39a6a]
	(No symbol) [0x0x7ff7aaa90406]
	(No symbol) [0x0x7ff7aaa906bc]
	(No symbol) [0x0x7ff7aaae3ac7]
	(No symbol) [0x0x7ff7aaab864f]
	(No symbol) [0x0x7ff7aaae087f]
	(No symbol) [0x0x7ff7aaab83e3]
	(No symbol) [0x0x7ff7aaa81521]
	(No symbol) [0x0x7ff7aaa822b3]
	GetHandleVerifier [0x0x7ff7aaf51efd+3107021]
	GetHandleVerifier [0x0x7ff7aaf4c29d+3083373]
	GetHandleVerifier [0x0x7ff7aaf6bedd+3213485]
	GetHandleVerifier [0x0x7ff7aac8884e+184862]
	GetHandleVerifier [0x0x7ff7aac9055f+216879]
	GetHandleVerifier [0x0x7ff7aac77084+113236]
	GetHandleVerifier [0x0x7ff7aac77239+113673]
	GetHandleVerifier [0x0x7ff7aac5e298+11368]
	BaseThreadInitThunk [0x0x7ffe9e367374+20]
	RtlUserThreadStart [0x0x7ffe9e5fcc91+33]



In [10]:
# Display the voting-results DataFrame.
# NOTE(review): both extraction cells above show an error in their saved output, and
# this cell's execution count is lower than theirs — confirm which cell produces `df`
# when the notebook is run top to bottom on a fresh kernel.
df

Unnamed: 0,총회,의안,결의 구분,회의 목적사항,가결 여부,의결권 있는 발행주식 총수(1),(1) 중 의결권 행사 주식수,찬성주식수,찬성 주식 비율 (%),반대 기권 등 주식수,반대 기권 등 주식 비율 (%)
0,제38기 정기 주주총회,제2-1호 의안,특별(Extraordinary),이사의 인원수 명확화,가결(Approved),107856043,91736706,91628348,99.9,108358.0,0.1
1,제38기 정기 주주총회,제2-2호 의안,특별(Extraordinary),감사위원 선임 관련 조문 정비,가결(Approved),107856043,91736706,77909093,84.9,13827613.0,15.1
2,제38기 정기 주주총회,제2-3호 의안,특별(Extraordinary),대표이사 사장 선임 방법 명확화,가결(Approved),107856043,74689281,53946867,72.2,20742414.0,27.8
3,제38기 정기 주주총회,제2-4호 의안,특별(Extraordinary),분기배당기준일 변경,가결(Approved),107856043,91736706,91626520,99.9,110186.0,0.1
4,제38기 정기 주주총회,제3호 의안,보통(Ordinary),사내이사 이상학 선임의 건,가결(Approved),107856043,91736706,90539809,98.7,1196897.0,1.3
5,제38기 정기 주주총회,제4-1호 의안,보통(Ordinary),사외이사 손관수 선임의 건,가결(Approved),107856043,91736706,89269740,97.3,2466966.0,2.7
6,제38기 정기 주주총회,제4-2호 의안,보통(Ordinary),사외이사 이지희 선임의 건,가결(Approved),107856043,91736706,90424942,98.6,1311764.0,1.4
7,제38기 정기 주주총회,제5호 의안,보통(Ordinary),감사위원회 위원 손관수 선임의 건,가결(Approved),107856043,74689281,73254834,98.1,1434447.0,1.9
8,제38기 정기 주주총회,제6호 의안,보통(Ordinary),이사 보수한도 승인의 건,가결(Approved),107856043,87807386,87469388,99.6,337998.0,0.4
9,제37기 정기 주주총회,제1호 의안,보통(Ordinary),제37기 재무제표 및 이익잉여금처분계산서 승인의 건,가결(Approved),112809923,87368552,83585189,95.7,3783363.0,4.3


In [11]:
# Split the agenda items by whether the '반대 기권 등 주식수' column has a value:
# rows where it is missing go to `ranked_voting`, all other rows to `majority_voting`.
# NOTE(review): presumably the rows without an opposing-share count are the ones decided
# by a different voting method — confirm against the report.
opposition_missing = df['반대 기권 등 주식수'].isna()
ranked_voting = df[opposition_missing]
majority_voting = df[~opposition_missing]

In [12]:
# Display the rows whose '반대 기권 등 주식수' value is missing.
ranked_voting

Unnamed: 0,총회,의안,결의 구분,회의 목적사항,가결 여부,의결권 있는 발행주식 총수(1),(1) 중 의결권 행사 주식수,찬성주식수,찬성 주식 비율 (%),반대 기권 등 주식수,반대 기권 등 주식 비율 (%)
16,제37기 정기 주주총회,제3-1호 의안,보통(Ordinary),대표이사 사장 방경만 선임의 건 (KT&G 이사회 안),가결(Approved),112809923,165207264,84097688,50.9,,0.0
17,제37기 정기 주주총회,제3-2호 의안,보통(Ordinary),사외이사 임민규 선임의 건 (KT&G 이사회 안),부결(Not approved),112809923,165207264,24505618,14.8,,0.0
18,제37기 정기 주주총회,제3-3호 의안,보통(Ordinary),사외이사 손동환 선임의 건 (주주제안_중소기업은행),가결(Approved),112809923,165207264,56603958,34.3,,0.0


In [13]:
# Display the rows that do have a '반대 기권 등 주식수' value.
majority_voting

Unnamed: 0,총회,의안,결의 구분,회의 목적사항,가결 여부,의결권 있는 발행주식 총수(1),(1) 중 의결권 행사 주식수,찬성주식수,찬성 주식 비율 (%),반대 기권 등 주식수,반대 기권 등 주식 비율 (%)
0,제38기 정기 주주총회,제2-1호 의안,특별(Extraordinary),이사의 인원수 명확화,가결(Approved),107856043,91736706,91628348,99.9,108358.0,0.1
1,제38기 정기 주주총회,제2-2호 의안,특별(Extraordinary),감사위원 선임 관련 조문 정비,가결(Approved),107856043,91736706,77909093,84.9,13827613.0,15.1
2,제38기 정기 주주총회,제2-3호 의안,특별(Extraordinary),대표이사 사장 선임 방법 명확화,가결(Approved),107856043,74689281,53946867,72.2,20742414.0,27.8
3,제38기 정기 주주총회,제2-4호 의안,특별(Extraordinary),분기배당기준일 변경,가결(Approved),107856043,91736706,91626520,99.9,110186.0,0.1
4,제38기 정기 주주총회,제3호 의안,보통(Ordinary),사내이사 이상학 선임의 건,가결(Approved),107856043,91736706,90539809,98.7,1196897.0,1.3
5,제38기 정기 주주총회,제4-1호 의안,보통(Ordinary),사외이사 손관수 선임의 건,가결(Approved),107856043,91736706,89269740,97.3,2466966.0,2.7
6,제38기 정기 주주총회,제4-2호 의안,보통(Ordinary),사외이사 이지희 선임의 건,가결(Approved),107856043,91736706,90424942,98.6,1311764.0,1.4
7,제38기 정기 주주총회,제5호 의안,보통(Ordinary),감사위원회 위원 손관수 선임의 건,가결(Approved),107856043,74689281,73254834,98.1,1434447.0,1.9
8,제38기 정기 주주총회,제6호 의안,보통(Ordinary),이사 보수한도 승인의 건,가결(Approved),107856043,87807386,87469388,99.6,337998.0,0.4
9,제37기 정기 주주총회,제1호 의안,보통(Ordinary),제37기 재무제표 및 이익잉여금처분계산서 승인의 건,가결(Approved),112809923,87368552,83585189,95.7,3783363.0,4.3


In [None]:
# Shut down the WebDriver session once scraping is finished.
driver.quit()