In [2]:
import pprint
import requests
import re
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup

In [6]:
from itertools import product

# Matches one whitespace character. Written as a raw string so that ``\s`` is
# the regex whitespace class rather than an invalid Python string escape.
_WHITESPACE_RE = re.compile(r'\s')


def _parse_span(cell, attr):
    """Return the ``rowspan``/``colspan`` attribute of ``cell`` as an int >= 0.

    A missing, unparsable (e.g. ``""`` or ``"abc"``) or negative value falls
    back to 1, the HTML default. 0 is returned unchanged so the caller can
    apply its own "span to the end" handling.
    """
    try:
        span = int(cell.get(attr, 1))
    except (TypeError, ValueError):
        return 1
    return span if span >= 0 else 1


def table_to_2d(table_tag):
    """Expand an HTML table into a rectangular list of lists of cell text.

    Cells covered by a ``rowspan``/``colspan`` are filled with the text of the
    spanning cell, so every row of the result has the same length.

    Parameters
    ----------
    table_tag
        Object exposing ``find_all('tr')`` whose rows expose
        ``find_all(['td', 'th'], recursive=False)`` and whose cells expose
        ``get(attr, default)`` and ``get_text()`` (e.g. a BeautifulSoup tag).

    Returns
    -------
    list[list[str | None]]
        One inner list per ``<tr>``. Each entry is the cell text with *all*
        whitespace removed, or ``None`` for grid positions no cell covers.
    """
    rows = table_tag.find_all('tr')

    # First scan: work out how many columns the grid needs.
    rowspans = []  # remaining row counts of rowspans still active
    colcount = 0
    for r, row in enumerate(rows):
        cells = row.find_all(['td', 'th'], recursive=False)
        # Count columns including spans and rowspans carried over from
        # preceding rows. The colspan of the *last* cell is deliberately
        # counted as 1 so an over-long trailing colspan cannot create
        # phantom columns. A colspan of 0 ("fill until the end") is only
        # meaningful on the last cell, so it counts as 1 elsewhere.
        colcount = max(
            colcount,
            sum(_parse_span(c, 'colspan') or 1 for c in cells[:-1])
            + len(cells[-1:])
            + len(rowspans))
        # Update rowspan bookkeeping; a rowspan of 0 spans to the bottom.
        rowspans += [_parse_span(c, 'rowspan') or len(rows) - r for c in cells]
        rowspans = [s - 1 for s in rowspans if s > 1]

    # Rowspans still active after the last row are simply ignored: there are
    # no further rows for them to extend into.

    # Build an empty matrix for all possible cells.
    table = [[None] * colcount for row in rows]

    # Second scan: fill the matrix from the row data.
    rowspans = {}  # column number -> rows still covered by a pending rowspan
    for row, row_elem in enumerate(rows):
        span_offset = 0  # columns skipped so far due to row and colspans
        for col, cell in enumerate(row_elem.find_all(['td', 'th'], recursive=False)):
            # Shift right past preceding colspans and active rowspans.
            col += span_offset
            while rowspans.get(col, 0):
                span_offset += 1
                col += 1

            rowspan = rowspans[col] = _parse_span(cell, 'rowspan') or len(rows) - row
            colspan = _parse_span(cell, 'colspan') or colcount - col
            # The next cell is offset by this cell's colspan.
            span_offset += colspan - 1
            value = _WHITESPACE_RE.sub('', cell.get_text())
            for drow, dcol in product(range(rowspan), range(colspan)):
                try:
                    table[row + drow][col + dcol] = value
                    rowspans[col + dcol] = rowspan
                except IndexError:
                    # rowspan or colspan reaches outside the table; ignore.
                    pass

        # One row consumed: decrement pending rowspans and drop finished ones.
        rowspans = {c: s - 1 for c, s in rowspans.items() if s > 1}

    return table

In [73]:
def get_result(name, timeout=10):
    """Download and tabulate the 'tb_type1 tb_num tb_type1_ifrs' table for a stock.

    Parameters
    ----------
    name : str
        Stock code appended to the Naver Finance URL as the ``code`` query
        value (e.g. ``'033640'``).
    timeout : float, optional
        Seconds passed to ``requests.get`` so a stalled connection cannot
        hang the notebook. Defaults to 10.

    Returns
    -------
    pandas.DataFrame
        Rows are indexed by the first column of the scraped table (from the
        fourth table row onward); columns are labelled
        ``"<row 0 header>_<row 1 header>"``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the page does not contain the expected table.
    """
    URL = "https://finance.naver.com/item/main.nhn?code=" + name
    response = requests.get(URL, timeout=timeout)
    # Fail on 4xx/5xx here instead of parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', attrs={'class':'tb_type1 tb_num tb_type1_ifrs'})
    if table is None:
        # Without this guard the failure would surface as an opaque
        # AttributeError on None inside table_to_2d.
        raise ValueError("financial summary table not found for code " + repr(name))

    data_table = table_to_2d(table)

    # Column labels combine the first two header rows; column 0 holds the
    # row labels and is therefore skipped.
    col_name = [top + "_" + sub for top, sub in zip(data_table[0][1:], data_table[1][1:])]
    # Data starts at the fourth row; row index 2 is skipped.
    # NOTE(review): presumably a third header row -- confirm against the page.
    body_rows = data_table[3:]
    row_name = [row[0] for row in body_rows]
    data = np.array(body_rows)[:,1:]
    result = pd.DataFrame(data, index = row_name ,columns = col_name)
    return result

In [19]:
# Debug probe: search the parsed page for a <table> whose class is 'first'.
# The recorded output is None, i.e. no table matched this filter.
# NOTE(review): relies on the notebook-level `soup` assigned in another cell.
print(soup.find('table', attrs={'class':'first'}))


None


In [67]:
# Load the company list from sheet 'Sheet1' of a local Excel workbook.
# The stock-code column is converted with `str`
# (presumably to keep leading zeros such as in '033640' -- confirm).
# NOTE(review): absolute, machine-specific Windows path; the notebook will not
# run elsewhere without editing it.
df = pd.read_excel('C:\\Users\\312263\\Desktop\\python\\stock\\법인목록.xlsx'
                   ,sheet_name = 'Sheet1',converters={'종목코드':str})

In [20]:
# Exploratory fetch for a single stock code: download the page, parse it and
# convert one summary table into a 2D list.
# The names assigned here (`soup`, `data_table`, ...) are read by other cells.
name = '033640'
URL = "https://finance.naver.com/item/main.nhn?code=" + name
# NOTE(review): despite its name, this holds the HTTP response for `name`.
samsung_electronic = requests.get(URL)
html = samsung_electronic.text
soup = BeautifulSoup(html, 'html.parser')
# First table matched by the selector; `[0]` fails if nothing matches.
table = soup.select('#tab_con1 > div.first > table')[0]
data_table = table_to_2d(table)

In [24]:
# Render the 2D list produced by table_to_2d as a DataFrame for inspection.
pd.DataFrame(data_table)

Unnamed: 0,0,1
0,시가총액,"5,188억원"
1,시가총액순위,코스닥70위
2,상장주식수,23059202
3,액면가l매매단위,500원l1주


In [None]:
# Scratch list of lookups for the individual summary tables on the page.
# These are bare expressions: nothing is assigned, and the notebook displays
# only the value of the last one.
# Total-amount (market cap) related
soup.select('#tab_con1 > div.first > table')[0]
# Exhaustion-rate related
soup.select('#tab_con1 > div.gray > table.lwidth')[0]
# Opinion related
soup.select('#tab_con1 > div.gray > table.rwidth')[0]
# "peps" related (presumably PER/EPS -- confirm)
soup.find('table', attrs={'summary':'per_table'})
# Same-industry PER related
soup.find('table', attrs={'summary':'동일업종 PER 정보'})
# Unused selector note: #tab_con1 > div:nth-child(6) > table

In [77]:
# Scrape the financial table for the first stock code in the company list.
b = get_result(df['종목코드'].iloc[0])

In [79]:
# Show the result transposed: one row per reporting period, one column per metric.
b.T

Unnamed: 0,매출액,영업이익,당기순이익,영업이익률,순이익률,ROE(지배주주),부채비율,당좌비율,유보율,EPS(원),BPS(원),주당배당금(원),시가배당률(%),배당성향(%)
최근연간실적_2016.12,13360.0,804.0,647.0,6.02,4.84,18.73,113.05,164.3,3510.89,3120.0,17959.0,,,22.12
최근연간실적_2017.12,14734.0,729.0,552.0,4.95,3.74,13.72,93.37,179.1,3941.58,2600.0,19927.0,,,26.54
최근연간실적_2018.12,14249.0,702.0,552.0,4.93,3.88,12.59,83.59,187.22,4269.97,2614.0,21602.0,,,26.4
최근연간실적_2019.12(E),15031.0,770.0,553.0,5.12,3.68,,,,,2877.0,,,,
최근분기실적_,,,,,,,,,,,,,,
최근분기실적_,,,,,,,,,,,,,,
최근분기실적_,,,,,,,,,,,,,,
최근분기실적_2018.09,3468.0,196.0,152.0,5.66,4.39,,80.09,192.94,4176.82,721.0,21131.0,,,
최근분기실적_2018.12,4284.0,211.0,151.0,4.93,3.53,,83.59,187.22,4269.97,719.0,21602.0,,,
최근분기실적_2019.03(E),,,,,,,,,,,,,,
