In [1]:
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import FinanceDataReader as fdr

In [2]:
BASE_URL = 'https://finance.naver.com/sise/sise_market_sum.nhn?sosok='

KOSPI_CODE = 0
KOSDAK_CODE = 1
START_PAGE = 1
field = []

def main(code):
    res = requests.get(BASE_URL + str(code) + '&page=' + str(START_PAGE))
    page_soup = BeautifulSoup(res.text, 'lxml')
    
    total_page_num = page_soup.select_one('td.pgRR > a')
    total_page_num = int(total_page_num.get('href').split('=')[-1])
    
    ipt_html = page_soup.select_one('div.subcnt_sise_item_top')
    global fields
    fields = [item.get('value') for item in ipt_html.select('input')]
    
    result = [crawl(code,str(page)) for page in range(1, total_page_num + 1)]
    
    df = pd.concat(result, axis = 0, ignore_index = True)
    return df
    
def crawl(code, page):
    global fields
    data = {'menu': 'market_sum',
            'fieldIds': fields,
            'returnUrl': BASE_URL + str(code) + "&page=" + str(page)}
    
    res = requests.post('https://finance.naver.com/sise/field_submit.nhn', data = data)
    page_soup = BeautifulSoup(res.text, 'lxml')
    
    table_html = page_soup.select_one('div.box_type_l')
    
    header_data = [item.get_text().strip() for item in table_html.select('thead th')][1:-1]
    
    inner_data = [item.get_text().strip() for item in table_html.find_all(lambda x:
                                                                            (x.name == 'a' and
                                                                             'tltle' in x.get('class', [])) or
                                                                            (x.name == 'td' and
                                                                             'number' in x.get('class', []))
                                                                            )]
    no_data = [item.get_text().strip() for item in table_html.select('td.no')]
    number_data = np.array(inner_data)
    
    number_data.resize(len(no_data), len(header_data))
    
    df = pd.DataFrame(data = number_data, columns = header_data)
    return df
main(KOSPI_CODE)

Unnamed: 0,종목명,현재가,전일비,등락률,액면가,거래량,거래대금,전일거래량,시가,고가,...,주당순이익,보통주배당금,매출액증가율,영업이익증가율,외국인비율,PER,ROE,ROA,PBR,유보율
0,삼성전자,57400,900,+1.59%,100,2691429,154525,18685880,57400,57600,...,3196,1416,-5.49,-52.84,55.79,17.96,8.69,6.28,1.49,28856.0
1,SK하이닉스,78600,200,+0.26%,5000,267256,21003,2786930,78700,79000,...,3140,1000,-33.27,-86.99,47.09,25.03,4.25,3.14,1.09,1287.0
2,NAVER,313500,1000,-0.32%,100,102119,32178,1308247,316000,318000,...,4290,376,18.02,-24.66,55.18,73.08,10.56,3.58,6.59,44216.1
3,삼성바이오로직스,777000,1000,+0.13%,2500,5156,4010,85511,777000,781000,...,5191,,30.94,64.77,10.23,149.68,4.77,3.41,11.57,2539.1
4,LG화학,710000,10000,-1.39%,5000,81162,58125,635302,720000,726000,...,5801,2000,1.57,-60.12,35.95,122.39,1.84,1.20,3.13,4362.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1558,KBSTAR 팔라듐선물인버스(H),5475,25,+0.46%,0,3,0,1102,5475,5475,...,,,,,0.00,,,,,
1559,ARIRANG 200동일가중,8730,0,0.00%,0,0,0,76,8730,8730,...,,,,,0.00,,,,,
1560,KBSTAR 200금융,6785,85,+1.27%,0,2,0,3821,6750,6785,...,,,,,0.00,,,,,
1561,KBSTAR 200경기소비재,8600,70,+0.82%,0,1,0,392,8600,8600,...,,,,,0.00,,,,,


###현재 네이버에 기재된 매출액, 총자산은 마지막 재무제표 결산일을 기준으로 한다 

In [3]:
KOSPI = main(KOSPI_CODE)
KOSDAK = main(KOSDAK_CODE)

In [4]:
KOSPI = KOSPI.replace('N/A',np.nan).dropna(axis = 0)
KOSPI['PER'] = KOSPI['PER'].str.replace(',','').astype('float64')
KOSPI['ROE'] = KOSPI['ROE'].str.replace(',','').astype('float64')
KOSPI['ROA'] = KOSPI['ROA'].str.replace(',','').astype('float64')
KOSPI['시가총액'] = KOSPI['시가총액'].str.replace(',','').astype('int64')
KOSPI['매출액'] = KOSPI['매출액'].str.replace(',','').astype('int64')
KOSPI['영업이익'] = KOSPI['영업이익'].str.replace(',','').astype('int64')
KOSPI['PSR'] = KOSPI['시가총액'] / KOSPI['매출액']
KOSPI['POR'] = KOSPI['시가총액'] / KOSPI['영업이익']
KOSPI = KOSPI[['종목명','PER','ROE','ROA','PBR','PSR','POR']]
c = ['PER','POR']
KOSPI[c] = KOSPI[c].mask(KOSPI[c] < 0)
KOSPI = KOSPI.dropna(axis = 0)

In [5]:
KOSPI['PBR + PSR'] = KOSPI[['PBR','PSR']].rank(method = 'min').sum(axis = 1)
KOSPI['PBR + PSR + POR'] = KOSPI[['PBR','PSR','POR']].rank(method = 'min').sum(axis = 1)
KOSPI['total_rank'] = (KOSPI[['PER','PBR','PSR']].rank(method = 'min').sum(axis = 1) + KOSPI[['ROE','ROA']].rank(method = 'min',ascending = False).sum(axis = 1))

In [6]:
PBR_PSR = KOSPI[['종목명','PBR','PSR','PBR + PSR']].sort_values(by = 'PBR + PSR')
PBR_PSR = PBR_PSR.drop(['PBR + PSR'],axis = 1)
PBR_PSR.index = list(range(1,len(PBR_PSR)+1))
PBR_PSR

Unnamed: 0,종목명,PBR,PSR
1,한화생명,0.09,0.055288
2,세아홀딩스,0.14,0.057436
3,동양생명,0.18,0.078686
4,삼천리,0.19,0.086203
5,아세아,0.18,0.107235
...,...,...,...
439,일양약품,7.61,4.845040
440,유나이티드제약,5.99,7.891098
441,NAVER,6.59,7.810325
442,엔씨소프트,6.06,10.530508


In [7]:
#PBR_PSR.to_excel('PBR_PSR.xlsx')

In [8]:
PBR_PSR_POR = KOSPI[['종목명','PBR','PSR','POR','PBR + PSR + POR']].sort_values(by = 'PBR + PSR + POR')
PBR_PSR_POR = PBR_PSR_POR.drop(['PBR + PSR + POR'],axis = 1)
PBR_PSR_POR.index = list(range(1,len(PBR_PSR_POR)+1))
#PBR_PSR_POR.to_excel('PBR_PSR_POR.xlsx')

In [9]:
TOTAL = KOSPI.sort_values(by = 'total_rank')
TOTAL = TOTAL.drop(['PBR + PSR','PBR + PSR + POR','total_rank'], axis = 1)
TOTAL.index = list(range(1,len(TOTAL)+1))
#TOTAL.to_excel('TOTAL.xlsx')

In [45]:
KOSPI = fdr.StockListing('KOSPI')
KOSDAQ = fdr.StockListing('KOSDAQ')
Symbol = KOSDAQ['Symbol']
Name = KOSDAQ['Name']
target = [ i for i in zip(Symbol,Name)]
target = pd.DataFrame(target)
target.columns = ['Symbol','Name']

### 코스닥 상위 종목

In [94]:
ALL = pd.read_excel('all case_v2.xlsx')
c = ['매출액','PER(배)','PBR(배)']
ALL[c] = ALL[c].mask(ALL[c] <= 0)
ALL = ALL.dropna(axis = 0)
ALL.index = list(range(1,len(ALL)+1))
ALL = ALL.drop(['당기순이익','영업이익','EPS(원)','발행주식수(보통주)','total_rank'],axis = 1)
ALL

Unnamed: 0,종목명,PER(배),PBR(배),ROE(%),ROA(%),매출액,시가총액,PSR,POR
1,이글루시큐리티,8.02,1.24,17.61,11.71,771.0,503.622250,0.653207,3.965530
2,우리넷,5.05,0.62,12.85,9.43,496.0,400.403159,0.807264,6.458115
3,삼영엠텍,8.02,1.24,17.61,11.71,771.0,514.800000,0.667704,4.053543
4,코위버,5.16,0.55,12.01,9.93,600.0,798.439200,1.330732,12.878052
5,시큐브,5.05,0.62,12.85,9.43,496.0,591.600000,1.192742,9.541935
...,...,...,...,...,...,...,...,...,...
201,진성티이씨,236.12,3.70,1.66,0.59,439.0,2371.879274,5.402914,50.465516
202,안랩,257.11,1.16,0.46,0.39,627.0,5377.440135,8.576460,215.097605
203,태광,236.12,3.70,1.66,0.59,439.0,3193.250000,7.273918,67.941489
204,한글과컴퓨터,155.11,10.00,6.66,4.45,124.0,4143.256874,33.413362,517.907109


### 테마별 수익률 (당일 계속 크롤링)

In [131]:
url = 'https://finance.naver.com/sise/sise_group.nhn?type=upjong'
req = requests.get(url)
session = requests.session()
res = session.get(url)

html = req.text
soup = BeautifulSoup(html, 'html.parser')
a = soup.select('div > #contentarea > #contentarea_left > table > tr > td > a')

name = []
for i in range(0,79):
    title = a[i].get_text().strip()
    name.append(title)

b = soup.select('div > #contentarea > #contentarea_left > table > tr > td > span')

value = []
for i in range(0,79):
    title = b[i].get_text().strip()
    value.append(title)
target1 = [ i for i in zip(name,value)]
target1 = pd.DataFrame(target1)
target1.columns = ['테마명','등락률']

target1.to_excel('실시간 테마별 수익.xlsx')

In [132]:
target1.head(10)

Unnamed: 0,테마명,등락률
0,독립전력생산및에너지거래,+7.02%
1,전기장비,+5.58%
2,조선,+5.41%
3,생명과학도구및서비스,+5.21%
4,증권,+4.86%
5,디스플레이패널,+4.31%
6,에너지장비및서비스,+3.84%
7,기계,+3.07%
8,통신장비,+2.86%
9,건강관리장비와용품,+2.65%


### 실시간 뉴스 타이틀 크롤링

In [133]:
import urllib.request
import urllib.parse
from bs4 import BeautifulSoup

plusUrl = urllib.parse.quote_plus(input('검색어를 입력하세요:'))

pageNum = 1
count = 1

i = input('몇페이지 크롤링 할까요? : ')

lastPage = int(i) * 10 - 9
while pageNum < lastPage + 1:
    url = f'https://search.naver.com/search.naver?date_from=&date_option=0&date_to=&dup_remove=1&nso=&post_blogurl=&post_blogurl_without=&query={plusUrl}&sm=tab_pge&srchby=all&st=sim&where=post&start={pageNum}'

    html = urllib.request.urlopen(url).read()
    soup = BeautifulSoup(html, 'html.parser')

    title = soup.find_all(class_='sh_blog_title')

    print(f'-----{count}페이지 결과입니다.-----')
    for i in title:
        print(i.attrs['title'])
        print(i.attrs['href'])
    print()
        
    pageNum += 10
    count += 1

검색어를 입력하세요:삼성전자
몇페이지 크롤링 할까요? : 5
-----1페이지 결과입니다.-----
삼성전자 주식 수익률 340배, 35년 장기 투자 그리고 어이없는 양도세 납부.. 그의 이름 양향자..
https://blog.naver.com/djgkrrl1234?Redirect=Log&logNo=222078395976
삼성전자, 상해종합지수, 신한 레버리지 WTI 원유선물ETN, KODEX WTI 원유선물 확인하기~
https://blog.naver.com/horang_j_h?Redirect=Log&logNo=222082813892
2010년에 삼성전자에 투자했었더라면.
https://blog.naver.com/how2invest?Redirect=Log&logNo=222077219143
9월 7일의 기업분석 Letter - 삼성전자(005930)
https://blog.naver.com/ionia17?Redirect=Log&logNo=222082684896
삼성전자? or 테슬라? 응 아냐 엔비디아야!
https://blog.naver.com/peteryim15?Redirect=Log&logNo=222035385112
삼성전자 주가흐름은 왜 지지부진한가?
https://blog.naver.com/masterwu?Redirect=Log&logNo=222074860326
[추억시리즈] 삼성전자 그리고 중소형주
https://tosoha1.blog.me/221769660469
삼성전자 주가 앞으로의 전망이 궁금하다면?
https://blog.naver.com/poohgirlhs?Redirect=Log&logNo=222039528646
삼성전자를 산게 아니고 외국인은 코스피 선물지수를 샀을 뿐이다
https://thyoon55.blog.me/222045613688
삼성전자 -- 엔비디아 훈풍, 비메모리 보폭 확대
https://blog.naver.com/pokara61?Redirect=Log&logNo=222079115275

-----2페이지 결과입니다.----