In [4]:
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import FinanceDataReader as fdr

In [5]:
BASE_URL = 'https://finance.naver.com/sise/sise_market_sum.nhn?sosok='

KOSPI_CODE = 0
KOSDAK_CODE = 1
START_PAGE = 1

def main(code):
    res = requests.get(BASE_URL + str(code) + '&page=' + str(START_PAGE))
    page_soup = BeautifulSoup(res.text, 'lxml')
    
    total_page_num = page_soup.select_one('td.pgRR > a')
    total_page_num = int(total_page_num.get('href').split('=')[-1])
    
    ipt_html = page_soup.select_one('div.subcnt_sise_item_top')
    global fields
    fields = [item.get('value') for item in ipt_html.select('input')]
    
    result = [crawl(code,str(page)) for page in range(1, total_page_num + 1)]
    
    df = pd.concat(result, axis = 0, ignore_index = True)
    return df
    
def crawl(code, page):
    global fields
    data = {'menu': 'market_sum',
            'fieldIds': fields,
            'returnUrl': BASE_URL + str(code) + "&page=" + str(page)}
    
    res = requests.post('https://finance.naver.com/sise/field_submit.nhn', data = data)
    page_soup = BeautifulSoup(res.text, 'lxml')
    
    table_html = page_soup.select_one('div.box_type_l')
    
    header_data = [item.get_text().strip() for item in table_html.select('thead th')][1:-1]
    
    inner_data = [item.get_text().strip() for item in table_html.find_all(lambda x:
                                                                            (x.name == 'a' and
                                                                             'tltle' in x.get('class', [])) or
                                                                            (x.name == 'td' and
                                                                             'number' in x.get('class', []))
                                                                            )]
    no_data = [item.get_text().strip() for item in table_html.select('td.no')]
    number_data = np.array(inner_data)
    
    number_data.resize(len(no_data), len(header_data))
    
    df = pd.DataFrame(data = number_data, columns = header_data)
    return df


###현재 네이버에 기재된 매출액, 총자산은 마지막 재무제표 결산일을 기준으로 한다 

In [6]:
KOSPI = main(KOSPI_CODE)
KOSDAK = main(KOSDAK_CODE)

In [7]:
KOSPI = KOSPI.replace('N/A',np.nan).dropna(axis = 0)
KOSPI['PER'] = KOSPI['PER'].str.replace(',','').astype('float64')
KOSPI['ROE'] = KOSPI['ROE'].str.replace(',','').astype('float64')
KOSPI['ROA'] = KOSPI['ROA'].str.replace(',','').astype('float64')
KOSPI['시가총액'] = KOSPI['시가총액'].str.replace(',','').astype('int64')
KOSPI['매출액'] = KOSPI['매출액'].str.replace(',','').astype('int64')
KOSPI['영업이익'] = KOSPI['영업이익'].str.replace(',','').astype('int64')
KOSPI['PSR'] = KOSPI['시가총액'] / KOSPI['매출액']
KOSPI['POR'] = KOSPI['시가총액'] / KOSPI['영업이익']
KOSPI = KOSPI[['종목명','PER','ROE','ROA','PBR','PSR','POR']]
c = ['PER','POR']
KOSPI[c] = KOSPI[c].mask(KOSPI[c] < 0)
KOSPI = KOSPI.dropna(axis = 0)

In [8]:
KOSPI['PBR + PSR'] = KOSPI[['PBR','PSR']].rank(method = 'min').sum(axis = 1)
KOSPI['PBR + PSR + POR'] = KOSPI[['PBR','PSR','POR']].rank(method = 'min').sum(axis = 1)
KOSPI['total_rank'] = (KOSPI[['PER','PBR','PSR']].rank(method = 'min').sum(axis = 1) + KOSPI[['ROE','ROA']].rank(method = 'min',ascending = False).sum(axis = 1))

In [9]:
PBR_PSR = KOSPI[['종목명','PBR','PSR','PBR + PSR']].sort_values(by = 'PBR + PSR')
PBR_PSR = PBR_PSR.drop(['PBR + PSR'],axis = 1)
PBR_PSR.index = list(range(1,len(PBR_PSR)+1))
PBR_PSR

Unnamed: 0,종목명,PBR,PSR
1,한화생명,0.09,0.055111
2,세아홀딩스,0.14,0.057436
3,동양생명,0.18,0.077918
4,삼천리,0.19,0.086087
5,아세아,0.18,0.106263
...,...,...,...
439,신풍제약,34.87,41.477596
440,일양약품,7.63,4.856747
441,NAVER,6.61,7.835229
442,엔씨소프트,6.03,10.478897


In [None]:
PBR_PSR.to_excel('PBR_PSR.xlsx')

In [None]:
PBR_PSR_POR = KOSPI[['종목명','PBR','PSR','POR','PBR + PSR + POR']].sort_values(by = 'PBR + PSR + POR')
PBR_PSR_POR = PBR_PSR_POR.drop(['PBR + PSR + POR'],axis = 1)
PBR_PSR_POR.index = list(range(1,len(PBR_PSR_POR)+1))
PBR_PSR_POR.to_excel('PBR_PSR_POR.xlsx')

In [23]:
TOTAL = KOSPI.sort_values(by = 'total_rank')
TOTAL = TOTAL.drop(['PBR + PSR','PBR + PSR + POR','total_rank'], axis = 1)
TOTAL.index = list(range(1,len(TOTAL)+1))
TOTAL.to_excel('TOTAL.xlsx')

In [2]:
KOSPI = fdr.StockListing('KOSPI')
KOSDAQ = fdr.StockListing('KOSDAQ')
Symbol = KOSPI['Symbol']
Name = KOSPI['Name']
target = [ i for i in zip(Symbol,Name)]
target = pd.DataFrame(target)
target.columns = ['Symbol','Name']

In [11]:
ALL = pd.read_excel('all case.xlsx')
ALL.columns = ALL['Unnamed']

Unnamed: 0.1,Unnamed: 0,PER(배),PBR(배),EPS(원),ROE(%),ROA(%),당기순이익,매출액,영업이익,발행주식수(보통주),시가총액,PSR,POR,total_rank
0,미디어젠,0.0,0.0,406,20.57,13.27,422,954,584,0,272.6,0.285744,0.466781,1.0


Unnamed: 0.1,Unnamed: 0,PER(배),PBR(배),EPS(원),ROE(%),ROA(%),당기순이익,매출액,영업이익,발행주식수(보통주),시가총액,PSR,POR,total_rank
0,미디어젠,0.00,0.00,406,20.57,13.27,422,954,584,0,272.600000,0.285744,0.466781,1.0
1,포시에스,0.00,0.00,406,20.57,13.27,422,954,584,0,397.855247,0.417039,0.681259,2.0
2,피앤씨테크,0.00,0.00,406,20.57,13.27,422,954,584,0,540.517120,0.566580,0.925543,3.0
3,해성산업,0.00,0.00,406,20.57,13.27,422,954,584,0,1198.050000,1.255818,2.051455,4.0
4,이니텍,0.00,0.00,406,20.57,13.27,422,954,584,0,1389.322303,1.456313,2.378977,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
525,진성티이씨,236.12,3.70,151,1.66,0.59,2,439,47,0,2371.879274,5.402914,50.465516,526.0
526,안랩,257.11,1.16,21,0.46,0.39,4,627,25,0,5377.440135,8.576460,215.097605,527.0
527,태광,236.12,3.70,151,1.66,0.59,2,439,47,0,3193.250000,7.273918,67.941489,528.0
528,한글과컴퓨터,155.11,10.00,22,6.66,4.45,7,124,8,0,4143.256874,33.413362,517.907109,529.0


In [25]:
# 첫번째 그룹 수익률 계산.
group1 = list(PBR_PSR_POR.loc[:40,'종목명'])
def EnterJongmok(target):
    StockList1 = []
    for c in group1:
        StockList1.append([c,target.loc[target['Name']==c,'Symbol'].values[0]])
    return StockList1
stock_list1 = EnterJongmok(target)
df_list = [fdr.DataReader(code, '2000-01-01', '2019-12-31')['Close'] for name, code in stock_list1]
df1 = pd.concat(df_list, axis=1)
df1.columns = [name for name, code in stock_list1] 

In [26]:
# 두번째 그룹 수익률 계산.
group2 = list(PBR_PSR_POR.loc[41:80,'종목명'])
def EnterJongmok(target):
    StockList2 = []
    for c in group2:
        StockList2.append([c,target.loc[target['Name']==c,'Symbol'].values[0]])
    return StockList2
stock_list2 = EnterJongmok(target)
df_list = [fdr.DataReader(code, '2000-01-01', '2019-12-31')['Close'] for name, code in stock_list2]
df2 = pd.concat(df_list, axis=1)
df2.columns = [name for name, code in stock_list2] 

In [27]:
# 세번째 그룹 수익률 계산.
group3 = list(PBR_PSR_POR.loc[81:120,'종목명'])
def EnterJongmok(target):
    StockList3 = []
    for c in group3:
        StockList3.append([c,target.loc[target['Name']==c,'Symbol'].values[0]])
    return StockList3
stock_list3 = EnterJongmok(target)
df_list = [fdr.DataReader(code, '2000-01-01', '2019-12-31')['Close'] for name, code in stock_list3]
df3 = pd.concat(df_list, axis=1)
df3.columns = [name for name, code in stock_list3] 

In [28]:
# 네번째 그룹 수익률 계산.
group4 = list(PBR_PSR_POR.loc[121:160,'종목명'])
def EnterJongmok(target):
    StockList4 = []
    for c in group4:
        StockList4.append([c,target.loc[target['Name']==c,'Symbol'].values[0]])
    return StockList4
stock_list4 = EnterJongmok(target)
df_list = [fdr.DataReader(code, '2000-01-01', '2019-12-31')['Close'] for name, code in stock_list4]
df4 = pd.concat(df_list, axis=1)
df4.columns = [name for name, code in stock_list4] 

In [29]:
# 다섯번째 그룹 수익률 계산.
group5 = list(PBR_PSR_POR.loc[161:200,'종목명'])
def EnterJongmok(target):
    StockList5 = []
    for c in group5:
        StockList5.append([c,target.loc[target['Name']==c,'Symbol'].values[0]])
    return StockList5
stock_list5 = EnterJongmok(target)
df_list = [fdr.DataReader(code, '2000-01-01', '2019-12-31')['Close'] for name, code in stock_list5]
df5 = pd.concat(df_list, axis=1)
df5.columns = [name for name, code in stock_list5] 

In [30]:
# 여섯번째 그룹 수익률 계산.
group6 = list(PBR_PSR_POR.loc[201:240,'종목명'])
def EnterJongmok(target):
    StockList6 = []
    for c in group6:
        StockList6.append([c,target.loc[target['Name']==c,'Symbol'].values[0]])
    return StockList6
stock_list6 = EnterJongmok(target)
df_list = [fdr.DataReader(code, '2000-01-01', '2019-12-31')['Close'] for name, code in stock_list6]
df6 = pd.concat(df_list, axis=1)
df6.columns = [name for name, code in stock_list6] 

In [31]:
# 일곱번째 그룹 수익률 계산.
group7 = list(PBR_PSR_POR.loc[241:280,'종목명'])
def EnterJongmok(target):
    StockList7 = []
    for c in group7:
        StockList7.append([c,target.loc[target['Name']==c,'Symbol'].values[0]])
    return StockList7
stock_list7 = EnterJongmok(target)
df_list = [fdr.DataReader(code, '2000-01-01', '2019-12-31')['Close'] for name, code in stock_list7]
df7 = pd.concat(df_list, axis=1)
df7.columns = [name for name, code in stock_list7] 

In [32]:
# 여덟번째 그룹 수익률 계산.
group8 = list(PBR_PSR_POR.loc[281:320,'종목명'])
def EnterJongmok(target):
    StockList8 = []
    for c in group8:
        StockList8.append([c,target.loc[target['Name']==c,'Symbol'].values[0]])
    return StockList8
stock_list8 = EnterJongmok(target)
df_list = [fdr.DataReader(code, '2000-01-01', '2019-12-31')['Close'] for name, code in stock_list8]
df8 = pd.concat(df_list, axis=1)
df8.columns = [name for name, code in stock_list8] 

In [33]:
# 아홉번째 그룹 수익률 계산.
group9 = list(PBR_PSR_POR.loc[321:360,'종목명'])
def EnterJongmok(target):
    StockList9 = []
    for c in group9:
        StockList9.append([c,target.loc[target['Name']==c,'Symbol'].values[0]])
    return StockList9
stock_list9 = EnterJongmok(target)
df_list = [fdr.DataReader(code, '2000-01-01', '2019-12-31')['Close'] for name, code in stock_list9]
df9 = pd.concat(df_list, axis=1)
df9.columns = [name for name, code in stock_list9] 

In [34]:
# 열번째 그룹 수익률 계산.
group10 = list(PBR_PSR_POR.loc[361:400,'종목명'])

def EnterJongmok(target):
    StockList10 = []
    for c in group10:
        StockList10.append([c,target.loc[target['Name']==c,'Symbol'].values[0]])
    return StockList10
stock_list10 = EnterJongmok(target)
df_list = [fdr.DataReader(code, '2000-01-01', '2019-12-31')['Close'] for name, code in stock_list10]
df10 = pd.concat(df_list, axis=1)
df10.columns = [name for name, code in stock_list10] 

In [35]:
# 열한번째 그룹 수익률 계산.
group11 = list(PBR_PSR_POR.loc[401:440,'종목명'])
def EnterJongmok(target):
    StockList11 = []
    for c in group11:
        StockList11.append([c,target.loc[target['Name']==c,'Symbol'].values[0]])
    return StockList11
stock_list11 = EnterJongmok(target)
df_list = [fdr.DataReader(code, '2000-01-01', '2019-12-31')['Close'] for name, code in stock_list11]
df11 = pd.concat(df_list, axis=1)
df11.columns = [name for name, code in stock_list11] 

In [36]:
def CalProfitRatio(df,t_s,t_e):
    res = pd.DataFrame(round(df.apply(lambda x: round(x.loc[t_e]-x.loc[t_s])*100/x.loc[t_s]),2),columns=['수익률 (%)'])    
    return res    

In [37]:
CalProfitRatio(df1,'2010-01-04','2019-12-24').sum(axis = 0)

수익률 (%)    324.37
dtype: float64

In [38]:
CalProfitRatio(df2,'2010-01-04','2019-12-24').sum(axis = 0)

수익률 (%)    1023.04
dtype: float64

In [39]:
CalProfitRatio(df3,'2010-01-04','2019-12-24').sum(axis = 0)

수익률 (%)    1427.38
dtype: float64

In [40]:
CalProfitRatio(df4,'2010-01-04','2019-12-24').sum(axis = 0)

수익률 (%)    1016.98
dtype: float64

In [41]:
CalProfitRatio(df5,'2010-01-04','2019-12-24').sum(axis = 0)

수익률 (%)    2835.87
dtype: float64

In [42]:
CalProfitRatio(df6,'2010-01-04','2019-12-24').sum(axis = 0)

수익률 (%)    1892.48
dtype: float64

In [43]:
CalProfitRatio(df7,'2010-01-04','2019-12-24').sum(axis = 0)

수익률 (%)    2349.51
dtype: float64

In [44]:
CalProfitRatio(df8,'2010-01-04','2019-12-24').sum(axis = 0)

수익률 (%)    3023.97
dtype: float64

In [45]:
CalProfitRatio(df9,'2010-01-04','2019-12-24').sum(axis = 0)

수익률 (%)    2932.19
dtype: float64

In [46]:
CalProfitRatio(df10,'2010-01-04','2019-12-24').sum(axis = 0)

수익률 (%)    10784.8
dtype: float64

In [47]:
CalProfitRatio(df11,'2010-01-04','2019-12-24').sum(axis = 0)

수익률 (%)    8500.77
dtype: float64

In [48]:
url = 'https://finance.naver.com/sise/sise_group.nhn?type=upjong'
req = requests.get(url)
session = requests.session()
res = session.get(url)

html = req.text
soup = BeautifulSoup(html, 'html.parser')
a = soup.select('div > #contentarea > #contentarea_left > table > tr > td > a')

name = []
for i in range(0,79):
    title = a[i].get_text().strip()
    name.append(title)
    
html = req.text
soup = BeautifulSoup(html, 'html.parser')
b = soup.select('div > #contentarea > #contentarea_left > table > tr > td > span')

value = []
for i in range(0,79):
    title = b[i].get_text().strip()
    value.append(title)
target1 = [ i for i in zip(name,value)]
target1 = pd.DataFrame(target1)
target1.columns = ['테마명','등락률']

target1.to_excel('실시간 테마별 수익.xlsx')

In [123]:
target1.head(10)

Unnamed: 0,테마명,등락률
0,095570,AJ네트웍스
1,006840,AK홀딩스
2,027410,BGF
3,282330,BGF리테일
4,138930,BNK금융지주
5,001460,BYC
6,001465,BYC우
7,001040,CJ
8,079160,CJ CGV
9,00104K,CJ4우(전환)


In [50]:
BASE_URL = 'https://finance.naver.com/sise/sise_market_sum.nhn?sosok='

KOSPI_CODE = 0
KOSDAK_CODE = 1
START_PAGE = 1

def main(code):
    res = requests.get(BASE_URL + str(code) + '&page=' + str(START_PAGE))
    page_soup = BeautifulSoup(res.text, 'lxml')
    
    total_page_num = page_soup.select_one('td.pgRR > a')
    total_page_num = int(total_page_num.get('href').split('=')[-1])
    
    ipt_html = page_soup.select_one('div.subcnt_sise_item_top')
    global fields
    fields = [item.get('value') for item in ipt_html.select('input')]
    
    result = [crawl(code,str(page)) for page in range(1, total_page_num + 1)]
    
    df = pd.concat(result, axis = 0, ignore_index = True)
    
    df.to_excel('NaverFinance.final.xlsx')
    
def crawl(code, page):
    global fields
    data = {'menu': 'market_sum',
            'fieldIds': fields,
            'returnUrl': BASE_URL + str(code) + "&page=" + str(page)}
    
    res = requests.post('https://finance.naver.com/sise/field_submit.nhn', data = data)
    page_soup = BeautifulSoup(res.text, 'lxml')
    
    table_html = page_soup.select_one('div.box_type_l')
    
    header_data = [item.get_text().strip() for item in table_html.select('thead th')][1:-1]
    
    inner_data = [item.get_text().strip() for item in table_html.find_all(lambda x:
                                                                            (x.name == 'a' and
                                                                             'tltle' in x.get('class', [])) or
                                                                            (x.name == 'td' and
                                                                             'number' in x.get('class', []))
                                                                            )]
    no_data = [item.get_text().strip() for item in table_html.select('td.no')]
    number_data = np.array(inner_data)
    
    number_data.resize(len(no_data), len(header_data))
    
    df = pd.DataFrame(data = number_data, columns = header_data)
    return df
