<a href="https://colab.research.google.com/github/HoYoungChun/financial_data_analysis/blob/main/Fama%2BLSV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Basic Settings

In [1]:
# Mount Google Drive at /content/drive/ (google.colab is only available inside Colab).
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [2]:
import pandas as pd
from tqdm import tqdm
from bs4 import BeautifulSoup
#import urllib.request
from urllib import request as rq

In [6]:
# FnGuide page templates; the `{}` placeholder is filled with a stock code via str.format.
# SNAP_URL: page the market value is read from.
SNAP_URL = 'https://comp.fnguide.com/SVO2/ASP/SVD_Main.asp?pGB=1&gicode=A{}&cID=&MenuYn=Y&ReportGB=&NewMenuID=101&stkGb=701'
# INV_URL: page the PER / PBR / PCR values are read from.
INV_URL = 'https://comp.fnguide.com/SVO2/ASP/SVD_Invest.asp?pGB=1&gicode=A{}&cID=&MenuYn=Y&ReportGB=&NewMenuID=105&stkGb=701'
# Stock code used for the single-stock examples in the following cells.
sample_code = '005930'

### Market Value(시가총액)

In [7]:
# Download the snapshot page for the sample stock and parse it.
snap_url = SNAP_URL.format(sample_code)
snap = rq.urlopen(snap_url).read()
snap_soup = BeautifulSoup(snap, 'html.parser')

# Collect the `td.r` cells inside the #svdMainGrid1 block; the cell at
# index 8 is the one parsed as the market value.
main_grid = snap_soup.find('div', id='svdMainGrid1')
mv_sells = main_grid.find_all('td', class_='r')
market_value_text = mv_sells[8].string
market_value = float(market_value_text.replace(',', ''))
market_value

4769856.0

### PER

In [10]:
# Download the investment-indicator page for the sample stock and parse it.
inv_url = INV_URL.format(sample_code)
inv_html = rq.urlopen(inv_url).read()
inv_soup = BeautifulSoup(inv_html, 'html.parser')

# PER is parsed from the `td.r` cell at index 3 of table row #p_grid1_9.
per_row = inv_soup.find('tr', id='p_grid1_9')
per_cells = per_row.find_all('td', class_='r')
per_text = per_cells[3].string
per = float(per_text.replace(',', ''))
per

21.09

### PBR

In [12]:
# PBR is parsed from the `td.r` cell at index 3 of table row #p_grid1_12
# (reuses the already-parsed `inv_soup`).
pbr_row = inv_soup.find('tr', id='p_grid1_12')
pbr_cells = pbr_row.find_all('td', class_='r')
pbr_text = pbr_cells[3].string
pbr = float(pbr_text.replace(',', ''))
pbr

2.06

### PCR

In [13]:
# PCR is parsed from the `td.r` cell at index 3 of table row #p_grid1_10
# (reuses the already-parsed `inv_soup`).
pcr_row = inv_soup.find('tr', id='p_grid1_10')
pcr_cells = pcr_row.find_all('td', class_='r')
pcr_text = pcr_cells[3].string
pcr = float(pcr_text.replace(',', ''))
pcr

9.75

### Get All Stock Codes



In [None]:
# Read the company listing from the mounted Drive folder.
csv_url = '/content/drive/MyDrive/Colab/financial_data_analysis/company.csv'
company = pd.read_csv(csv_url, encoding='utf-8')

# Take the stock-code column and keep a 50-row window (positions 1000-1049)
# as the sample to crawl.
code_list = company.loc[:, '종목코드']
sample_code_list = code_list.iloc[1000:1050]
sample_code_list

### Create Crawling Function

In [21]:
def crawler(li):
    """Scrape market value, PER, PBR and PCR from FnGuide for each stock code.

    Parameters
    ----------
    li : iterable
        Stock codes to crawl. Each code is looked up as-is in the
        notebook-level ``company`` DataFrame (column '종목코드') to obtain the
        company name (column '회사명'). For the request URL the code is
        converted to a string and left-padded with zeros to six characters,
        so integer codes that lost their leading zeros still produce
        a six-character code like the string sample '005930'.

    Returns
    -------
    pandas.DataFrame
        One row per successfully parsed company, indexed by company name,
        with columns ``Code, Market_Value, PER, PBR, PCR``. The frame is
        built row-wise, so pandas infers a dtype per column (the previous
        dict -> transpose route left every column as ``object``). An empty
        frame with the same columns is returned when nothing could be parsed.

    Notes
    -----
    * Codes whose page is missing an expected element or holds a non-numeric
      value are skipped (ValueError / AttributeError / IndexError /
      TypeError). Network errors raised by ``urlopen`` are not caught and
      propagate to the caller.
    * If two codes map to the same company name, the later one overwrites
      the earlier one.
    """
    #url setting
    SNAP_URL = 'https://comp.fnguide.com/SVO2/ASP/SVD_Main.asp?pGB=1&gicode=A{}&cID=&MenuYn=Y&ReportGB=&NewMenuID=101&stkGb=701'
    INV_URL = 'https://comp.fnguide.com/SVO2/ASP/SVD_Invest.asp?pGB=1&gicode=A{}&cID=&MenuYn=Y&ReportGB=&NewMenuID=105&stkGb=701'
    columns = ['Code', 'Market_Value', 'PER', 'PBR', 'PCR']

    def fetch_soup(url):
        # Download a page and parse it; the response is closed on exit.
        with rq.urlopen(url) as response:
            return BeautifulSoup(response.read(), 'html.parser')

    def to_number(cell):
        # Convert the text of a table cell such as '4,769,856' to float.
        return float(cell.string.replace(',', ''))

    def indicator(soup, row_id):
        # Value of the `td.r` cell at index 3 of the given table row.
        return to_number(soup.find('tr', {'id': row_id}).find_all('td', {'class': 'r'})[3])

    result = {}
    for code in tqdm(li):
        try:
            #get company name first: unknown codes are skipped before any request is made
            name = company[company['종목코드'] == code]['회사명'].values[0]

            # zero-pad so that e.g. 60480 is requested as 'A060480'
            gicode = str(code).zfill(6)
            snap_soup = fetch_soup(SNAP_URL.format(gicode))
            inv_soup = fetch_soup(INV_URL.format(gicode))

            #get Market value
            mv_cells = snap_soup.find('div', {'id': "svdMainGrid1"}).find_all('td', {'class': 'r'})
            market_value = to_number(mv_cells[8])

            #get PER, PBR, PCR
            per = indicator(inv_soup, 'p_grid1_9')
            pbr = indicator(inv_soup, 'p_grid1_12')
            pcr = indicator(inv_soup, 'p_grid1_10')

            #insert into result dict
            result[name] = [code, market_value, per, pbr, pcr]

        except (ValueError, AttributeError, IndexError, TypeError):
            # best-effort crawl: skip stocks whose page could not be parsed
            continue

    # Build the frame row-wise with explicit columns so that an empty result
    # still yields a well-formed (0 x 5) DataFrame instead of raising.
    return pd.DataFrame(list(result.values()), index=list(result.keys()), columns=columns)

In [22]:
# Crawl the sampled stock codes (two HTTP requests per code) and show the result.
result_df = crawler(sample_code_list)
result_df

100%|██████████| 50/50 [02:26<00:00,  2.93s/it]


Unnamed: 0,Code,Market_Value,PER,PBR,PCR
포시에스,189690,774,12.0,1.09,10.75
덕산네오룩스,213420,14622,25.09,4.34,21.83
국일신동,60480,547,464.97,0.93,59.69
바디텍메드,206640,5789,12.23,4.43,10.94
하이셈,200470,1231,19.06,1.89,6.22
휴메딕스,200670,3605,15.7,1.67,9.25
서전기전,189860,980,12.09,0.77,8.11
오킨스전자,80580,3412,561.91,7.63,35.53
아이티센,124500,1254,17.05,1.85,8.34
녹십자엠에스,142280,2502,49.36,5.82,33.56


### 소형주 500개

In [23]:
# Keep the (up to) 500 smallest companies by market value.
# sort_values already returns a new frame, so the copy is taken *after*
# slicing: tmp_df is then an independent DataFrame, and the score columns
# added to it in later cells are not written to a slice of another frame.
tmp_df = (
    result_df
    .sort_values(by='Market_Value', ascending=True)
    .head(500)
    .copy()
)
tmp_df

Unnamed: 0,Code,Market_Value,PER,PBR,PCR
파티게임즈,194510,72,10.96,0.29,1.99
국일신동,60480,547,464.97,0.93,59.69
셀바스헬스케어,208370,596,71.56,2.17,18.64
포시에스,189690,774,12.0,1.09,10.75
하이로닉,149980,917,23.03,1.32,17.45
서전기전,189860,980,12.09,0.77,8.11
메디아나,41920,1181,12.58,2.06,10.84
하이셈,200470,1231,19.06,1.89,6.22
영우디에스피,143540,1245,12.65,1.98,10.72
아이티센,124500,1254,17.05,1.85,8.34


### PER,PBR,PCR 순위 매기기

In [24]:
# Row positions that split the ranked list into top 30 % / middle 40 % / bottom 30 %.
n_candidates = len(tmp_df)
top_range, middle_range = int(n_candidates * 0.3), int(n_candidates * 0.7)

In [None]:
# Rank stocks by ascending PER and score them by tier:
# lowest 30 % -> 3, middle 40 % -> 2, remaining 30 % -> 1.
codes_by_per = tmp_df.sort_values(by='PER')['Code']
per_tiers = (
    (codes_by_per.iloc[:top_range], 3),
    (codes_by_per.iloc[top_range:middle_range], 2),
    (codes_by_per.iloc[middle_range:], 1),
)
for per_tier_codes, per_tier_score in per_tiers:
    in_per_tier = tmp_df['Code'].isin(per_tier_codes)
    tmp_df.loc[in_per_tier, 'PER_Score'] = per_tier_score
tmp_df

In [None]:
# Rank stocks by ascending PBR and score them by tier:
# lowest 30 % -> 3, middle 40 % -> 2, remaining 30 % -> 1.
codes_by_pbr = tmp_df.sort_values(by='PBR')['Code']
pbr_tiers = (
    (codes_by_pbr.iloc[:top_range], 3),
    (codes_by_pbr.iloc[top_range:middle_range], 2),
    (codes_by_pbr.iloc[middle_range:], 1),
)
for pbr_tier_codes, pbr_tier_score in pbr_tiers:
    in_pbr_tier = tmp_df['Code'].isin(pbr_tier_codes)
    tmp_df.loc[in_pbr_tier, 'PBR_Score'] = pbr_tier_score
tmp_df

In [27]:
# Rank stocks by ascending PCR and score them by tier:
# lowest 30 % -> 3, middle 40 % -> 2, remaining 30 % -> 1.
codes_by_pcr = tmp_df.sort_values(by='PCR')['Code']
pcr_tiers = (
    (codes_by_pcr.iloc[:top_range], 3),
    (codes_by_pcr.iloc[top_range:middle_range], 2),
    (codes_by_pcr.iloc[middle_range:], 1),
)
for pcr_tier_codes, pcr_tier_score in pcr_tiers:
    in_pcr_tier = tmp_df['Code'].isin(pcr_tier_codes)
    tmp_df.loc[in_pcr_tier, 'PCR_Score'] = pcr_tier_score
tmp_df

Unnamed: 0,Code,Market_Value,PER,PBR,PCR,PER_Score,PBR_Score,PCR_Score
파티게임즈,194510,72,10.96,0.29,1.99,3.0,3.0,3.0
국일신동,60480,547,464.97,0.93,59.69,1.0,3.0,1.0
셀바스헬스케어,208370,596,71.56,2.17,18.64,1.0,2.0,1.0
포시에스,189690,774,12.0,1.09,10.75,3.0,3.0,2.0
하이로닉,149980,917,23.03,1.32,17.45,2.0,3.0,2.0
서전기전,189860,980,12.09,0.77,8.11,3.0,3.0,3.0
메디아나,41920,1181,12.58,2.06,10.84,3.0,2.0,2.0
하이셈,200470,1231,19.06,1.89,6.22,2.0,2.0,3.0
영우디에스피,143540,1245,12.65,1.98,10.72,2.0,2.0,2.0
아이티센,124500,1254,17.05,1.85,8.34,2.0,2.0,3.0


### 통합 순위 작성

In [28]:
# Combined score = PER tier + PBR tier + PCR tier (3..9); show best first.
tmp_df['Total_Score'] = (
    tmp_df['PER_Score']
    .add(tmp_df['PBR_Score'])
    .add(tmp_df['PCR_Score'])
)
tmp_df.sort_values(by='Total_Score', ascending=False)

Unnamed: 0,Code,Market_Value,PER,PBR,PCR,PER_Score,PBR_Score,PCR_Score,Total_Score
파티게임즈,194510,72,10.96,0.29,1.99,3.0,3.0,3.0,9.0
슈피겐코리아,192440,3431,7.57,1.17,6.63,3.0,3.0,3.0,9.0
서전기전,189860,980,12.09,0.77,8.11,3.0,3.0,3.0,9.0
포시에스,189690,774,12.0,1.09,10.75,3.0,3.0,2.0,8.0
랩지노믹스,84650,3727,5.06,2.61,4.68,3.0,2.0,3.0,8.0
하이로닉,149980,917,23.03,1.32,17.45,2.0,3.0,2.0,7.0
메디아나,41920,1181,12.58,2.06,10.84,3.0,2.0,2.0,7.0
하이셈,200470,1231,19.06,1.89,6.22,2.0,2.0,3.0,7.0
아이티센,124500,1254,17.05,1.85,8.34,2.0,2.0,3.0,7.0
휴메딕스,200670,3605,15.7,1.67,9.25,2.0,3.0,2.0,7.0


In [30]:
# Stocks that reached the maximum combined score of 9, listed by ascending PER
# (at most 30 rows shown).
has_max_score = tmp_df['Total_Score'].eq(9.0)
max_point_df = tmp_df[has_max_score]
max_point_df.sort_values(by='PER').head(30)

Unnamed: 0,Code,Market_Value,PER,PBR,PCR,PER_Score,PBR_Score,PCR_Score,Total_Score
슈피겐코리아,192440,3431,7.57,1.17,6.63,3.0,3.0,3.0,9.0
파티게임즈,194510,72,10.96,0.29,1.99,3.0,3.0,3.0,9.0
서전기전,189860,980,12.09,0.77,8.11,3.0,3.0,3.0,9.0
