<a href="https://colab.research.google.com/github/HoYoungChun/financial_data_analysis/blob/main/Fama%2BLSV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Basic Setting

In [1]:
# Mount Google Drive; later cells read the company list from, and write
# the results to, paths under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive/'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive/


In [2]:
import pandas as pd
from tqdm import tqdm
from bs4 import BeautifulSoup
#import urllib.request
from urllib import request as rq

In [6]:
# FnGuide URL templates. The '{}' placeholder is filled with a stock code
# through str.format, so the query parameter becomes gicode=A<code>.
# SNAP_URL: page scraped below for the market value.
SNAP_URL = 'https://comp.fnguide.com/SVO2/ASP/SVD_Main.asp?pGB=1&gicode=A{}&cID=&MenuYn=Y&ReportGB=&NewMenuID=101&stkGb=701'
# INV_URL: page scraped below for PER, PBR and PCR.
INV_URL = 'https://comp.fnguide.com/SVO2/ASP/SVD_Invest.asp?pGB=1&gicode=A{}&cID=&MenuYn=Y&ReportGB=&NewMenuID=105&stkGb=701'
# Stock code used to try each scraping step on a single company first.
sample_code = '005930'

### Market Value(시가총액)

In [7]:
# Download the snapshot page for the sample code. The context manager closes
# the HTTP response as soon as the body has been read, instead of leaving the
# connection open until garbage collection.
with rq.urlopen(SNAP_URL.format(sample_code)) as snap_response:
    snap = snap_response.read()
snap_soup = BeautifulSoup(snap,'html.parser')
# All <td class="r"> cells inside the block with id 'svdMainGrid1'.
mv_sells = snap_soup.find('div',{'id':"svdMainGrid1"}).find_all('td',{'class':'r'})
# Index 8 is the cell this notebook treats as the market value; the position
# depends on the page layout, so re-check it if the site changes.
# Thousands separators are removed before converting to float.
market_value = float(mv_sells[8].string.replace(',',''))
market_value

4769856.0

### PER

In [10]:
# Download the investment page for the sample code. The context manager closes
# the HTTP response as soon as the body has been read, instead of leaving the
# connection open until garbage collection.
with rq.urlopen(INV_URL.format(sample_code)) as inv_response:
    inv_html = inv_response.read()
# inv_soup is reused by the PBR and PCR cells below.
inv_soup = BeautifulSoup(inv_html, 'html.parser')
# PER is read from the table row with id 'p_grid1_9'.
per_cells = inv_soup.find('tr',{'id':'p_grid1_9'}).find_all('td',{'class':'r'})
# Index 3 selects one column of that row (presumably the latest period --
# TODO confirm against the page); thousands separators are removed first.
per = float(per_cells[3].string.replace(',',''))
per

21.09

### PBR

In [12]:
# PBR is read from the row with id 'p_grid1_12' of the investment page that
# was parsed into inv_soup in the PER cell.
pbr_row = inv_soup.find('tr', attrs={'id': 'p_grid1_12'})
pbr_cells = pbr_row.find_all('td', attrs={'class': 'r'})
# Same column position (3) as used for PER; strip thousands separators.
pbr_text = pbr_cells[3].string
pbr = float(pbr_text.replace(',', ''))
pbr

2.06

### PCR

In [13]:
# PCR is read from the row with id 'p_grid1_10' of the investment page that
# was parsed into inv_soup in the PER cell.
pcr_row = inv_soup.find('tr', attrs={'id': 'p_grid1_10'})
pcr_cells = pcr_row.find_all('td', attrs={'class': 'r'})
# Same column position (3) as used for PER; strip thousands separators.
pcr_text = pcr_cells[3].string
pcr = float(pcr_text.replace(',', ''))
pcr

9.75

### Get All Stock Codes



In [31]:
# Load the company listing stored on Google Drive.
csv_url = '/content/drive/MyDrive/Colab/financial_data_analysis/company.csv'
# Read the stock-code column explicitly as text. Codes such as '010580' are
# zero-padded, and letting pandas infer the dtype would turn an all-digit
# column into integers and drop the leading zeros that the FnGuide URLs need.
company = pd.read_csv(csv_url, encoding='utf-8', dtype={'종목코드': str})
code_list = company['종목코드']
# Work on a 50-code slice (rows 400-449) rather than the full list.
sample_code_list = code_list[400:450]
sample_code_list

400    010580
401    007980
402    009440
403    008420
404    016880
405    007310
406    008500
407    014790
408    010120
409    009190
410    000760
411    014820
412    010140
413    009680
414    010690
415    000390
416    009450
417    012030
418    005090
419    005750
420    005880
421    008560
422    005870
423    017550
424    011170
425    001430
426    008770
427    014580
428    013520
429    004080
430    006060
431    014530
432    002140
433    016090
434    001620
435    017040
436    011390
437    011810
438    010130
439    012600
440    004720
441    009200
442    001500
443    008870
444    008110
445    005820
446    008490
447    014910
448    003580
449    015860
Name: 종목코드, dtype: object

### Create Crawling Function

In [32]:
def _fetch_soup(url, timeout):
    """Download ``url`` and return the page parsed with BeautifulSoup's 'html.parser'."""
    # The context manager closes the HTTP response even when read() raises.
    with rq.urlopen(url, timeout=timeout) as response:
        return BeautifulSoup(response.read(), 'html.parser')


def _cell_to_float(cells, position):
    """Parse the text of ``cells[position]`` (thousands separated by ',') as a float."""
    return float(cells[position].string.replace(',', ''))


def crawler(li, timeout=30):
    """Scrape market value, PER, PBR and PCR from FnGuide for each stock code.

    Parameters
    ----------
    li : iterable
        Stock codes. Each code is substituted into the ``gicode=A{}`` URL
        parameter and looked up in the notebook-level ``company`` frame
        (column '종목코드') to obtain the company name (column '회사명').
    timeout : float, optional
        Seconds to wait on each HTTP request before giving up on that code.

    Returns
    -------
    pandas.DataFrame
        One row per successfully scraped company, indexed by company name,
        with columns ``['Code', 'Market_Value', 'PER', 'PBR', 'PCR']``.
        The numeric columns are floats. If nothing could be scraped, an empty
        frame with the same columns is returned.

    Notes
    -----
    The crawl is best effort: a code whose name is unknown, whose page lacks
    the expected elements, whose values are not numeric, or whose download
    fails is skipped, and all skipped codes are reported once at the end.
    Rows are keyed by company name, so two codes sharing a name would leave
    only the later one in the result.
    """
    #url setting
    SNAP_URL = 'https://comp.fnguide.com/SVO2/ASP/SVD_Main.asp?pGB=1&gicode=A{}&cID=&MenuYn=Y&ReportGB=&NewMenuID=101&stkGb=701'
    INV_URL = 'https://comp.fnguide.com/SVO2/ASP/SVD_Invest.asp?pGB=1&gicode=A{}&cID=&MenuYn=Y&ReportGB=&NewMenuID=105&stkGb=701'
    columns = ['Code', 'Market_Value', 'PER', 'PBR', 'PCR']

    result = {}
    skipped = []  # (code, exception class name) for every code left out
    for code in tqdm(li):
        try:
            # Resolve the name first so an unknown code costs no HTTP requests.
            name = company.loc[company['종목코드'] == code, '회사명'].values[0]

            snap_soup = _fetch_soup(SNAP_URL.format(code), timeout)
            inv_soup = _fetch_soup(INV_URL.format(code), timeout)

            # Market value: cell 8 among the <td class="r"> cells of 'svdMainGrid1'.
            mv_cells = snap_soup.find('div', {'id': "svdMainGrid1"}).find_all('td', {'class': 'r'})
            market_value = _cell_to_float(mv_cells, 8)

            # PER, PBR and PCR: cell 3 of rows 'p_grid1_9', 'p_grid1_12', 'p_grid1_10'.
            per_cells = inv_soup.find('tr', {'id': 'p_grid1_9'}).find_all('td', {'class': 'r'})
            per = _cell_to_float(per_cells, 3)

            pbr_cells = inv_soup.find('tr', {'id': 'p_grid1_12'}).find_all('td', {'class': 'r'})
            pbr = _cell_to_float(pbr_cells, 3)

            pcr_cells = inv_soup.find('tr', {'id': 'p_grid1_10'}).find_all('td', {'class': 'r'})
            pcr = _cell_to_float(pcr_cells, 3)

            # Stored only once every value parsed, so partial rows never appear.
            result[name] = [code, market_value, per, pbr, pcr]

        except (ValueError, AttributeError, IndexError, TypeError, OSError) as exc:
            # OSError covers urllib's URLError/HTTPError and socket timeouts:
            # one failed download should not discard the rows gathered so far.
            skipped.append((code, type(exc).__name__))

    if skipped:
        details = ', '.join('{} ({})'.format(code, reason) for code, reason in skipped)
        print('Skipped {} code(s) that could not be scraped: {}'.format(len(skipped), details))

    # orient='index' makes each dict key a row label directly. Unlike building
    # a column-wise frame and transposing it, this keeps per-column dtypes and
    # also works when ``result`` is empty.
    return pd.DataFrame.from_dict(result, orient='index', columns=columns)

In [33]:
# Crawl the 50-code sample; the recorded run below took about 2 min 19 s.
result_df = crawler(sample_code_list)
# Show the scraped table (one row per company that could be parsed).
result_df

100%|██████████| 50/50 [02:19<00:00,  2.79s/it]


Unnamed: 0,Code,Market_Value,PER,PBR,PCR
문배철강,8420,1046,8.14,0.58,7.53
오뚜기,7310,19608,19.7,1.31,9.64
한라,14790,2243,2.19,0.65,1.79
LS산전,10120,16710,22.22,1.32,10.41
동원시스템즈,14820,10862,17.95,1.91,8.58
모토닉,9680,3894,26.21,0.9,16.28
삼화페인트공업,390,3360,57.71,1.17,16.7
경동나비엔,9450,8966,16.12,2.03,9.1
동부,12030,2525,25.54,0.47,11.22
삼광글라스,5090,7287,9.68,0.98,5.96


### 소형주 500개

In [34]:
# Keep at most the 500 companies with the smallest market value (small caps),
# ordered from smallest to largest. sort_values returns a new frame, so
# result_df itself is not modified.
tmp_df = result_df.sort_values(by='Market_Value', ascending=True).head(500)
tmp_df

Unnamed: 0,Code,Market_Value,PER,PBR,PCR
원림,5820,612,2.83,0.42,2.48
대동전자,8110,787,7.36,0.22,3.74
문배철강,8420,1046,8.14,0.58,7.53
고려산업,2140,1054,17.83,1.19,11.59
휴니드테크놀러지스,5870,1166,14.08,0.63,8.16
대현,16090,1357,9.3,0.41,5.81
백광소재,14580,1407,24.15,0.81,7.89
신흥,4080,1477,8.49,1.14,6.93
광명전기,17040,1478,18.64,0.85,13.84
부산산업,11390,1531,49.29,2.09,16.85


### PER,PBR,PCR 순위 매기기

In [35]:
# Row-count cut-offs used by the scoring cells: the first 30% of a ranking
# forms the top band, and the 70% mark ends the middle band.
n_ranked = len(tmp_df)
top_range = int(n_ranked * 0.3)
middle_range = int(n_ranked * 0.7)

In [None]:
# Rank companies by PER (ascending) and award points per band:
# first top_range rows -> 3, up to middle_range -> 2, the rest -> 1.
codes_by_per = tmp_df.sort_values(by='PER')['Code']
per_bands = (
    (codes_by_per.iloc[:top_range], 3),
    (codes_by_per.iloc[top_range:middle_range], 2),
    (codes_by_per.iloc[middle_range:], 1),
)
for band_codes, points in per_bands:
    tmp_df.loc[tmp_df['Code'].isin(band_codes), 'PER_Score'] = points
tmp_df

In [None]:
# Rank companies by PBR (ascending) and award points per band:
# first top_range rows -> 3, up to middle_range -> 2, the rest -> 1.
codes_by_pbr = tmp_df.sort_values(by='PBR')['Code']
pbr_bands = (
    (codes_by_pbr.iloc[:top_range], 3),
    (codes_by_pbr.iloc[top_range:middle_range], 2),
    (codes_by_pbr.iloc[middle_range:], 1),
)
for band_codes, points in pbr_bands:
    tmp_df.loc[tmp_df['Code'].isin(band_codes), 'PBR_Score'] = points
tmp_df

In [38]:
# Rank companies by PCR (ascending) and award points per band:
# first top_range rows -> 3, up to middle_range -> 2, the rest -> 1.
codes_by_pcr = tmp_df.sort_values(by='PCR')['Code']
pcr_bands = (
    (codes_by_pcr.iloc[:top_range], 3),
    (codes_by_pcr.iloc[top_range:middle_range], 2),
    (codes_by_pcr.iloc[middle_range:], 1),
)
for band_codes, points in pcr_bands:
    tmp_df.loc[tmp_df['Code'].isin(band_codes), 'PCR_Score'] = points
tmp_df

Unnamed: 0,Code,Market_Value,PER,PBR,PCR,PER_Score,PBR_Score,PCR_Score
원림,5820,612,2.83,0.42,2.48,3.0,3.0,3.0
대동전자,8110,787,7.36,0.22,3.74,3.0,3.0,3.0
문배철강,8420,1046,8.14,0.58,7.53,3.0,3.0,2.0
고려산업,2140,1054,17.83,1.19,11.59,2.0,1.0,1.0
휴니드테크놀러지스,5870,1166,14.08,0.63,8.16,2.0,3.0,2.0
대현,16090,1357,9.3,0.41,5.81,3.0,3.0,3.0
백광소재,14580,1407,24.15,0.81,7.89,1.0,2.0,2.0
신흥,4080,1477,8.49,1.14,6.93,3.0,2.0,2.0
광명전기,17040,1478,18.64,0.85,13.84,2.0,2.0,1.0
부산산업,11390,1531,49.29,2.09,16.85,1.0,1.0,1.0


### 통합 순위 작성

In [39]:
# Add the three band scores into one total per company (each score is 1-3,
# so the total ranges from 3 to 9) and display the best totals first.
per_plus_pbr = tmp_df['PER_Score'].add(tmp_df['PBR_Score'])
tmp_df['Total_Score'] = per_plus_pbr.add(tmp_df['PCR_Score'])
tmp_df.sort_values(by='Total_Score', ascending=False)

Unnamed: 0,Code,Market_Value,PER,PBR,PCR,PER_Score,PBR_Score,PCR_Score,Total_Score
원림,5820,612,2.83,0.42,2.48,3.0,3.0,3.0,9.0
대현,16090,1357,9.3,0.41,5.81,3.0,3.0,3.0,9.0
대동전자,8110,787,7.36,0.22,3.74,3.0,3.0,3.0,9.0
한라,14790,2243,2.19,0.65,1.79,3.0,3.0,3.0,9.0
삼광글라스,5090,7287,9.68,0.98,5.96,3.0,2.0,3.0,8.0
문배철강,8420,1046,8.14,0.58,7.53,3.0,3.0,2.0,8.0
화승인더스트리,6060,4238,10.56,1.08,3.86,3.0,2.0,3.0,8.0
휴니드테크놀러지스,5870,1166,14.08,0.63,8.16,2.0,3.0,2.0,7.0
신흥,4080,1477,8.49,1.14,6.93,3.0,2.0,2.0,7.0
극동유화,14530,1681,15.63,0.68,10.36,2.0,3.0,2.0,7.0


In [43]:
# Keep only companies in the top band for all three ratios (3 + 3 + 3 = 9),
# then preview up to 50 of them ordered by PCR, lowest first.
has_max_score = tmp_df['Total_Score'].eq(9.0)
max_point_df = tmp_df[has_max_score]
max_point_df.sort_values(by='PCR', ascending=True).head(50)

Unnamed: 0,Code,Market_Value,PER,PBR,PCR,PER_Score,PBR_Score,PCR_Score,Total_Score
한라,14790,2243,2.19,0.65,1.79,3.0,3.0,3.0,9.0
원림,5820,612,2.83,0.42,2.48,3.0,3.0,3.0,9.0
대동전자,8110,787,7.36,0.22,3.74,3.0,3.0,3.0,9.0
대현,16090,1357,9.3,0.41,5.81,3.0,3.0,3.0,9.0


In [45]:
# Save the maximum-score companies to Google Drive in both CSV and Excel form.
result_csv_path = '/content/drive/MyDrive/Colab/financial_data_analysis/result.csv'
result_xlsx_path = '/content/drive/MyDrive/Colab/financial_data_analysis/result.xlsx'
max_point_df.to_csv(result_csv_path)
max_point_df.to_excel(result_xlsx_path)