<a href="https://colab.research.google.com/github/HoYoungChun/financial_data_analysis/blob/main/EV_EBITDA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Basic Setting

In [2]:
import pandas as pd
from tqdm import tqdm
from bs4 import BeautifulSoup
#import urllib.request
from urllib import request as rq

In [3]:
FS_URL = 'https://comp.fnguide.com/SVO2/ASP/SVD_Finance.asp?pGB=1&gicode=A{}&cID=&MenuYn=Y&ReportGB=&NewMenuID=103&stkGb=701'
INDEX_URL = 'https://comp.fnguide.com/SVO2/ASP/SVD_Invest.asp?pGB=1&gicode=A{}&cID=&MenuYn=Y&ReportGB=&NewMenuID=105&stkGb=701'
sample_code = '005930'

### EV/EBITDA
Earnings(영업이익) before Interest(이자), Tax(법인세), Depreciation(유형자산 감가삼각), Amortization(무형자산 감가삼각)

In [19]:
index_html = rq.urlopen(INDEX_URL.format(sample_code)).read()
index_soup = BeautifulSoup(index_html, 'html.parser')
ev_cells = index_soup.find('tr',{'id':'p_grid1_14'}).find_all('td',{'class':'r'})
ev = float(ev_cells[3].string.replace(',',''))

### Get Gross Profit

In [29]:
fs_html = rq.urlopen(FS_URL.format(sample_code)).read()
fs_soup = BeautifulSoup(fs_html, 'html.parser')
gp_cells = fs_soup.find('div',{'id':'divSonikY'}).find_all('tr',{'class':'rwf'})
gross_profit = float(gp_cells[2].find_all('td')[2].string.replace(',',''))
gross_profit

923187.0

### Get Asset

In [34]:
asset_cells = fs_soup.find('div',{'id':'divDaechaY'}).find_all('tr',{'class':'rwf'})
asset = float(asset_cells[0].find_all('td')[2].string.replace(',',''))
asset

3782357.0

### Get All Stock code

In [35]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
csv_url = '/content/drive/MyDrive/Colab/financial_data_analysis/company.csv'
company = pd.read_csv(csv_url, encoding='utf-8')
code_list = company['종목코드']
sample_code_list = code_list[1000:1050]
sample_code_list

### Create Crawling Function

In [42]:
def crawler(li):
    #url setting
    FS_URL = 'https://comp.fnguide.com/SVO2/ASP/SVD_Finance.asp?pGB=1&gicode=A{}&cID=&MenuYn=Y&ReportGB=&NewMenuID=103&stkGb=701'
    INDEX_URL = 'https://comp.fnguide.com/SVO2/ASP/SVD_Invest.asp?pGB=1&gicode=A{}&cID=&MenuYn=Y&ReportGB=&NewMenuID=105&stkGb=701'

    result ={}
    for code in tqdm(li):
        try:
            index_html = rq.urlopen(INDEX_URL.format(code)).read()
            index_soup = BeautifulSoup(index_html, 'html.parser')
            fs_html = rq.urlopen(FS_URL.format(code)).read()
            fs_soup = BeautifulSoup(fs_html, 'html.parser')

            #get company name
            name = company[company['종목코드'] == code]['회사명'].values[0]

            #get EV/EBITA
            ev_cells = index_soup.find('tr',{'id':'p_grid1_14'}).find_all('td',{'class':'r'})
            ev = float(ev_cells[3].string.replace(',',''))

            #get Gross profit
            gp_cells = fs_soup.find('div',{'id':'divSonikY'}).find_all('tr',{'class':'rwf'})
            gross_profit = float(gp_cells[2].find_all('td')[2].string.replace(',',''))

            #get Asset
            asset_cells = fs_soup.find('div',{'id':'divDaechaY'}).find_all('tr',{'class':'rwf'})
            asset = float(asset_cells[0].find_all('td')[2].string.replace(',',''))

            #insert into result dict
            result[name]=[code, ev, gross_profit, asset]


        except(ValueError, AttributeError, IndexError, TypeError):
            pass
    
    # convert dict into DataFrame
    result_df = pd.DataFrame(result)

    # transposing DataFrame
    result_df = result_df.transpose()

    # set column names
    result_df.columns=['Code','EV_EBITDA','Gross_profit', 'Asset']
    
    return result_df

In [43]:
result_df = crawler(sample_code_list)
result_df



  0%|          | 0/50 [00:00<?, ?it/s][A[A

  2%|▏         | 1/50 [00:05<04:12,  5.16s/it][A[A

  4%|▍         | 2/50 [00:10<04:15,  5.32s/it][A[A

  6%|▌         | 3/50 [00:15<04:07,  5.26s/it][A[A

  8%|▊         | 4/50 [00:20<03:55,  5.13s/it][A[A

 10%|█         | 5/50 [00:25<03:49,  5.10s/it][A[A

 12%|█▏        | 6/50 [00:30<03:40,  5.01s/it][A[A

 14%|█▍        | 7/50 [00:35<03:36,  5.05s/it][A[A

 16%|█▌        | 8/50 [00:41<03:38,  5.19s/it][A[A

 18%|█▊        | 9/50 [00:46<03:32,  5.17s/it][A[A

 20%|██        | 10/50 [00:51<03:30,  5.26s/it][A[A

 22%|██▏       | 11/50 [00:57<03:23,  5.23s/it][A[A

 24%|██▍       | 12/50 [01:01<03:14,  5.13s/it][A[A

 26%|██▌       | 13/50 [01:07<03:09,  5.12s/it][A[A

 28%|██▊       | 14/50 [01:11<03:01,  5.05s/it][A[A

 30%|███       | 15/50 [01:16<02:56,  5.06s/it][A[A

 32%|███▏      | 16/50 [01:22<02:51,  5.05s/it][A[A

 34%|███▍      | 17/50 [01:26<02:45,  5.00s/it][A[A

 36%|███▌      | 18/50 [01

Unnamed: 0,Code,EV_EBITDA,Gross_profit,Asset
퓨쳐스트림네트웍스,214270,13.47,1635,2129
포시에스,189690,11.41,160,570
덕산네오룩스,213420,18.55,536,2209
국일신동,60480,87.98,18,523
바디텍메드,206640,7.88,977,1635
하이셈,200470,8.08,99,1304
휴메딕스,200670,9.26,395,1756
바이오로그디바이스,208710,17.72,43,1011
서전기전,189860,7.26,141,561
오킨스전자,80580,35.88,94,583


In [44]:
tmp_df = result_df.copy()
tmp_df.head(10)

Unnamed: 0,Code,EV_EBITDA,Gross_profit,Asset
퓨쳐스트림네트웍스,214270,13.47,1635,2129
포시에스,189690,11.41,160,570
덕산네오룩스,213420,18.55,536,2209
국일신동,60480,87.98,18,523
바디텍메드,206640,7.88,977,1635
하이셈,200470,8.08,99,1304
휴메딕스,200670,9.26,395,1756
바이오로그디바이스,208710,17.72,43,1011
서전기전,189860,7.26,141,561
오킨스전자,80580,35.88,94,583


In [45]:
tmp_df['GP_A'] = tmp_df['Gross_profit'] / tmp_df['Asset']
tmp_df

Unnamed: 0,Code,EV_EBITDA,Gross_profit,Asset,GP_A
퓨쳐스트림네트웍스,214270,13.47,1635,2129,0.767966
포시에스,189690,11.41,160,570,0.280702
덕산네오룩스,213420,18.55,536,2209,0.242644
국일신동,60480,87.98,18,523,0.0344168
바디텍메드,206640,7.88,977,1635,0.597554
하이셈,200470,8.08,99,1304,0.0759202
휴메딕스,200670,9.26,395,1756,0.224943
바이오로그디바이스,208710,17.72,43,1011,0.0425321
서전기전,189860,7.26,141,561,0.251337
오킨스전자,80580,35.88,94,583,0.161235


### Grading Scores
#### 1. EV/EBITDA Score

In [52]:
top_range = int(len(tmp_df)*0.3)
middle_range = int(len(tmp_df)*0.7)

In [53]:
print(top_range, middle_range)

10 25


In [64]:
#tmp_df[tmp_df['Code'].isin(tmp_df.sort_values(by='EV_EBITDA')[:top_range]['Code'])]
tmp_df.loc[tmp_df['Code'].isin(tmp_df.sort_values(by='EV_EBITDA')[:top_range]['Code']),'EV_EBITDA_Score'] = 3
tmp_df.loc[tmp_df['Code'].isin(tmp_df.sort_values(by='EV_EBITDA')[top_range:middle_range]['Code']),'EV_EBITDA_Score'] = 2
tmp_df.loc[tmp_df['Code'].isin(tmp_df.sort_values(by='EV_EBITDA')[middle_range:]['Code']),'EV_EBITDA_Score'] = 1
tmp_df

Unnamed: 0,Code,EV_EBITDA,Gross_profit,Asset,GP_A,EV_EBITDA_Score
퓨쳐스트림네트웍스,214270,13.47,1635,2129,0.767966,2.0
포시에스,189690,11.41,160,570,0.280702,2.0
덕산네오룩스,213420,18.55,536,2209,0.242644,2.0
국일신동,60480,87.98,18,523,0.0344168,1.0
바디텍메드,206640,7.88,977,1635,0.597554,3.0
하이셈,200470,8.08,99,1304,0.0759202,3.0
휴메딕스,200670,9.26,395,1756,0.224943,3.0
바이오로그디바이스,208710,17.72,43,1011,0.0425321,2.0
서전기전,189860,7.26,141,561,0.251337,3.0
오킨스전자,80580,35.88,94,583,0.161235,1.0
