In [52]:
import pandas as pd
import os

dataset_path = 'data_before/finance_datas' # dart, krx 를 통한 데이터 수집 진행
files_of_finance_datas = os.listdir(dataset_path)
using_columns = [ '배당금지급', '유동자산', '유동부채', '영업이익(손실)', '수익(매출액)',\
                  '당기순이익(손실)', '자본총계', '자산총계', '부채총계', 'ATR', 'MARKETCAP', 'TSF_SLOPE' ]

finance_datas = pd.DataFrame()

for filename in files_of_finance_datas:
    if '.csv' not in filename:
        continue
    
    code = filename.split('_')[0]
    quarter = filename.split('_')[1].split('.')[0]
    
    csv_data = pd.read_csv(os.path.join(dataset_path, filename), encoding='ANSI', skipinitialspace=True)
    
    finance_data = { }
    
    finance_data['종목코드'] = str(code)
    finance_data['분기'] = int(quarter)
    
    skip = False
    
    for using_column in using_columns:
        if using_column not in csv_data.columns:
            skip = True
            break
        finance_data[using_column] = csv_data[using_column].item()
        
    if skip == True:
        continue
        
    finance_datas = finance_datas.append(finance_data, ignore_index=True)

try:
    os.mkdir('data')
except:
    pass

finance_datas.to_csv('data/data_preprocessing_unified.csv', sep=',', encoding='utf-8', index=False)

finance_datas

Unnamed: 0,종목코드,분기,배당금지급,유동자산,유동부채,영업이익(손실),수익(매출액),당기순이익(손실),자본총계,자산총계,부채총계,ATR,MARKETCAP,TSF_SLOPE
0,000020,1.0,4.982243e+09,2.356569e+11,8.182557e+10,1.218255e+10,9.941981e+10,1.293368e+10,3.869576e+11,4.769903e+11,9.003264e+10,0.017,2.321105e+11,-0.127
1,000020,2.0,4.982243e+09,2.316660e+11,7.726278e+10,5.464701e+09,8.999036e+10,2.051404e+10,3.945880e+11,4.817560e+11,8.716788e+10,0.027,2.902080e+11,0.354
2,000020,3.0,4.982243e+09,2.345890e+11,7.876748e+10,2.694848e+09,8.745741e+10,6.157136e+09,4.007350e+11,4.901510e+11,8.941590e+10,0.028,2.737284e+11,-0.024
3,000220,2.0,2.001735e+09,1.002660e+11,4.948049e+10,-3.801753e+08,3.421640e+10,-2.717080e+09,1.230820e+11,2.005860e+11,7.750395e+10,0.027,8.584303e+10,-0.188
4,000220,3.0,2.001735e+09,9.409481e+10,4.727321e+10,-1.510527e+09,3.409087e+10,-2.275938e+09,1.207970e+11,1.965830e+11,7.578627e+10,0.031,7.724171e+10,-0.071
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
637,900070,1.0,0.000000e+00,5.619339e+08,1.435437e+08,6.542623e+06,1.534348e+08,1.255343e+06,6.280200e+08,8.338091e+08,2.057891e+08,0.025,3.735206e+10,-0.064
638,900070,2.0,4.103465e+06,5.786784e+08,1.413904e+08,7.373644e+06,1.502561e+08,1.696553e+07,6.521951e+08,8.565471e+08,2.043521e+08,0.056,4.407006e+10,0.117
639,900070,3.0,4.103465e+06,5.672710e+08,1.290003e+08,9.139425e+06,1.489769e+08,4.868978e+06,6.494237e+08,8.367302e+08,1.873065e+08,0.040,3.342875e+10,-0.169
640,950130,2.0,2.275586e+07,4.193563e+08,3.896446e+07,9.368300e+05,5.022495e+07,1.237934e+07,4.687991e+08,5.555589e+08,8.675981e+07,0.035,2.880174e+11,-0.191


In [None]:
finance_datas = pd.read_csv('data/data_preprocessing_unified.csv', sep=',', encoding='utf-8', dtype={'종목코드':str, '분기':int})

finance_datas

: 

In [58]:
"""
1. 변동성 지표
 - ATR : ATRPP (Average True Range Per Price)

2. 재무안정성 지표
 - 배당수익률
 - Altman Z Score
   - Z > 2.99 : 안정적 상태
   - 1.81 < Z < 2.99 : 주의 요망
   - Z < 1.81 : 파산 가능성이 높음
 - 순이익률 (Net Profit Margin)
 - 자본회전율 (Asset Turnover)

3. 가치성 지표
 - PER (시가총액 / 순이익)
 - PSR (시가총액 / 매출)
 - PBR (시가총액 / 순자산)

4. 모멘텀 지표
 - TSF Slope : TSF (Time Series Forcasting) 지표에 대한 Slope (기울기) 계산 (Percentage 방식)
"""
    
finance_datas['배당수익률'] = finance_datas['배당금지급'] / finance_datas['MARKETCAP']
finance_datas['AltmanZScore'] = 1.2 * finance_datas['유동자산'] / finance_datas['자산총계'] \
                              + 1.4 * finance_datas['유동부채'] / finance_datas['자산총계'] \
                              + 3.3 * finance_datas['영업이익(손실)']  / finance_datas['자산총계'] \
                              + 0.6 * finance_datas['MARKETCAP'] / finance_datas['부채총계'] \
                              + 1.0 * finance_datas['수익(매출액)'] / finance_datas['자산총계']
finance_datas['순이익률'] = finance_datas['당기순이익(손실)'] / finance_datas['수익(매출액)']
finance_datas['자본회전율'] = finance_datas['수익(매출액)'] / finance_datas['자본총계']
    
finance_datas['PER'] = finance_datas['MARKETCAP'] / finance_datas['당기순이익(손실)']
finance_datas['PSR'] = finance_datas['MARKETCAP'] / finance_datas['수익(매출액)']
finance_datas['PBR'] = finance_datas['MARKETCAP'] / (finance_datas['자산총계'] - finance_datas['부채총계'])

finance_datas

Unnamed: 0,종목코드,분기,배당금지급,유동자산,유동부채,영업이익(손실),수익(매출액),당기순이익(손실),자본총계,자산총계,...,ATR,MARKETCAP,TSF_SLOPE,배당수익률,AltmanZScore,순이익률,자본회전율,PER,PSR,PBR
0,000020,1,4.982243e+09,2.356569e+11,8.182557e+10,1.218255e+10,9.941981e+10,1.293368e+10,3.869576e+11,4.769903e+11,...,0.017,2.321105e+11,-0.127,0.021465,2.672581,0.130092,0.256927,17.946209,2.334651,0.599835
1,000020,2,4.982243e+09,2.316660e+11,7.726278e+10,5.464701e+09,8.999036e+10,2.051404e+10,3.945880e+11,4.817560e+11,...,0.027,2.902080e+11,0.354,0.017168,3.023391,0.227958,0.228062,14.146794,3.224878,0.735471
2,000020,3,4.982243e+09,2.345890e+11,7.876748e+10,2.694848e+09,8.745741e+10,6.157136e+09,4.007350e+11,4.901510e+11,...,0.028,2.737284e+11,-0.024,0.018201,2.832657,0.070402,0.218243,44.457101,3.129848,0.683066
3,000220,2,2.001735e+09,1.002660e+11,4.948049e+10,-3.801753e+08,3.421640e+10,-2.717080e+09,1.230820e+11,2.005860e+11,...,0.027,8.584303e+10,-0.188,0.023319,1.774075,-0.079409,0.277997,-31.593851,2.508827,0.697446
4,000220,3,2.001735e+09,9.409481e+10,4.727321e+10,-1.510527e+09,3.409087e+10,-2.275938e+09,1.207970e+11,1.965830e+11,...,0.031,7.724171e+10,-0.071,0.025915,1.670630,-0.066761,0.282216,-33.938408,2.265759,0.639435
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
637,900070,1,0.000000e+00,5.619339e+08,1.435437e+08,6.542623e+06,1.534348e+08,1.255343e+06,6.280200e+08,8.338091e+08,...,0.025,3.735206e+10,-0.064,0.000000,110.163543,0.008182,0.244315,29754.463779,243.439357,59.475908
638,900070,2,4.103465e+06,5.786784e+08,1.413904e+08,7.373644e+06,1.502561e+08,1.696553e+07,6.521951e+08,8.565471e+08,...,0.056,4.407006e+10,0.117,0.000093,130.640149,0.112911,0.230385,2597.623386,293.299687,67.571905
639,900070,3,4.103465e+06,5.672710e+08,1.290003e+08,9.139425e+06,1.489769e+08,4.868978e+06,6.494237e+08,8.367302e+08,...,0.040,3.342875e+10,-0.169,0.000123,108.326015,0.032683,0.229399,6865.660103,224.388821,51.474478
640,950130,2,2.275586e+07,4.193563e+08,3.896446e+07,9.368300e+05,5.022495e+07,1.237934e+07,4.687991e+08,5.555589e+08,...,0.035,2.880174e+11,-0.191,0.000079,1992.925982,0.246478,0.107135,23265.977417,5734.548463,614.372714


In [None]:
finance_datas.to_csv('data/data_preprocessing_unified_including_others.csv', sep=',', encoding='utf-8', index=False)

finance_datas