# 종목별 연도별 상승일/하락일 계산

상승일/하락일 기준: 일간 수익률(Change)이 +1% 이상이면 상승일, -1% 이하이면 하락일로 집계한다.

관련파일 : 

In [1]:
import FinanceDataReader as fdr
fdr.__version__

'0.6.0'

In [2]:
import numpy as np 
from statsmodels import regression 
import statsmodels.api as sm 
import matplotlib.pyplot as plt 
import math 
import pandas as pd 
import pandas_datareader as pdr

In [3]:
# import FinanceDataReader as fdr

# Every stock listed on the Korea Exchange (KRX)
# Delisted stocks do not seem to be returned by this query
df_krx = fdr.StockListing('KRX')
len(df_krx)

2216

### 1. 종목별 기간 조회 함수 정의

In [4]:
def stock_select(code, strt_year='2012', end_date='2018-09-30'):
    """Fetch the price history of one ticker and add derived columns.

    The data comes from ``fdr.DataReader(code, strt_year, end_date)``; its
    index is moved into a regular column, and these columns are appended:

    * ``Year`` -- the ``.year`` of each row's ``Date``.
    * ``Code`` -- the ``code`` argument, repeated on every row.
    * ``high_change`` / ``low_change`` -- ``High`` and ``Low`` expressed as a
      relative change against the previous close implied by ``Close`` and
      ``Change``.

    Args:
        code: Ticker symbol passed through to ``fdr.DataReader``.
        strt_year: Start of the requested period.
        end_date: End of the requested period.

    Returns:
        The fetched DataFrame with the extra columns described above.
    """
    # Turn the Date index into an ordinary column
    prices = fdr.DataReader(code, strt_year, end_date).reset_index()

    # Calendar year of every row, plus the ticker itself
    prices['Year'] = prices.Date.map(lambda stamp: stamp.year)
    prices['Code'] = code

    # Previous close reconstructed from the current close and its change
    # (assumes Change is the fractional close-to-close return -- TODO confirm)
    prev_close = prices['Close'] / (1 + prices['Change'])

    # Intraday high / low measured against that previous close
    for price_col, change_col in (('High', 'high_change'), ('Low', 'low_change')):
        prices[change_col] = (prices[price_col] - prev_close) / prev_close

    return prices


In [12]:
# Spot-check stock_select on a single ticker and preview the first rows
stock_df = stock_select('049070', '2012','2015')
stock_df.head()

Unnamed: 0,Date,Close,Open,High,Low,Volume,Change,Year,Code,high_change,low_change
0,2014-03-14,21650,21850,22200,21450,32100.0,-0.0137,2014,49070,0.011356,-0.022811
1,2014-03-17,21550,21700,21800,20850,75510.0,-0.0046,2014,49070,0.006948,-0.036933
2,2014-03-18,21450,21700,21800,21450,20610.0,-0.0046,2014,49070,0.011642,-0.0046
3,2014-03-19,21350,21650,21650,21300,6470.0,-0.0047,2014,49070,0.009285,-0.007031
4,2014-03-20,21600,21300,21750,21300,16660.0,0.0117,2014,49070,0.018726,-0.002351


In [22]:

    
# Prefixes of the per-year counter columns stored for every stock; each one
# becomes "<prefix>_<year>" (for example "up_2012").
day_count_keys = ("up", "down", "tot", "high", "low")

# One single-row frame per processed stock. The frames are combined once after
# the loop, so the result does not depend on a `year_days_df` left over from an
# earlier run of this cell: concatenating onto that name without ever creating
# it fails with NameError on a fresh kernel and piles up duplicate rows when
# the cell is run again.
stock_frames = []

for x in range(0,5):  # use len(df_krx) here to process every listed stock
    # Ticker symbol and display name of the stock at position x
    code = df_krx.Symbol.iloc[x]
    name = df_krx.Name.iloc[x]

    # Result row for this stock: identifiers first, counters added per year
    year_days_dict = {}
    year_days_dict['Code'] = code
    year_days_dict['Name'] = name

    # Count days year by year
    for year in range(2012,2018+1):  # keep in sync with the 2018 cut-off below

        # The final year is fetched up to a fixed cut-off date; every other
        # year is fetched up to the following year and filtered afterwards.
        if year == 2018:
            end_year = '2018-09-30'
        else:
            end_year = year+1

        # Fetch the data and keep only the rows that belong to `year`
        try:
            stock_df = stock_select(code, str(year),str(end_year))
            target_df = stock_df[stock_df['Year'] == year]
            print(code, name, target_df['Date'].min(), "-", target_df['Date'].max(), "추출")
        except Exception:
            # `Exception` instead of a bare except, so Ctrl-C still stops a
            # long run instead of being reported as a failed download.
            print(code, name, year, "종목정보 추출 실패")
            # Store blanks for this year and move on to the next one
            for key in day_count_keys:
                year_days_dict[key + "_" + str(year)] = ""
            continue

        try:
            counts = {
                # Days whose change reached +1% / -1%, and all days of the year
                "up": len(target_df.query("Change >= 0.01")),
                "down": len(target_df.query("Change <= -0.01")),
                "tot": len(target_df),
                # Days whose intraday high / low moved 25% or more from the
                # previous close
                "high": len(target_df.query("high_change >= 0.25")),
                "low": len(target_df.query("low_change <= -0.25")),
            }
        except Exception:
            # Counting failed: store blanks for this year
            counts = dict.fromkeys(day_count_keys, "")

        # Store this year's counters in the stock's row
        for key in day_count_keys:
            year_days_dict[key + "_" + str(year)] = counts[key]

    # Turn the finished row into a one-row frame and queue it for the final
    # concatenation
    stock_frames.append(pd.Series(year_days_dict).to_frame().T)
    print(x, "번 : ", code, name, "Days 생성완료")

# Combine all stocks into the final table (each row keeps index 0, exactly as
# repeated concatenation of the one-row frames would produce).
if stock_frames:
    year_days_df = pd.concat(stock_frames)
else:
    year_days_df = pd.DataFrame()
    


001040 CJ 2012-01-02 00:00:00 - 2012-12-28 00:00:00 추출
001040 CJ 2013-01-02 00:00:00 - 2013-12-31 00:00:00 추출
001040 CJ 2014-01-01 00:00:00 - 2014-12-30 00:00:00 추출
001040 CJ 2015-01-02 00:00:00 - 2015-12-30 00:00:00 추출
001040 CJ 2016-01-03 00:00:00 - 2016-12-29 00:00:00 추출
001040 CJ 2017-01-01 00:00:00 - 2017-12-28 00:00:00 추출
001040 CJ 2018-01-02 00:00:00 - 2018-09-30 00:00:00 추출
0 번 :  001040 CJ Days 생성완료
011150 CJ씨푸드 2012-01-02 00:00:00 - 2012-12-28 00:00:00 추출
011150 CJ씨푸드 2013-01-02 00:00:00 - 2013-12-31 00:00:00 추출
011150 CJ씨푸드 2014-01-02 00:00:00 - 2014-12-30 00:00:00 추출
011150 CJ씨푸드 2015-01-02 00:00:00 - 2015-12-30 00:00:00 추출
011150 CJ씨푸드 2016-01-04 00:00:00 - 2016-12-29 00:00:00 추출
011150 CJ씨푸드 2017-01-02 00:00:00 - 2017-12-28 00:00:00 추출
011150 CJ씨푸드 2018-01-02 00:00:00 - 2018-09-30 00:00:00 추출
1 번 :  011150 CJ씨푸드 Days 생성완료
012630 HDC 2012 종목정보 추출 실패
012630 HDC 2013 종목정보 추출 실패
012630 HDC 2014 종목정보 추출 실패
012630 HDC 2015 종목정보 추출 실패
012630 HDC 2016 종목정보 추출 실패
012630 HDC 2017 종

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.




082740 HSD엔진 2012-01-02 00:00:00 - 2012-12-28 00:00:00 추출
082740 HSD엔진 2013-01-02 00:00:00 - 2013-12-31 00:00:00 추출
082740 HSD엔진 2014-01-02 00:00:00 - 2014-12-30 00:00:00 추출
082740 HSD엔진 2015-01-01 00:00:00 - 2015-12-30 00:00:00 추출
082740 HSD엔진 2016-01-04 00:00:00 - 2016-12-29 00:00:00 추출
082740 HSD엔진 2017-01-02 00:00:00 - 2017-12-28 00:00:00 추출
082740 HSD엔진 2018-01-02 00:00:00 - 2018-09-28 00:00:00 추출
3 번 :  082740 HSD엔진 Days 생성완료
001390 KG케미칼 2012-01-02 00:00:00 - 2012-12-28 00:00:00 추출
001390 KG케미칼 2013-01-02 00:00:00 - 2013-12-31 00:00:00 추출
001390 KG케미칼 2014-01-01 00:00:00 - 2014-12-30 00:00:00 추출
001390 KG케미칼 2015-01-01 00:00:00 - 2015-12-30 00:00:00 추출
001390 KG케미칼 2016-01-04 00:00:00 - 2016-12-29 00:00:00 추출
001390 KG케미칼 2017-01-02 00:00:00 - 2017-12-28 00:00:00 추출
001390 KG케미칼 2018-01-01 00:00:00 - 2018-09-28 00:00:00 추출
4 번 :  001390 KG케미칼 Days 생성완료


In [23]:
# Display the per-stock table of yearly day counts
year_days_df

Unnamed: 0,Code,Name,down_2012,down_2013,down_2014,down_2015,down_2016,down_2017,down_2018,tot_2012,...,tot_2016,tot_2017,tot_2018,up_2012,up_2013,up_2014,up_2015,up_2016,up_2017,up_2018
0,1040,CJ,48.0,79.0,70.0,80.0,84.0,72.0,54.0,248.0,...,267.0,261.0,198.0,61.0,74.0,76.0,91.0,62.0,61.0,38.0
0,11150,CJ씨푸드,66.0,55.0,43.0,72.0,70.0,39.0,41.0,248.0,...,260.0,254.0,196.0,76.0,43.0,41.0,78.0,63.0,36.0,38.0
0,12630,HDC,,,,,,,,,...,,,,,,,,,,
0,82740,HSD엔진,74.0,76.0,80.0,83.0,97.0,96.0,51.0,248.0,...,277.0,261.0,182.0,65.0,68.0,64.0,57.0,89.0,85.0,69.0
0,1390,KG케미칼,44.0,50.0,64.0,78.0,44.0,54.0,65.0,248.0,...,250.0,260.0,205.0,52.0,60.0,68.0,71.0,51.0,55.0,68.0


In [40]:
# Previous close implied by the current close and the signed daily change.
# The sign matches stock_select, which divides by (1 + Change); dividing by
# (1 - Change) would only be right if Change were stored with its sign flipped.
bf_close = Close/(1+Change)

27699.530516431925

In [25]:
# Inspect the current contents of target_df (assigned inside the per-year loop)
target_df

Unnamed: 0,Date,Close,Open,High,Low,Volume,Change,Year,Code
0,2018-01-01,24500,24500,24500,24500,0.0,0.0000,2018,001390
1,2018-01-02,26950,25100,27500,25050,624970.0,0.1000,2018,001390
2,2018-01-03,27700,27800,29150,26900,881320.0,0.0278,2018,001390
3,2018-01-04,26550,27000,27900,25800,573460.0,-0.0415,2018,001390
4,2018-01-05,27200,26600,27650,26250,464810.0,0.0245,2018,001390
5,2018-01-07,27200,27200,27200,27200,0.0,0.0000,2018,001390
6,2018-01-08,26400,27200,28250,25850,456660.0,-0.0294,2018,001390
7,2018-01-09,26200,26350,26700,25000,389180.0,-0.0076,2018,001390
8,2018-01-10,24800,26400,26650,24800,352230.0,-0.0534,2018,001390
9,2018-01-11,25750,24850,25900,24500,267290.0,0.0383,2018,001390
