In [2]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

In [3]:
raw = pd.read_csv('Data/kospi.csv', encoding = 'cp949')

# 전처리

In [4]:
raw.head()

Unnamed: 0,일자,현재지수,대비,등락률(%),배당수익률(%),주가이익비율,주가자산비율,시가지수,고가지수,저가지수,거래량(천주),거래대금(백만원),상장시가총액(백만원)
0,1980/01/04,100.0,,-,0.0,0.0,0.0,,,,96,603,
1,1980/01/05,100.15,,-,0.0,0.0,0.0,,,,131,776,
2,1980/01/07,102.53,,-,0.0,0.0,0.0,,,,358,2030,
3,1980/01/08,105.28,,-,0.0,0.0,0.0,,,,796,5567,
4,1980/01/09,106.37,,-,0.0,0.0,0.0,,,,552,3706,


In [5]:
raw.dtypes

일자              object
현재지수            object
대비             float64
등락률(%)          object
배당수익률(%)       float64
주가이익비율         float64
주가자산비율         float64
시가지수            object
고가지수            object
저가지수            object
거래량(천주)         object
거래대금(백만원)       object
상장시가총액(백만원)     object
dtype: object

In [6]:
raw['일자'] = raw['일자'].apply(lambda x: x.replace('/','')).astype('int')

raw['현재지수'] = raw['현재지수'].apply(lambda x: x.replace(',','')).astype('float')

raw['등락률(%)'] = raw['현재지수'].pct_change().fillna(0) +1

raw = raw[['일자','현재지수','등락률(%)']]

In [7]:
raw.head()

Unnamed: 0,일자,현재지수,등락률(%)
0,19800104,100.0,1.0
1,19800105,100.15,1.0015
2,19800107,102.53,1.023764
3,19800108,105.28,1.026821
4,19800109,106.37,1.010353


In [8]:
raw['GIJUN_MONTH'] = raw['일자'].apply(lambda x: x//100)

In [9]:
raw.tail()

Unnamed: 0,일자,현재지수,등락률(%),GIJUN_MONTH
10411,20180813,2248.45,0.984957,201808
10412,20180814,2258.91,1.004652,201808
10413,20180816,2240.8,0.991983,201808
10414,20180817,2247.05,1.002789,201808
10415,20180820,2247.88,1.000369,201808


# Historical Simulation을 위한 기초 data 생성

In [10]:
column_list = ['GIJUN_MONTH','SILH_SUIK_RT','KOSPI(월초)','KOSPI(월말)']
data = DataFrame(columns = column_list)

# 월초
jisu1_list = []

#월말
jisu2_list = []

month_list = []
suik_list = []

for month in raw['GIJUN_MONTH'].unique():
    temp = raw[raw['GIJUN_MONTH'] == month]
    
    # 2018년 8월 자료는 제외
    if month == 201808: continue
    
    # 월 기록
    month_list.append(month)
    
    # 월초 주가 기록
    jisu1_list.append(temp['현재지수'].iloc[0])
    
    # 월말 주가 기록
    jisu2_list.append(temp['현재지수'].iloc[-1])
    
    # 월 수익률 기록
    suik_list.append(np.prod(temp['등락률(%)']))
    
    
    
data['GIJUN_MONTH'] = month_list
data['SILH_SUIK_RT'] = suik_list
data['KOSPI(월초)'] = jisu1_list
data['KOSPI(월말)'] = jisu2_list

In [11]:
data

Unnamed: 0,GIJUN_MONTH,SILH_SUIK_RT,KOSPI(월초),KOSPI(월말)
0,198001,1.058900,100.00,105.89
1,198002,0.979696,104.48,103.74
2,198003,1.012146,103.96,105.00
3,198004,1.105619,105.43,116.09
4,198005,1.002929,116.54,116.43
5,198006,0.967276,116.77,112.62
6,198007,1.000799,112.05,112.71
7,198008,0.999468,112.28,112.65
8,198009,0.979583,112.55,110.35
9,198010,0.957952,109.85,105.71


In [12]:
data.to_csv('RAW_DATA.csv')

# 시뮬레이션 시작

In [13]:
def find_true_r(temp_list):
    
    # 방정식을 풀어 얻은 해 중, 1에 가장 가까운 값을 갖는 것을 사용한다.
    test_list = [np.abs(r-1) for r in temp_list]
    
    return temp_list[np.argmin(test_list)]

In [30]:
from collections import OrderedDict
from sympy import Symbol, solve

n_period = 24

investment = 10000
total_investment = n_period * investment

rf_annual = 0.03
rf_monthly = np.power(1+rf_annual, 1/12) -1



store_dict = OrderedDict(\
                         {'기준 월' : [],
                          'KOSPI_0' : [],
                          'KOSPI_T' : [],
                          
                          '전체 시나리오' : [],
                          '메인 시나리오' : [],
                          '세부 시나리오' : [],
                          
                          'LS 수익률(%)' : [],
                          'BH 수익률(%)' : [],
                          'DCA1 수익률(%)' : [],
                          'DCA1* 수익률(%)' : [],
                          
                          'P_0' : [],
                          'P_T' : [],
                          'H_DCA1' : [],
                          'H_DCA1*' : [],                          
                          
                          })
#for i in range(1):
for i in range(len(data)):
    
    if i % 50 == 0 :
        print(i)
    
    temp = data.iloc[i:i+n_period]
    
    # full year가 아니면 제외
    if not len(temp) == n_period:
        continue
    
    #기준 월 기록
    gijun_month = temp['GIJUN_MONTH'].iloc[0]
    store_dict['기준 월'].append(gijun_month)
    
    # 최초시점 KOSPI 기록
    kospi_0 = temp['KOSPI(월초)'].iloc[0]
    store_dict['KOSPI_0'].append(kospi_0)
    
    # 마지막 시점 KOSPI 기록
    kospi_t = temp['KOSPI(월말)'].iloc[-1]
    store_dict['KOSPI_T'].append(kospi_t)
    
    # state : 각각의 시나리오 (ex: 상상)를 기록할 변수
    total_senario = ''
    main_senario = ''
    sebu_senario = '' 

    ################ total_senario 계산 ##################
    for index in temp.index:
        row = temp.loc[index, :]
        
        # kospi_new : 해당 기의 마지막 날 주가
        kospi_new = row['KOSPI(월말)']
           
        # t기간 수익률
        suik_t = kospi_new / kospi_0 
        
        ##### 상, 하 state 기록
        if suik_t >= 1:
            total_senario += '상'
        else:
            total_senario += '하'
    
    # total senario = xxxxxxxxxxxx (x : n_period)
    store_dict['전체 시나리오'].append(total_senario)
    
    ######################## 메인 시나리오 계산 ###########################
    
    up = 0
    down = 0
    
    for letter in total_senario:
        if letter == '상':
            up += 1
        elif letter == '하':
            down += 1
    
    # 메인 시나리오 계산
    # up이 절반 이상이면
    if up > n_period/2 :
        main_senario += '강'
    
    elif down > n_period/2 :
        main_senario += '약'
    
    elif up == n_period/2 :
        main_senario += '보'
    
    store_dict['메인 시나리오'].append(main_senario)
    
    
    
    ############################ 세부 시나리오 계산 ############################
    # 전기 (최초 n_period/2 개월) 
    up = 0
    down = 0
    
    for letter in total_senario[:n_period//2]:
        if letter == '상':
            up += 1
        elif letter == '하':
            down += 1          
    
    if up > n_period/4 :
        sebu_senario += '강'
    if down > n_period/4 :
        sebu_senario += '약'
    if up == n_period/4 :
        sebu_senario += '보'
        
    # 후기
    up = 0
    down = 0
    
    for letter in total_senario[n_period//2:]:
        if letter == '상':
            up += 1
        elif letter == '하':
            down += 1          
    
    if up > n_period/4 :
        sebu_senario += '강'
    if down > n_period/4 :
        sebu_senario += '약'
    if up == n_period/4 :
        sebu_senario += '보'
        
    store_dict['세부 시나리오'].append(sebu_senario)
    
    
    
    ################### P_0 , P_T 계산 ###############################
    P_0 = temp['KOSPI(월초)'].iloc[0]
    P_T = temp['KOSPI(월말)'].iloc[-1]    
    
    P_T_0 = P_T / P_0
    
    ###################### LS , BH 수익률 계산 ###################
    LS_HPR = P_T / P_0 -1  # HPR  0.xx 
    LS_annual = np.power(LS_HPR +1 , 12/n_period) -1 # annual 0.xxx
    
    BH_HPR = (total_investment/2 * (P_T/P_0) + 
              total_investment/2 * np.power( 1+ rf_annual, 12/ n_period)) / total_investment -1
    
    
    BH_annual = np.power(BH_HPR +1, 12/n_period) -1
    
    store_dict['LS 수익률(%)'].append(round(LS_annual * 100,3))
    store_dict['BH 수익률(%)'].append(round(BH_annual * 100,3))
    
    
    
    
    ##################### DCA1 FV 계산 ########################
    FV_sum = 0
    
    for i in range(n_period):
        FV_sum += investment * np.prod(temp['SILH_SUIK_RT'].iloc[i:])
        
    # FV_sum : 적립식 투자안의 미래가치의 총합
    FV_sum = np.round(FV_sum,2)
    
    #################### DCA1 수익률 계산 ########################
    DCA1_HPR = FV_sum / total_investment -1 # HPR 0.xxx
    DCA1_annual = np.power(DCA1_HPR +1, 12 / n_period) -1  # annual 0.xxx
    DCA1_monthly = np.power(DCA1_annual +1, 1/n_period) -1
    
    store_dict['DCA1 수익률(%)'].append(round(DCA1_annual * 100 , 3))

    
    
    
    ################### DCA1* 수익률 계산 ##########################
    irr_path = np.ones(n_period+1) * -investment
    irr_path[-1] += (FV_sum + investment)
    
    # DCA1_star : 월별 수익률
    # IRR_annual : 연환산 수익률
    DCA1_star_monthly = np.irr(irr_path)    # monthly 0. xxxx
    DCA1_star_HPR = np.power(DCA1_star_monthly +1 ,n_period) -1
    DCA1_star_annual = np.power(DCA1_star_monthly +1, 12) -1 # annual 0. xxx
    
    store_dict['DCA1* 수익률(%)'].append(np.round(DCA1_star_annual * 100,3))

    
    H_DCA1 = P_T / (1 + DCA1_HPR)
    H_DCA1_star = round(P_T / (1+ DCA1_star_HPR),5)
    
    store_dict['P_0'].append(1)
    store_dict['P_T'].append(P_T/ P_0)
    
    store_dict['H_DCA1'].append(H_DCA1/P_0)
    store_dict['H_DCA1*'].append(H_DCA1_star/P_0)
        
    
    
# df 결과 값 저장 : DataFrame    
df_DCA = DataFrame(store_dict)

df_DCA.T.to_csv('모든 시나리오 %s개월.csv'%n_period)

0
50
100
150
200
250
300
350
400
450


In [31]:
df_DCA.head()

Unnamed: 0,기준 월,KOSPI_0,KOSPI_T,전체 시나리오,메인 시나리오,세부 시나리오,LS 수익률(%),BH 수익률(%),DCA1 수익률(%),DCA1* 수익률(%),P_0,P_T,H_DCA1,H_DCA1*
0,198001,100.0,131.3,상상상상상상상상상상상상상상상상상상상상상상상상,강,강강,14.586,7.886,6.648,12.888,1,1.313,1.154413,1.030309
1,198002,104.48,125.65,하상상상상상상상상상상상하상상상상상상상상상상상,강,강강,9.664,5.297,3.728,7.198,1,1.202623,1.117729,1.046543
2,198003,103.96,128.78,상상상상상상상상상상상상상상상상상상상상상상상상,강,강강,11.299,6.152,4.632,8.955,1,1.238746,1.131496,1.043496
3,198004,105.43,127.83,상상상상상상상상상상하상상상상상상상상상상상상상,강,강강,10.112,5.531,3.765,7.27,1,1.212463,1.126065,1.053682
4,198005,116.54,121.4,하하하하하하하하하하하상상상상상상상상상상상상상,강,약강,2.064,1.405,0.696,1.337,1,1.041702,1.027359,1.014394


In [32]:
df_DCA.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,430,431,432,433,434,435,436,437,438,439
기준 월,198001,198002,198003,198004,198005,198006,198007,198008,198009,198010,...,201511,201512,201601,201602,201603,201604,201605,201606,201607,201608
KOSPI_0,100,104.48,103.96,105.43,116.54,116.77,112.05,112.28,112.55,109.85,...,2035.24,2023.93,1918.76,1924.82,1947.42,1973.57,1978.15,1982.72,1987.32,2029.61
KOSPI_T,131.3,125.65,128.78,127.83,121.4,114.57,123.64,125.08,121.94,119.71,...,2523.43,2476.37,2467.49,2566.46,2427.36,2445.85,2515.38,2423.01,2326.13,2295.26
전체 시나리오,상상상상상상상상상상상상상상상상상상상상상상상상,하상상상상상상상상상상상하상상상상상상상상상상상,상상상상상상상상상상상상상상상상상상상상상상상상,상상상상상상상상상상하상상상상상상상상상상상상상,하하하하하하하하하하하상상상상상상상상상상상상상,하하하하하하하하하하상상상상상상상상상상상상상하,상상하하하하하하하상상상상상상상상상상상상상상상,상하하하하하하하상상상상상상상상상상상상상상상상,하하하하하하하상상상상상상상상상상상상상상상상상,하하하하하하상상상상상상상상상상상상상상상상상상,...,하하하하하하하하하하상하하하상상상상상상상상상상,하하하하하하하하상상하하상상상상상상상상상상상상,하하상상상상상상상상상상상상상상상상상상상상상상,하상상상상상상상상상상상상상상상상상상상상상상상,상상상상상상상상상상상상상상상상상상상상상상상상,상상하상상상상상상상상상상상상상상상상상상상상상,상하상상상상상상상상상상상상상상상상상상상상상상,하상상상상상상상상상상상상상상상상상상상상상상상,상상상상하상상상상상상상상상상상상상상상상상상상,상상하하하상상상상상상상상상상상상상상상상상상상
메인 시나리오,강,강,강,강,강,강,강,강,강,강,...,약,강,강,강,강,강,강,강,강,강
세부 시나리오,강강,강강,강강,강강,약강,약강,약강,약강,약강,보강,...,약강,약강,강강,강강,강강,강강,강강,강강,강강,강강
LS 수익률(%),14.586,9.664,11.299,10.112,2.064,-0.947,5.045,5.546,4.088,4.392,...,11.349,10.614,13.401,15.471,11.644,11.324,12.764,10.547,8.189,6.343
BH 수익률(%),7.886,5.297,6.152,5.531,1.405,-0.099,2.916,3.172,2.428,2.583,...,6.178,5.793,7.258,8.357,6.333,6.165,6.922,5.758,4.532,3.58
DCA1 수익률(%),6.648,3.728,4.632,3.765,0.696,-2.27,1.56,1.948,0.438,-0.654,...,10.033,8.545,7.883,9.516,5.872,5.747,6.788,4.307,1.766,0.721
DCA1* 수익률(%),12.888,7.198,8.955,7.27,1.337,-4.343,3.003,3.751,0.842,-1.254,...,19.542,16.61,15.31,18.522,11.372,11.128,13.163,8.323,3.4,1.385


# 세부 시나리오

In [33]:
def summarize_by_senario(df, Total = True, **kwargs):
    main_senario_list = ['강', '보', '약']

    column_list = ['메인 시나리오', '세부 시나리오', 'N', 
                   'P_0', 'P_T','H_DCA1', 'H_DCA1*',
                   
                   'DCA1 > LS', 'DCA1 > BH',
                   'DCA1* > LS', 'DCA1* > BH',

                   'H_DCA1 < P_T', 'H_DCA1* < P_T',

                   'LS 수익률(%)','BH 수익률(%)', 'DCA1 수익률(%)' , 'DCA1* 수익률(%)',
                   'LS 표준편차(%)', 'BH 표준편차(%)', 'DCA1 표준편차(%)', 'DCA1* 표준편차(%)' ,]

    store_dict = OrderedDict(\
                             dict((key , []) for key in column_list))

    # 결과 저장 DataFrame

    # 시나리오 기록
    
    if Total == True :
        store_dict['메인 시나리오'].append('전체')
        store_dict['세부 시나리오'].append('전체')
    
    elif Total == False:
        store_dict['메인 시나리오'].append(kwargs['main'])
        store_dict['세부 시나리오'].append(kwargs['sebu'])
    

    # N
    store_dict['N'].append(len(df))



    ################### # P_0 ,P_T , H_DCA1 , H_DCA1 ########################
    for column in ['P_0', 'P_T' , 'H_DCA1', 'H_DCA1*']:
        store_dict[column].append(df[column].mean())



    ################# [DCA 1*. DCA 2*] > [LS수익률(%), BH수익률(%)]######################
    for column in ['DCA1','DCA1*']:
        suik_column = column + ' 수익률(%)'

        DCA_win_LS_rate = np.sum(df['%s'%suik_column] > df['LS 수익률(%)']) / len(df)
        DCA_win_BH_rate = np.sum(df['%s'%suik_column] > df['BH 수익률(%)']) / len(df)

        store_dict['%s > LS'%column].append(DCA_win_LS_rate)
        store_dict['%s > BH'%column].append(DCA_win_BH_rate)




    ############### DCA1 , DCA1*, DCA2, DCA2*   > 0 (수익이 난 비율) #######################
    store_dict['H_DCA1 < P_T'].append(np.sum(df['H_DCA1'] < df['P_T']) / len(df))
    store_dict['H_DCA1* < P_T'].append( np.sum(df['H_DCA1*'] < df['P_T']) / len(df))



    #################### # LS, DCA , DCA1 수익률 기록 #######################################

    for column in ['LS', 'BH', 'DCA1', 'DCA1*']:
        suik_column = column + ' 수익률(%)'
        store_dict[suik_column].append(df[suik_column].mean())

        std_column = column + ' 표준편차(%)'
        store_dict[std_column].append(df[suik_column].std())
                                                                                
    
    return_df = DataFrame(store_dict)
    return return_df


In [34]:
summarize_by_senario(df_DCA, Total=True)

Unnamed: 0,메인 시나리오,세부 시나리오,N,P_0,P_T,H_DCA1,H_DCA1*,DCA1 > LS,DCA1 > BH,DCA1* > LS,...,H_DCA1 < P_T,H_DCA1* < P_T,LS 수익률(%),BH 수익률(%),DCA1 수익률(%),DCA1* 수익률(%),LS 표준편차(%),BH 표준편차(%),DCA1 표준편차(%),DCA1* 표준편차(%)
0,전체,전체,440,1.0,1.277988,1.089394,1.016296,0.356818,0.411364,0.486364,...,0.686364,0.686364,10.585127,6.31457,5.926198,11.945641,23.496045,12.726436,13.571495,26.733381


In [35]:
from collections import OrderedDict

# 전체 요약
summary_df = summarize_by_senario(df_DCA)

    
# 세부 시나리오별 요약
for main_senario in ['강','보','약']:
    temp = df_DCA[df_DCA['메인 시나리오'] == main_senario]
    
    # 강 보 합, 메인 시나리오 결과 총합
    main_summary_df = summarize_by_senario(temp, Total = False, main = main_senario, sebu = '전체')
    summary_df = pd.concat([summary_df, main_summary_df], axis=0, copy = False)
    sebu_senario_list = sorted(temp['세부 시나리오'].unique())

    for sebu_senario in sebu_senario_list:
        temp2 = temp[temp['세부 시나리오'] == sebu_senario]
        
        sebu_summary_df = summarize_by_senario(temp2, Total=False, main = main_senario, sebu = sebu_senario)
        summary_df = pd.concat([summary_df , sebu_summary_df], axis=0, copy = False)


summary_df.set_index('메인 시나리오', inplace= True)

#df_1.iloc[:,1:] = df_1.iloc[:,1:].apply(lambda x: np.round(x,3))

In [36]:
summary_df.T

메인 시나리오,전체,강,강.1,강.2,강.3,강.4,강.5,보,보.1,보.2,보.3,약,약.1,약.2,약.3,약.4,약.5
세부 시나리오,전체,전체,강강,강보,강약,보강,약강,전체,강약,보보,약강,전체,강약,보약,약강,약보,약약
N,440,282,221,4,8,17,32,8,2,1,5,150,13,9,7,4,117
P_0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
P_T,1.27799,1.50464,1.57122,1.02571,0.91585,1.37127,1.32276,1.09797,1.02104,1.00194,1.14794,0.861486,0.875752,0.902744,1.1315,1.11242,0.831993
H_DCA1,1.08939,1.2182,1.26045,1.05879,1.03205,1.09047,1.06071,0.991211,0.995069,1.00334,0.987243,0.852476,0.926007,0.945379,0.947007,0.877788,0.830638
H_DCA1*,1.0163,1.08255,1.11528,1.10937,1.18314,0.932985,0.907451,0.922943,0.974536,1.00462,0.88597,0.896715,0.992962,1.00153,0.818796,0.7383,0.888036
DCA1 > LS,0.356818,0.0283688,0,0,0.25,0.0588235,0.15625,0.5,0.5,0,0.6,0.966667,0.846154,0.888889,0.857143,1,0.991453
DCA1 > BH,0.411364,0.269504,0.19457,0.25,0.125,0.647059,0.625,0.75,0.5,0,1,0.66,0.307692,0.333333,1,1,0.692308
DCA1* > LS,0.486364,0.343972,0.271493,0.25,0.25,0.764706,0.65625,0.75,0.5,0,1,0.74,0.384615,0.444444,1,1,0.777778
DCA1* > BH,0.620455,0.648936,0.61991,0.25,0.25,0.882353,0.875,0.75,0.5,0,1,0.56,0.153846,0.333333,1,1,0.581197


In [37]:
summary_df.T.to_csv('시나리오별 통계_%s개월.csv'%n_period)