In [2]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from matplotlib import font_manager, rc


In [3]:
raw = pd.read_csv('Data/kospi.csv', encoding = 'cp949')

# 전처리

In [4]:
raw.head()

Unnamed: 0,일자,현재지수,대비,등락률(%),배당수익률(%),주가이익비율,주가자산비율,시가지수,고가지수,저가지수,거래량(천주),거래대금(백만원),상장시가총액(백만원)
0,1980/01/04,100.0,,-,0.0,0.0,0.0,,,,96,603,
1,1980/01/05,100.15,,-,0.0,0.0,0.0,,,,131,776,
2,1980/01/07,102.53,,-,0.0,0.0,0.0,,,,358,2030,
3,1980/01/08,105.28,,-,0.0,0.0,0.0,,,,796,5567,
4,1980/01/09,106.37,,-,0.0,0.0,0.0,,,,552,3706,


In [5]:
raw.dtypes

일자              object
현재지수            object
대비             float64
등락률(%)          object
배당수익률(%)       float64
주가이익비율         float64
주가자산비율         float64
시가지수            object
고가지수            object
저가지수            object
거래량(천주)         object
거래대금(백만원)       object
상장시가총액(백만원)     object
dtype: object

In [6]:
raw['일자'] = raw['일자'].apply(lambda x: x.replace('/','')).astype('int')

raw['현재지수'] = raw['현재지수'].apply(lambda x: x.replace(',','')).astype('float')

raw['등락률(%)'] = raw['현재지수'].pct_change().fillna(0) +1

raw = raw[['일자','현재지수','등락률(%)']]

In [7]:
raw.head()

Unnamed: 0,일자,현재지수,등락률(%)
0,19800104,100.0,1.0
1,19800105,100.15,1.0015
2,19800107,102.53,1.023764
3,19800108,105.28,1.026821
4,19800109,106.37,1.010353


In [8]:
raw['GIJUN_MONTH'] = raw['일자'].apply(lambda x: x//100)

In [9]:
raw.tail()

Unnamed: 0,일자,현재지수,등락률(%),GIJUN_MONTH
10411,20180813,2248.45,0.984957,201808
10412,20180814,2258.91,1.004652,201808
10413,20180816,2240.8,0.991983,201808
10414,20180817,2247.05,1.002789,201808
10415,20180820,2247.88,1.000369,201808


# Historical Simulation을 위한 기초 data 생성

In [10]:
column_list = ['GIJUN_MONTH','SILH_SUIK_RT','KOSPI(월초)','KOSPI(월말)']
data = DataFrame(columns = column_list)

# 월초
jisu1_list = []

#월말
jisu2_list = []

month_list = []
suik_list = []

for month in raw['GIJUN_MONTH'].unique():
    temp = raw[raw['GIJUN_MONTH'] == month]
    
    # 2018년 8월 자료는 제외
    if month == 201808: continue
    
    # 월 기록
    month_list.append(month)
    
    # 월초 주가 기록
    jisu1_list.append(temp['현재지수'].iloc[0])
    
    # 월말 주가 기록
    jisu2_list.append(temp['현재지수'].iloc[-1])
    
    # 월 수익률 기록
    suik_list.append(np.prod(temp['등락률(%)']))
    
    
    
data['GIJUN_MONTH'] = month_list
data['SILH_SUIK_RT'] = suik_list
data['KOSPI(월초)'] = jisu1_list
data['KOSPI(월말)'] = jisu2_list

In [11]:
data

Unnamed: 0,GIJUN_MONTH,SILH_SUIK_RT,KOSPI(월초),KOSPI(월말)
0,198001,1.058900,100.00,105.89
1,198002,0.979696,104.48,103.74
2,198003,1.012146,103.96,105.00
3,198004,1.105619,105.43,116.09
4,198005,1.002929,116.54,116.43
5,198006,0.967276,116.77,112.62
6,198007,1.000799,112.05,112.71
7,198008,0.999468,112.28,112.65
8,198009,0.979583,112.55,110.35
9,198010,0.957952,109.85,105.71


In [12]:
data.to_csv('RAW_DATA.csv')

# 시뮬레이션 시작

In [37]:
from collections import OrderedDict
from sympy import Symbol, solve

n_period = 36

investment = 10000
total_investment = n_period * investment

rf_annual = 0.03
rf_monthly = np.power(1+rf_annual, 1/12) -1



store_dict = OrderedDict(\
                         {'기준 월' : [],
                          'KOSPI_0' : [],
                          'KOSPI_T' : [],
                          
                          '전체 시나리오' : [],
                          '메인 시나리오' : [],
                          '세부 시나리오' : [],
                          
                          'LS 수익률(%)' : [],
                          'BH 수익률(%)' : [],
                          'DCA1 수익률(%)' : [],
                          'DCA1* 수익률(%)' : [],
                          
                          'P_0' : [],
                          'P_T' : [],
                          'H_DCA1' : [],
                          'H_DCA1*' : [],
                          
                          })

for i in range(len(data)):
    
    
    temp = data.iloc[i:i+n_period]
    
    # full year가 아니면 제외
    if not len(temp) == n_period:
        continue
    
    #기준 월 기록
    gijun_month = temp['GIJUN_MONTH'].iloc[0]
    store_dict['기준 월'].append(gijun_month)
    
    # 최초시점 KOSPI 기록
    kospi_0 = temp['KOSPI(월초)'].iloc[0]
    store_dict['KOSPI_0'].append(kospi_0)
    
    # 중간시점 KOSPI 기록
    kospi_05 = temp['KOSPI(월초)'].iloc[n_period//2]
    
    
    # 마지막 시점 KOSPI 기록
    kospi_t = temp['KOSPI(월말)'].iloc[-1]
    store_dict['KOSPI_T'].append(kospi_t)
    
    # state : 각각의 시나리오 (ex: 상상)를 기록할 변수
    total_senario = ''
    main_senario = ''
    sebu_senario = '' 

    ################ total_senario 계산 ##################
    for index in temp.index:
        row = temp.loc[index, :]
        
        # kospi_new : 해당 기의 마지막 날 주가
        kospi_new = row['KOSPI(월말)']
           
        # t기간 수익률
        suik_t = kospi_new / kospi_0 
        
        ##### 상, 하 state 기록
        if suik_t >= 1:
            total_senario += '상'
        else:
            total_senario += '하'
    
    # total senario = xxxxxxxxxxxx (x : n_period)
    store_dict['전체 시나리오'].append(total_senario)
    
    ######################## 메인 시나리오 계산 ###########################
    
    up = 0
    down = 0
    
    for letter in total_senario:
        if letter == '상':
            up += 1
        elif letter == '하':
            down += 1
    
    # 메인 시나리오 계산
    # up이 절반 이상이면
    if up > n_period/2 :
        main_senario += '강'
    
    elif down > n_period/2 :
        main_senario += '약'
    
    elif up == n_period/2 :
        if kospi_t >= kospi_0:
            main_senario += '강'
            
        elif kospi_t < kospi_0:
            main_senario += '약'
            
    
    store_dict['메인 시나리오'].append(main_senario)
    
    
    
    ############################ 세부 시나리오 계산 ############################
    # 전기 (최초 n_period/2 개월) 
    up = 0
    down = 0
    
    for letter in total_senario[:n_period//2]:
        if letter == '상':
            up += 1
        elif letter == '하':
            down += 1          
    
    if up > n_period/4 :
        sebu_senario += '강'
        
    if down > n_period/4 :
        sebu_senario += '약'
        
    if up == n_period/4 :
        if kospi_05 >= kospi_0:
            sebu_senario += '강'
        
        elif kospi_05 < kospi_0:
            sebu_senario += '약'
        
    # 후기
    up = 0
    down = 0
    
    for letter in total_senario[n_period//2:]:
        if letter == '상':
            up += 1
        elif letter == '하':
            down += 1          
    
    if up > n_period/4 :
        sebu_senario += '강'
        
    if down > n_period/4 :
        sebu_senario += '약'
        
    if up == n_period/4 :
        if kospi_t >= kospi_05:
            sebu_senario += '강'
        elif kospi_t < kospi_05:
            sebu_senario += '약'
        
        
    store_dict['세부 시나리오'].append(sebu_senario)
    
    
    
    ################### P_0 , P_T 계산 ###############################
    P_0 = temp['KOSPI(월초)'].iloc[0]
    P_T = temp['KOSPI(월말)'].iloc[-1]    
    
    P_T_0 = P_T / P_0
    
    ###################### LS , BH 수익률 계산 ###################
    LS_HPR = P_T / P_0 -1  # HPR  0.xx 
    LS_annual = np.power(LS_HPR +1 , 12/n_period) -1 # annual 0.xxx
    
    BH_HPR = (total_investment/2 * (P_T/P_0) + 
              total_investment/2 * np.power( 1+ rf_annual, 12/ n_period)) / total_investment -1
    
    
    BH_annual = np.power(BH_HPR +1, 12/n_period) -1
    
    store_dict['LS 수익률(%)'].append(round(LS_annual * 100,3))
    store_dict['BH 수익률(%)'].append(round(BH_annual * 100,3))
    
    
    
    
    ##################### DCA1 FV 계산 ########################
    FV_sum = 0
    
    for i in range(n_period):
        FV_sum += investment * np.prod(temp['SILH_SUIK_RT'].iloc[i:])
        
    # FV_sum : 적립식 투자안의 미래가치의 총합
    FV_sum = np.round(FV_sum,2)
    
    #################### DCA1 수익률 계산 ########################
    DCA1_HPR = FV_sum / total_investment -1 # HPR 0.xxx
    DCA1_annual = np.power(DCA1_HPR +1, 12 / n_period) -1  # annual 0.xxx
    DCA1_monthly = np.power(DCA1_annual +1, 1/n_period) -1
    
    store_dict['DCA1 수익률(%)'].append(round(DCA1_annual * 100 , 3))

    
    
    
    ################### DCA1* 수익률 계산 ##########################
    irr_path = np.ones(n_period+1) * -investment
    irr_path[-1] += (FV_sum + investment)
    
    # DCA1_star : 월별 수익률
    # IRR_annual : 연환산 수익률
    DCA1_star_monthly = np.irr(irr_path)    # monthly 0. xxxx
    DCA1_star_HPR = np.power(DCA1_star_monthly +1 ,n_period) -1
    DCA1_star_annual = np.power(DCA1_star_monthly +1, 12) -1 # annual 0. xxx
    
    store_dict['DCA1* 수익률(%)'].append(round(DCA1_star_annual * 100, 3))
    
    
    # H_DCA1 = P_T / (1 + DCA1_HPR)
    # H_DCA1* = P_T / (1 + DCA1*_HPR)
    
    H_DCA1 = P_T / (1 + DCA1_HPR)
    H_DCA1_star = round(P_T / (1+ DCA1_star_HPR),5)
    
    store_dict['P_0'].append(1)
    store_dict['P_T'].append(P_T/ P_0)
    
    store_dict['H_DCA1'].append(H_DCA1/P_0)
    store_dict['H_DCA1*'].append(H_DCA1_star/P_0)
    
    
    
    
# df 결과 값 저장 : DataFrame    
df_DCA = DataFrame(store_dict)

df_DCA.T.to_csv('모든 시나리오 %s개월.csv'%n_period)

In [38]:
df_DCA.head()

Unnamed: 0,기준 월,KOSPI_0,KOSPI_T,전체 시나리오,메인 시나리오,세부 시나리오,LS 수익률(%),BH 수익률(%),DCA1 수익률(%),DCA1* 수익률(%),P_0,P_T,H_DCA1,H_DCA1*
0,198001,100.0,128.99,상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상,강,강강,8.856,4.766,2.998,5.834,1,1.2899,1.18051,1.088132
1,198002,104.48,118.27,하상상상상상상상상상상상하상상상상상상상상상상상상상상상상상상상상상상상,강,강강,4.219,2.311,-0.185,-0.359,1,1.131987,1.13828,1.144276
2,198003,103.96,123.5,상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상,강,강강,5.909,3.195,1.155,2.248,1,1.187957,1.147722,1.111316
3,198004,105.43,119.8,상상상상상상상상상상하상상상상상상상상상상상상상상상상상상상상상상상상상,강,강강,4.351,2.38,-0.036,-0.07,1,1.136299,1.137534,1.138704
4,198005,116.54,131.44,하하하하하하하하하하하상상상상상상상상상상상상상하상상상상상상상상상상상,강,약강,4.092,2.245,2.967,5.773,1,1.127853,1.03314,0.953065


# 세부 시나리오

In [39]:
def summarize_by_senario(df, Total = True, **kwargs):
    main_senario_list = ['강', '보', '약']

    column_list = ['메인 시나리오', '세부 시나리오', 'N', 
                   'P_0', 'P_T','H_DCA1', 'H_DCA1*',
                   
                   'DCA1 > LS', 'DCA1 > BH', 
                   'DCA1* > LS', 'DCA1* > BH',

                   'H_DCA1 < P_T', 'H_DCA1* < P_T',

                   'LS 수익률(%)','BH 수익률(%)', 'DCA1 수익률(%)' , 'DCA1* 수익률(%)',
                   'LS 표준편차(%)', 'BH 표준편차(%)', 'DCA1 표준편차(%)', 'DCA1* 표준편차(%)' ,]
    
    for column in ['LS','BH','DCA1','DCA1*']:
        
        column_list.append(column + ' 왜도')
        column_list.append(column + ' 첨도')
        
        for name in [' min',' 1Q',' median', ' 3Q',' max']:
            column_list.append(column+name)
            
    
    
    
    
    store_dict = OrderedDict(\
                             dict((key , []) for key in column_list))

    # 결과 저장 DataFrame

    # 시나리오 기록
    
    if Total == True :
        store_dict['메인 시나리오'].append('전체')
        store_dict['세부 시나리오'].append('전체')
    
    elif Total == False:
        store_dict['메인 시나리오'].append(kwargs['main'])
        store_dict['세부 시나리오'].append(kwargs['sebu'])
    

    # N
    store_dict['N'].append(len(df))



    ################### # P_0 ,P_T , H_DCA1 , H_DCA1 ########################
    for column in ['P_0', 'P_T' , 'H_DCA1', 'H_DCA1*']:
        store_dict[column].append(df[column].mean())


    
    ################# [DCA 1*. DCA 2*] > [LS수익률(%), BH수익률(%)]######################
    for column in ['DCA1','DCA1*']:
        suik_column = column + ' 수익률(%)'

        DCA_win_LS_rate = np.sum(df['%s'%suik_column] > df['LS 수익률(%)']) / len(df)
        DCA_win_BH_rate = np.sum(df['%s'%suik_column] > df['BH 수익률(%)']) / len(df)

        store_dict['%s > LS'%column].append(DCA_win_LS_rate)
        store_dict['%s > BH'%column].append(DCA_win_BH_rate)




    ############### DCA1 , DCA1*, DCA2, DCA2*   > 0 (수익이 난 비율) #######################
    store_dict['H_DCA1 < P_T'].append(np.sum(df['H_DCA1'] < df['P_T']) / len(df))
    store_dict['H_DCA1* < P_T'].append( np.sum(df['H_DCA1*'] < df['P_T']) / len(df))


    #################### # LS, DCA , DCA1 수익률 기록 #######################################

    for column in ['LS', 'BH', 'DCA1', 'DCA1*']:
        
        #### 수익률 ####
        suik_column = column + ' 수익률(%)'
        store_dict[suik_column].append(df[suik_column].mean())
        
        #### 표준편차 ####
        std_column = column + ' 표준편차(%)'
        store_dict[std_column].append(df[suik_column].std())
        
        
        #### 분위수 계산 ####
        for cnt, name in enumerate([' min',' 1Q',' median',' 3Q',' max']):
            quartile = 25 * cnt # 0, 25, 50, 75, 100
            
            store_dict[column + name].append(np.percentile(a= df[suik_column], q=quartile))
        
        #### 왜도 첨도 계산 ####
        store_dict[column + ' 왜도'].append(df[suik_column].skew(axis = 0))
        store_dict[column + ' 첨도'].append(df[suik_column].kurtosis(axis = 0))
    
    
    return_df = DataFrame(store_dict)
    return return_df


In [40]:
summarize_by_senario(df_DCA, Total=True)

Unnamed: 0,메인 시나리오,세부 시나리오,N,P_0,P_T,H_DCA1,H_DCA1*,DCA1 > LS,DCA1 > BH,DCA1* > LS,...,DCA1 median,DCA1 3Q,DCA1 max,DCA1* 왜도,DCA1* 첨도,DCA1* min,DCA1* 1Q,DCA1* median,DCA1* 3Q,DCA1* max
0,전체,전체,428,1.0,1.463603,1.138155,1.0217,0.315421,0.46028,0.514019,...,2.962,10.3725,43.452,1.008413,1.757521,-46.234,-0.25625,5.7635,20.184,84.635


In [41]:
df_DCA.head()

Unnamed: 0,기준 월,KOSPI_0,KOSPI_T,전체 시나리오,메인 시나리오,세부 시나리오,LS 수익률(%),BH 수익률(%),DCA1 수익률(%),DCA1* 수익률(%),P_0,P_T,H_DCA1,H_DCA1*
0,198001,100.0,128.99,상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상,강,강강,8.856,4.766,2.998,5.834,1,1.2899,1.18051,1.088132
1,198002,104.48,118.27,하상상상상상상상상상상상하상상상상상상상상상상상상상상상상상상상상상상상,강,강강,4.219,2.311,-0.185,-0.359,1,1.131987,1.13828,1.144276
2,198003,103.96,123.5,상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상상,강,강강,5.909,3.195,1.155,2.248,1,1.187957,1.147722,1.111316
3,198004,105.43,119.8,상상상상상상상상상상하상상상상상상상상상상상상상상상상상상상상상상상상상,강,강강,4.351,2.38,-0.036,-0.07,1,1.136299,1.137534,1.138704
4,198005,116.54,131.44,하하하하하하하하하하하상상상상상상상상상상상상상하상상상상상상상상상상상,강,약강,4.092,2.245,2.967,5.773,1,1.127853,1.03314,0.953065


In [42]:
from collections import OrderedDict

# 전체 요약
summary_df = summarize_by_senario(df_DCA)

    
# 세부 시나리오별 요약
for main_senario in ['강','약']:
    temp = df_DCA[df_DCA['메인 시나리오'] == main_senario]
    
    # 강 보 합, 메인 시나리오 결과 총합
    main_summary_df = summarize_by_senario(temp, Total = False, main = main_senario, sebu = '전체')
    summary_df = pd.concat([summary_df, main_summary_df], axis=0, copy = False)
    sebu_senario_list = sorted(temp['세부 시나리오'].unique())

    for sebu_senario in sebu_senario_list:
        temp2 = temp[temp['세부 시나리오'] == sebu_senario]
        
        sebu_summary_df = summarize_by_senario(temp2, Total=False, main = main_senario, sebu = sebu_senario)
        summary_df = pd.concat([summary_df , sebu_summary_df], axis=0, copy = False)


summary_df.set_index('메인 시나리오', inplace= True)

#df_1.iloc[:,1:] = df_1.iloc[:,1:].apply(lambda x: np.round(x,3))

In [43]:
summary_df.T

메인 시나리오,전체,강,강.1,강.2,강.3,약,약.1,약.2,약.3
세부 시나리오,전체,전체,강강,강약,약강,전체,강약,약강,약약
N,428,290,246,10,34,138,8,27,103
P_0,1,1,1,1,1,1,1,1,1
P_T,1.4636,1.73565,1.82836,1.02258,1.27462,0.891901,0.874374,1.16903,0.820617
H_DCA1,1.13815,1.28283,1.32512,1.07585,1.0377,0.834132,0.933559,0.896378,0.810092
H_DCA1*,1.0217,1.09646,1.12263,1.17978,0.882612,0.864588,1.02631,0.726674,0.888179
DCA1 > LS,0.315421,0.0172414,0.00406504,0,0.117647,0.942029,0.875,0.777778,0.990291
DCA1 > BH,0.46028,0.324138,0.260163,0.1,0.852941,0.746377,0.375,0.962963,0.718447
DCA1* > LS,0.514019,0.37931,0.321138,0.1,0.882353,0.797101,0.375,0.962963,0.786408
DCA1* > BH,0.654206,0.637931,0.609756,0.4,0.911765,0.688406,0.25,0.962963,0.650485


In [44]:
summary_df.T.to_csv('시나리오별 통계_%s개월.csv'%n_period)