In [28]:
import pandas as pd
import numpy as np
import warnings
import pyodbc
import random
import os
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from sklearn.cluster import DBSCAN
from sklearn.cluster import KMeans
from collections import Counter

warnings.filterwarnings(action = 'ignore')

#### 유전 알고리즘을 활용한 페어 트레이딩 v5

* v2
 - parameter 생성 시 공적분 검정 테스트를 추가하자 (train set 한도내에서)
 - child 생성 시 볼린저밴드 파라미터와 비중 2개로 나누어 crossover
 - mutation 시 볼린저밴드 파라미터 조정과 비중 shuffle

* v3
 - train set/test set 기간을 여러개로 나눠서 각 시뮬레이션 내 종목 비중과 결과 비교
 - 변이 이후 수선 기능을 추가. 각 파라미터의 정상성 테스트 후 p-value 0.05 이하인 것만 고르고 모자라다면 새로 생성
 
* v4
 - v2 + v3, 단 train set 기간 0.7에 대해 먼저 시뮬레이션 해보기
 - Clustering 기법을 도입해서 각 클러스터 내 가장 score가 좋은 부모 쌍을 고르고 교배. 부모 + 자식 중 best fit을 고르고 변이를 적용해 클러스터 내 개체 개수만큼 채우기
 - Clustering은 KMeans 활용
 
* v5
 - parameter 생성 시 공적분 검정 테스트를 추가하자 (train set 한도내에서)
 - child 생성 시 볼린저밴드 파라미터와 비중 2개로 나누어 crossover
 - mutation 시 볼린저밴드 파라미터 조정과 비중 shuffle
 - 변이 이후 수선 기능을 추가. 각 파라미터의 정상성 테스트 후 p-value 0.05 이하인 것만 고르고 모자라다면 새로 생성
 - train set 기간 0.7에 대해 시뮬레이션 해보기
 - Clustering 활용. 각 개체의 음(-)의 비중 개수를 기준으로 clustering. 각 cluster 내에서 child 생성 (이때 음(-)의 비중 개수는 변하지 않는 방식으로 child 생성). 이후 전체 개체 중 best, lucky, 초기생성으로 새로운 세대 추출
 - child 생성 시 elite technique 활용 (부,모,자식 중 가장 스코어 높은 유전체 선택)

In [2]:
# Open the three ODBC connections used by this notebook.
#
# Credentials are read from environment variables instead of being embedded in
# the notebook, so they are not stored or shared together with the file:
#   PCOR_UID / PCOR_PWD          -> Oracle (dbq=PCOR)
#   QUANT_DB_UID / QUANT_DB_PWD  -> SQL Server (databases `quant` and `wisefn`)
# A missing variable raises KeyError before any connection is attempted.
# NOTE(review): the passwords that used to be written here should be treated as
# exposed and rotated.
_pcor_uid, _pcor_pwd = os.environ['PCOR_UID'], os.environ['PCOR_PWD']
_mssql_uid, _mssql_pwd = os.environ['QUANT_DB_UID'], os.environ['QUANT_DB_PWD']

conn_pcor = pyodbc.connect(f'driver={{Oracle in OraClient18Home1}};dbq=PCOR;uid={_pcor_uid};pwd={_pcor_pwd}')
conn_quant = pyodbc.connect(f'driver={{SQL Server}};server=46.2.90.172;database=quant;uid={_mssql_uid};pwd={_mssql_pwd}')
conn_wisefn = pyodbc.connect(f'driver={{SQL Server}};server=46.2.90.172;database=wisefn;uid={_mssql_uid};pwd={_mssql_pwd}')

In [3]:
# Step into the shared data folder (two levels up, then `00_data`) and load the
# daily stock price table.
for step in ("..", "..", '00_data'):
    os.chdir(step)

# Other data files in the same folder; not needed by this notebook version.
#df_const = pd.read_json('230927_index_constituent.json')
df_prc_raw = pd.read_json('230927_stk_prc_daily.json')
#df_turnover = pd.read_json('230927_stk_turnover.json')
#df_sector = pd.read_json('230927_stk_sector.json')
#df_mktcap = pd.read_json('230927_stk_mktcap.json')
#df_turnover_daily = pd.read_json('231011_stk_turnover_daily.json')

# Move the working directory to this strategy's own folder.
for step in ("..", '02_Trading Strategies', '231026_유전 알고리즘을 활용한 페어트레이딩'):
    os.chdir(step)

In [69]:
# Sector code used below as the SEC_CD filter on TS_IDX_DAILY and as the KSC_CD
# filter on TS_STK_ISSUE.
sector_code = 'IKS013'

In [70]:
# Daily rows of the sector index after 2009-12-31, ordered by trade date.
# In the visible cells only TRD_DT is used afterwards (to build the month-end
# calendar); CLOSE_PRC is fetched but not referenced.
# NOTE(review): sector_code is interpolated straight into the SQL text. It is a
# notebook constant here; switch to a bound parameter if it ever comes from
# outside the notebook.
sql_dt = f'''
        SELECT TRD_DT, CLOSE_PRC
        FROM TS_IDX_DAILY
        WHERE 1=1
        AND SEC_CD = '{sector_code}'
        AND TRD_DT > '20091231'
        ORDER BY TRD_DT ASC
'''

df_dt = pd.read_sql(sql_dt, conn_wisefn)

In [71]:
# Month-end trading calendar: the last trade date of every calendar month,
# stored as YYYYMMDD integers in order of first appearance.
df_dt['TRD_DT'] = pd.to_datetime(df_dt['TRD_DT'])
df_dt['M'] = df_dt['TRD_DT'].dt.to_period('M')
month_last_day = df_dt.groupby('M', sort=False)['TRD_DT'].max()
dt_m = [int(stamp.strftime('%Y%m%d')) for stamp in month_last_day]

In [72]:
def objective_func(df, params):
    """Fitness of one individual: compounded return of its spread trades.

    Parameters
    ----------
    df : DataFrame
        Price panel, one column per stock, indexed by trade date.
    params : list
        ``[window, entry_level, exit_level, *weights]`` with one weight per
        column of ``df``.

    Returns
    -------
    Product of ``1 + trade_return`` over all completed trades; ``1`` when no
    trade completes.

    The weighted price sum (the spread) is standardised with a rolling mean and
    standard deviation of length ``window``. A position opens on the first
    z-score below ``entry_level`` and closes on the first later z-score above
    ``exit_level``. A trade's return is the weighted sum of the per-stock
    simple returns between the two dates.
    """
    window, entry_level, exit_level = params[0], params[1], params[2]
    params_wt = params[3:]

    spread = (df * params_wt).sum(axis=1)
    roller = spread.rolling(window)
    zscore = ((spread - roller.mean()) / roller.std()).dropna(axis=0)

    labels = zscore.index
    entries, exits = [], []
    holding = False
    for pos, z in enumerate(zscore.to_numpy()):
        if not holding:
            if z < entry_level:
                entries.append(labels[pos])
                holding = True
        elif z > exit_level:
            exits.append(labels[pos])
            holding = False

    # A position still open at the end of the sample has no exit: drop it.
    unmatched = len(entries) - len(exits)
    if unmatched > 1:
        print("error")
    elif unmatched == 1:
        entries = entries[:-1]

    cum_ret = 1
    for opened, closed in zip(entries, exits):
        leg_returns = df.loc[closed] / df.loc[opened] - 1
        trade_return = (leg_returns * params_wt).sum()
        cum_ret *= (1 + trade_return)

    return cum_ret

In [73]:
def generate_weight(length):
    """Draw a random long/short weight vector of ``length`` entries.

    Between 1 and ``length - 1`` entries are positive and sum to 1; the
    remaining entries are negative and sum to -1. The entries are shuffled
    before being returned. Uses the global ``random`` state.
    """
    n_long = random.randint(1, length - 1)
    n_short = length - n_long

    raw_long = [random.uniform(0, 1) for _ in range(n_long)]
    raw_short = [random.uniform(0, 1) for _ in range(n_short)]

    long_total = sum(raw_long)
    short_total = sum(raw_short)

    wt = [x / long_total for x in raw_long]
    wt.extend(-x / short_total for x in raw_short)

    random.shuffle(wt)
    return wt
        

def generate_params(df, length):
    """Create one random individual whose spread passes the ADF test on ``df``.

    The window (20..720), entry level (-4..0) and exit level (0..4) are drawn
    once. Weight vectors are then drawn repeatedly until the weighted price sum
    has an ADF p-value below 0.05.

    Returns ``[window, entry_level, exit_level, *weights]``.
    NOTE(review): the loop has no retry limit, so it does not terminate if no
    weight vector passes the test.
    """
    band = [random.randint(20, 20*36), random.uniform(-4, 0), random.uniform(0, 4)]

    while True:
        candidate_wt = generate_weight(length)
        spread = (df * candidate_wt).sum(axis=1)
        if adfuller(spread)[1] < 0.05:
            return band + candidate_wt
    

In [74]:
def generate_pop(df, size, length):
    """Build an initial population of ``size`` individuals via ``generate_params``."""
    return [generate_params(df, length) for _ in range(size)]

In [75]:
def compute_performance(df, population):
    """Score every individual and rank the population, best first.

    Returns a list of ``[individual, score]`` pairs sorted by descending
    ``objective_func`` score; ties keep their original relative order.
    """
    scored = [[genome, objective_func(df, genome)] for genome in population]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored

In [76]:
def select_survivors(population_sorted, best_sample, lucky_few, length, df):
    """Pick the individuals that will breed the next generation.

    Takes the top ``best_sample`` entries of ``population_sorted`` whose score
    is positive, adds ``lucky_few`` entries drawn uniformly without
    replacement, removes duplicates, tops the set up with freshly generated
    individuals until it holds ``best_sample + lucky_few`` members, and
    returns it shuffled.

    ``population_sorted`` is a list of ``[individual, score]`` pairs, best
    first (the output of ``compute_performance``).
    """
    elite = [
        population_sorted[rank][0]
        for rank in range(best_sample)
        if population_sorted[rank][1] > 0
    ]

    all_positions = list(range(len(population_sorted)))
    drawn = np.random.choice(all_positions, lucky_few, replace=False)
    candidates = elite + [population_sorted[pos][0] for pos in drawn]

    # Drop repeated individuals while keeping first-seen order.
    distinct = []
    for genome in candidates:
        if genome not in distinct:
            distinct.append(genome)

    target_size = best_sample + lucky_few
    while len(distinct) < target_size:
        distinct.append(generate_params(df, length))

    random.shuffle(distinct)
    return distinct

In [77]:
def pop_clustering(survivors):
    """Group individuals by how many of their weights are negative.

    Returns a dict mapping every count between the smallest and largest
    observed count (inclusive) to the list of individuals with that count;
    counts nobody has map to an empty list. Only ``individual[3:]`` (the
    weights) is inspected.
    """
    neg_counts = [sum(1 for w in genome[3:] if w < 0) for genome in survivors]
    lowest, highest = min(neg_counts), max(neg_counts)

    return {
        count: [genome for genome, n_neg in zip(survivors, neg_counts) if n_neg == count]
        for count in range(lowest, highest + 1)
    }

In [78]:
def create_child(df, param1, param2):
    """Cross two parents and return the fittest of parent 1, parent 2 and child.

    The child takes the band parameters (first three entries) from one parent
    and the whole weight vector from the other; which way round is decided by
    a coin flip. On equal scores the earlier candidate wins, in the order
    parent 1, parent 2, child.
    """
    coin = random.random()
    # Two further draws are made and discarded, as in the original code, so the
    # global random stream advances by exactly three values per call.
    random.random()
    random.random()

    child = (param1[:3] + param2[3:]) if coin < 0.5 else (param2[:3] + param1[3:])

    scored = [(genome, objective_func(df, genome)) for genome in (param1, param2, child)]
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)

    return ranked[0][0]


def create_children(df, pop_dict):
    """Breed one child per individual inside every cluster.

    Parameters
    ----------
    df : DataFrame
        Price panel passed through to ``objective_func`` / ``create_child``.
    pop_dict : dict
        Cluster id -> list of individuals (the output of ``pop_clustering``).

    Returns
    -------
    list
        As many individuals as ``pop_dict`` holds in total. A cluster with a
        single member passes that member through unchanged; an empty cluster
        contributes nothing.

    In larger clusters two distinct parents are drawn per child with
    probability proportional to their fitness (negative or NaN fitness counts
    as zero). When that weighting is unusable (zero total, or fewer than two
    members with non-zero probability) the parents are drawn uniformly.
    """
    children = []

    for members in pop_dict.values():
        n_members = len(members)

        if n_members == 0:
            continue

        if n_members == 1:
            children.append(members[0])
            continue

        # Fitness does not change while this cluster is being bred, so it is
        # evaluated once per member instead of once per child.
        fitness = [objective_func(df, genome) for genome in members]
        clipped = [f if f >= 0 else 0 for f in fitness]
        total = sum(clipped)

        # `total > 0` is False for zero and for NaN; both cases fall back to a
        # uniform draw instead of dividing by zero.
        prob = [f / total for f in clipped] if total > 0 else None
        positions = list(range(n_members))

        for _ in range(n_members):
            try:
                picked = np.random.choice(positions, 2, replace=False, p=prob)
            except ValueError:
                # numpy rejects `p` when it has fewer than two non-zero entries
                # or contains NaN; any other error is allowed to propagate.
                picked = np.random.choice(positions, 2, replace=False)

            children.append(create_child(df, members[picked[0]], members[picked[1]]))

    return children

In [79]:
def mutation1(param, prob):
    """Mutate the band parameters of one individual.

    Each of window, entry level and exit level is independently redrawn from
    its initial range (20..720, -4..0, 0..4) with probability ``prob``. The
    weights are carried over unchanged. Returns a new list.
    """
    # All three decision draws happen first, then the replacement draws.
    roll_window, roll_entry, roll_exit = random.random(), random.random(), random.random()

    window = random.randint(20, 20*36) if roll_window < prob else param[0]
    entry_level = random.uniform(-4, 0) if roll_entry < prob else param[1]
    exit_level = random.uniform(0, 4) if roll_exit < prob else param[2]

    return [window, entry_level, exit_level, *param[3:]]


def mutation2(param, prob):
    """With probability ``prob``, shuffle which stock carries which weight.

    The band parameters (first three entries) are kept. Returns a new list;
    ``param`` itself is not modified.
    """
    roll = random.random()
    band, weights = param[:3], param[3:]

    if roll < prob:
        random.shuffle(weights)

    return band + weights



################################################
def mutate_pop(population, prob1, prob2):
    """Apply ``mutation1`` then ``mutation2`` to every individual.

    The list is updated in place and also returned.
    """
    for slot, genome in enumerate(population):
        population[slot] = mutation2(mutation1(genome, prob1), prob2)

    return population

In [80]:
def mutation_fix(population, df, length):
    """Repair individuals whose spread is no longer stationary.

    For every individual the weighted price sum is ADF-tested on ``df``.
    Individuals with p-value below 0.05 are kept as they are; the others keep
    their band parameters but receive the weights of a freshly generated
    individual.

    Returns ``(repaired_population, number_of_repaired_individuals)``.
    """
    repaired = []
    n_repaired = 0

    for genome in population:
        spread = (df * genome[3:]).sum(axis=1)

        if adfuller(spread)[1] < 0.05:
            repaired.append(genome)
            continue

        fresh_weights = generate_params(df, length)[3:]
        repaired.append(genome[:3] + fresh_weights)
        n_repaired += 1

    return repaired, n_repaired


In [81]:
def simulation_func(df, params):
    """Replay one individual's trading rule and return the per-trade detail.

    Same entry/exit rule as ``objective_func``: the weighted price sum is
    standardised with a rolling mean/std of length ``params[0]``; a position
    opens on the first z-score below ``params[1]`` and closes on the first
    later z-score above ``params[2]``. A position still open at the end of the
    sample is discarded.

    Returns
    -------
    tuple
        ``(cum_ret, init_dt, exit_dt, ret_list, long_ret_list, short_ret_list)``
        where ``cum_ret`` is the compounded long-short return (``1`` without
        trades), the two date lists hold entry and exit index labels, and the
        three return lists hold, per trade, the total weighted return and its
        split into the non-negative-weight and negative-weight legs.
    """
    window, entry_level, exit_level = params[0], params[1], params[2]
    params_wt = params[3:]

    spread = (df * params_wt).sum(axis=1)
    roller = spread.rolling(window)
    zscore = ((spread - roller.mean()) / roller.std()).dropna(axis=0)

    labels = zscore.index
    init_dt, exit_dt = [], []
    holding = False
    for pos, z in enumerate(zscore.to_numpy()):
        if not holding:
            if z < entry_level:
                init_dt.append(labels[pos])
                holding = True
        elif z > exit_level:
            exit_dt.append(labels[pos])
            holding = False

    unmatched = len(init_dt) - len(exit_dt)
    if unmatched > 1:
        print("error")
    elif unmatched == 1:
        init_dt = init_dt[:-1]

    # The leg masks depend only on the weights, so build them once.
    long_wt = [w if w >= 0 else 0 for w in params_wt]
    short_wt = [w if w < 0 else 0 for w in params_wt]

    ret_list, long_ret_list, short_ret_list = [], [], []
    cum_ret = 1
    for opened, closed in zip(init_dt, exit_dt):
        leg_returns = df.loc[closed] / df.loc[opened] - 1

        total = (leg_returns * params_wt).sum()
        ret_list.append(total)
        cum_ret *= (1 + total)

        long_ret_list.append((leg_returns * long_wt).sum())
        short_ret_list.append((leg_returns * short_wt).sum())

    return cum_ret, init_dt, exit_dt, ret_list, long_ret_list, short_ret_list

In [82]:
# Train/test split: the training window ends at the month-end that sits at
# fraction `i` of the month-end calendar `dt_m`.
i = 0.7
base_d = dt_m[int(np.round(len(dt_m)*i,0))]
train_begin_dt = dt_m[0]
train_end_dt = base_d

# Stocks of the chosen sector as of the split date.
# NOTE(review): KS200_TYP = 1 presumably restricts to KOSPI200 members - confirm
# against the TS_STK_ISSUE schema.
sql_const = f'''
            SELECT TRD_DT, CONCAT('A',STK_CD) COMP_CD, STK_NM_KOR COMP_NM
            FROM TS_STK_ISSUE
            WHERE 1=1
            AND KS200_TYP = 1
            AND TRD_DT = '{base_d}'
            AND KSC_CD = '{sector_code}'
'''

df_const = pd.read_sql(sql_const, conn_wisefn)

# Training panel: prices of those stocks between the first month-end and the
# split date (both inclusive).
df_train = df_prc_raw.copy()
df_train = df_train[df_train['TRD_DT'] >= train_begin_dt]
df_train = df_train[df_train['TRD_DT'] <= train_end_dt]
df_train = df_train[df_train['COMP_CD'].isin(df_const['COMP_CD'])]
df_train = df_train.sort_values(by = ['TRD_DT','COMP_CD'])
#stk_list = sorted(list(set(df_train['COMP_CD'])))


# Reshape to dates x stocks. pivot() is called without `values`, so the result
# has a two-level column index whose top level is then dropped.
# NOTE(review): this assumes df_prc_raw has exactly one value column besides
# TRD_DT and COMP_CD - TODO confirm, otherwise column names repeat.
df_train = df_train.pivot(index = 'TRD_DT', columns = 'COMP_CD')
df_train = df_train.droplevel(axis = 1, level = 0)
#df_train = df_train[stk_list]
# Keep only stocks with a price on every training date; this fixes the stock
# universe (and therefore the weight-vector length) for the whole run.
df_train.dropna(axis=1, inplace=True)
stk_list = sorted(list(df_train.columns))


# Test panel: same stocks from the split date onwards.
# NOTE(review): the filter is `>= train_end_dt` while the training filter is
# `<= train_end_dt`, so the split date itself is in both panels.
df_test = df_prc_raw.copy()
df_test = df_test[df_test['TRD_DT'] >= train_end_dt]
#df_test = df_prc[df_prc['TRD_DT'] <= train_end_dt]
df_test = df_test[df_test['COMP_CD'].isin(df_const['COMP_CD'])]
df_test = df_test.sort_values(by = ['TRD_DT','COMP_CD'])

df_test = df_test.pivot(index = 'TRD_DT', columns = 'COMP_CD')
df_test = df_test.droplevel(axis = 1, level = 0)
# Unlike df_train, no dropna is applied here, so df_test may contain NaN.
df_test = df_test[stk_list]

In [83]:
print(f'Train set: {train_begin_dt} ~ {train_end_dt}')
print(f'Test set: {train_end_dt} ~ {max(df_test.index)}')
print(' ')
print(f'유전 알고리즘 시작')

# Seed both generators used by the GA helpers (`random` and `np.random`) so a
# Restart & Run All reproduces the same search. Change SEED for a different run.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# GA hyper-parameters.
n_generation = 200                 # maximum number of generations to run
population = 100                   # individuals per generation
best_sample = int(population/2)    # survivors taken from the top of the ranking
lucky_few = int(population/2)      # survivors drawn at random
prob1 = 0.01                       # per-parameter mutation probability (band parameters)
prob2 = 0.01                       # probability of shuffling the weight vector
param_length = len(stk_list)       # one weight per stock

pop = generate_pop(df_train, size = population, length = param_length)

best_gene = []   # best individual of every completed generation
best_perf = []   # its training score

g = 0

while True:

    # Rank the current generation, then select -> cluster -> breed -> mutate -> repair.
    pop_sorted = compute_performance(df_train, pop)

    survivors =  select_survivors(pop_sorted, best_sample, lucky_few, param_length, df_train)

    pop_clustered = pop_clustering(survivors)

    next_gen = create_children(df_train, pop_clustered)

    next_gen = mutate_pop(next_gen, prob1, prob2)

    fixed = mutation_fix(next_gen, df_train, param_length)
    fixed_generation = fixed[0]
    fixed_cnt = fixed[1]

    # Stop if breeding produced fewer individuals than a full generation.
    if len(next_gen) < population:
        print(f'  세대수 부족: {len(next_gen)}')
        break


    print(f'  수선 개체 수: {fixed_cnt}')

    pop = fixed_generation
    best_gene.append(pop_sorted[0][0])
    best_perf.append(pop_sorted[0][1])

    # Report the ADF p-value of the best individual's spread on the training set.
    temp_sp = df_train * pop_sorted[0][0][3:]
    temp_sp = temp_sp.sum(axis=1)
    result = adfuller(temp_sp)
    p_val = result[1]

    print(f'====== {g}th generation ends ======')
    print(pop_sorted[0])
    print(f'  정상성 체크: p value = {p_val}')

    # Convergence check: stop once one individual makes up more than 70% of
    # the generation that was just evaluated.
    gene_count = Counter([tuple(x[0]) for x in pop_sorted])
    most_common_gene = gene_count.most_common(1)[0][1]

    print(f'  최다 출현 개체수: {most_common_gene}')
    if most_common_gene > population * 0.7:
        break

    g += 1

    # `g` now counts completed generations. The previous `g > n_generation`
    # test ran one generation more than `n_generation`.
    if g >= n_generation:
        break

    print(' ')
    

Train set: 20100129 ~ 20191031
Test set: 20191031 ~ 20230927
 
유전 알고리즘 시작
  수선 개체 수: 1
[[341, -0.45443924682937586, 1.0393178889111572, -0.16343227613743008, 0.6849717075780323, -0.02544213268882904, -0.03550213640506722, -0.0531574820735693, -0.08065481583081682, -0.126557623389933, -0.15597954294479316, 0.3150282924219677, -0.05348856792341605, -0.12875701203047493, -0.17702841057567045], 3.396483822889004]
  정상성 체크: p value = 0.03561985887175313
  최다 출현 개체수: 1
 
  수선 개체 수: 1
[[341, -0.45443924682937586, 1.0393178889111572, -0.16343227613743008, 0.6849717075780323, -0.02544213268882904, -0.03550213640506722, -0.0531574820735693, -0.08065481583081682, -0.126557623389933, -0.15597954294479316, 0.3150282924219677, -0.05348856792341605, -0.12875701203047493, -0.17702841057567045], 3.396483822889004]
  정상성 체크: p value = 0.03561985887175313
  최다 출현 개체수: 5
 
  수선 개체 수: 0
[[341, -0.45443924682937586, 1.0393178889111572, -0.16343227613743008, 0.6849717075780323, -0.02544213268882904, -0.03550

  수선 개체 수: 5
[[206, -1.08401274442314, 1.9256476794951305, -0.16343227613743008, 0.6849717075780323, -0.02544213268882904, -0.03550213640506722, -0.0531574820735693, -0.08065481583081682, -0.126557623389933, -0.15597954294479316, 0.3150282924219677, -0.05348856792341605, -0.12875701203047493, -0.17702841057567045], 4.520420238024047]
  정상성 체크: p value = 0.03561985887175313
  최다 출현 개체수: 7
 
  수선 개체 수: 1
[[206, -1.08401274442314, 1.9256476794951305, -0.16343227613743008, 0.6849717075780323, -0.02544213268882904, -0.03550213640506722, -0.0531574820735693, -0.08065481583081682, -0.126557623389933, -0.15597954294479316, 0.3150282924219677, -0.05348856792341605, -0.12875701203047493, -0.17702841057567045], 4.520420238024047]
  정상성 체크: p value = 0.03561985887175313
  최다 출현 개체수: 7
 
  수선 개체 수: 1
[[206, -1.08401274442314, 1.9256476794951305, -0.16343227613743008, 0.6849717075780323, -0.02544213268882904, -0.03550213640506722, -0.0531574820735693, -0.08065481583081682, -0.126557623389933, -0.155

  수선 개체 수: 2
[[204, -1.08401274442314, 1.9256476794951305, -0.16343227613743008, 0.6849717075780323, -0.02544213268882904, -0.03550213640506722, -0.0531574820735693, -0.08065481583081682, -0.126557623389933, -0.15597954294479316, 0.3150282924219677, -0.05348856792341605, -0.12875701203047493, -0.17702841057567045], 4.5227319882484975]
  정상성 체크: p value = 0.03561985887175313
  최다 출현 개체수: 9
 
  수선 개체 수: 0
[[204, -1.08401274442314, 1.9256476794951305, -0.16343227613743008, 0.6849717075780323, -0.02544213268882904, -0.03550213640506722, -0.0531574820735693, -0.08065481583081682, -0.126557623389933, -0.15597954294479316, 0.3150282924219677, -0.05348856792341605, -0.12875701203047493, -0.17702841057567045], 4.5227319882484975]
  정상성 체크: p value = 0.03561985887175313
  최다 출현 개체수: 7
 
  수선 개체 수: 0
[[204, -1.08401274442314, 1.9256476794951305, -0.16343227613743008, 0.6849717075780323, -0.02544213268882904, -0.03550213640506722, -0.0531574820735693, -0.08065481583081682, -0.126557623389933, -0.1

  수선 개체 수: 0
[[204, -1.08401274442314, 1.9256476794951305, -0.16343227613743008, 0.6849717075780323, -0.02544213268882904, -0.03550213640506722, -0.0531574820735693, -0.08065481583081682, -0.126557623389933, -0.15597954294479316, 0.3150282924219677, -0.05348856792341605, -0.12875701203047493, -0.17702841057567045], 4.5227319882484975]
  정상성 체크: p value = 0.03561985887175313
  최다 출현 개체수: 5
 
  수선 개체 수: 1
[[265, -0.49404436693760223, 0.7577385423926795, -0.16343227613743008, 0.6849717075780323, -0.02544213268882904, -0.03550213640506722, -0.0531574820735693, -0.08065481583081682, -0.126557623389933, -0.15597954294479316, 0.3150282924219677, -0.05348856792341605, -0.12875701203047493, -0.17702841057567045], 5.770645268145787]
  정상성 체크: p value = 0.03561985887175313
  최다 출현 개체수: 5
 
  수선 개체 수: 1
[[265, -0.49404436693760223, 0.7577385423926795, -0.16343227613743008, 0.6849717075780323, -0.02544213268882904, -0.03550213640506722, -0.0531574820735693, -0.08065481583081682, -0.126557623389933,

  수선 개체 수: 0
[[473, -1.016858306900109, 2.1167101126889545, -0.026371230168042863, 0.7799147681959786, -0.12218741789597741, 0.11134657419954616, -0.053143673413360876, -0.13216797594764873, -0.17214324559811592, 0.1087386576044751, -0.021763464392508907, -0.20742415774436335, -0.12041986009698268, -0.1443789747429995], 7.173508199696456]
  정상성 체크: p value = 0.03378218106511272
  최다 출현 개체수: 7
 
  수선 개체 수: 0
[[473, -1.016858306900109, 2.1167101126889545, -0.026371230168042863, 0.7799147681959786, -0.12218741789597741, 0.11134657419954616, -0.053143673413360876, -0.13216797594764873, -0.17214324559811592, 0.1087386576044751, -0.021763464392508907, -0.20742415774436335, -0.12041986009698268, -0.1443789747429995], 7.173508199696456]
  정상성 체크: p value = 0.03378218106511272
  최다 출현 개체수: 5
 
  수선 개체 수: 3
[[473, -1.016858306900109, 2.1167101126889545, -0.026371230168042863, 0.7799147681959786, -0.12218741789597741, 0.11134657419954616, -0.053143673413360876, -0.13216797594764873, -0.1721432455

  수선 개체 수: 1
[[473, -1.016858306900109, 2.1167101126889545, -0.026371230168042863, 0.7799147681959786, -0.12218741789597741, 0.11134657419954616, -0.053143673413360876, -0.13216797594764873, -0.17214324559811592, 0.1087386576044751, -0.021763464392508907, -0.20742415774436335, -0.12041986009698268, -0.1443789747429995], 7.173508199696456]
  정상성 체크: p value = 0.03378218106511272
  최다 출현 개체수: 8
 
  수선 개체 수: 0
[[473, -1.016858306900109, 2.1167101126889545, -0.026371230168042863, 0.7799147681959786, -0.12218741789597741, 0.11134657419954616, -0.053143673413360876, -0.13216797594764873, -0.17214324559811592, 0.1087386576044751, -0.021763464392508907, -0.20742415774436335, -0.12041986009698268, -0.1443789747429995], 7.173508199696456]
  정상성 체크: p value = 0.03378218106511272
  최다 출현 개체수: 7
 
  수선 개체 수: 3
[[473, -1.016858306900109, 2.1167101126889545, -0.026371230168042863, 0.7799147681959786, -0.12218741789597741, 0.11134657419954616, -0.053143673413360876, -0.13216797594764873, -0.1721432455

  수선 개체 수: 0
[[473, -1.016858306900109, 2.1167101126889545, -0.026371230168042863, 0.7799147681959786, -0.12218741789597741, 0.11134657419954616, -0.053143673413360876, -0.13216797594764873, -0.17214324559811592, 0.1087386576044751, -0.021763464392508907, -0.20742415774436335, -0.12041986009698268, -0.1443789747429995], 7.173508199696456]
  정상성 체크: p value = 0.03378218106511272
  최다 출현 개체수: 8
 
  수선 개체 수: 1
[[473, -1.016858306900109, 2.1167101126889545, -0.026371230168042863, 0.7799147681959786, -0.12218741789597741, 0.11134657419954616, -0.053143673413360876, -0.13216797594764873, -0.17214324559811592, 0.1087386576044751, -0.021763464392508907, -0.20742415774436335, -0.12041986009698268, -0.1443789747429995], 7.173508199696456]
  정상성 체크: p value = 0.03378218106511272
  최다 출현 개체수: 7
 
  수선 개체 수: 0
[[473, -1.016858306900109, 2.1167101126889545, -0.026371230168042863, 0.7799147681959786, -0.12218741789597741, 0.11134657419954616, -0.053143673413360876, -0.13216797594764873, -0.1721432455

  수선 개체 수: 2
[[473, -1.016858306900109, 2.1167101126889545, -0.026371230168042863, 0.7799147681959786, -0.12218741789597741, 0.11134657419954616, -0.053143673413360876, -0.13216797594764873, -0.17214324559811592, 0.1087386576044751, -0.021763464392508907, -0.20742415774436335, -0.12041986009698268, -0.1443789747429995], 7.173508199696456]
  정상성 체크: p value = 0.03378218106511272
  최다 출현 개체수: 6
 
  수선 개체 수: 1
[[473, -1.016858306900109, 2.1167101126889545, -0.026371230168042863, 0.7799147681959786, -0.12218741789597741, 0.11134657419954616, -0.053143673413360876, -0.13216797594764873, -0.17214324559811592, 0.1087386576044751, -0.021763464392508907, -0.20742415774436335, -0.12041986009698268, -0.1443789747429995], 7.173508199696456]
  정상성 체크: p value = 0.03378218106511272
  최다 출현 개체수: 6
 
  수선 개체 수: 0
[[473, -1.016858306900109, 2.1167101126889545, -0.026371230168042863, 0.7799147681959786, -0.12218741789597741, 0.11134657419954616, -0.053143673413360876, -0.13216797594764873, -0.1721432455

KeyboardInterrupt: 

In [84]:
# Replay the best individual of the last evaluated generation on both panels.
best_individual = pop_sorted[0][0]
result_train = simulation_func(df_train, best_individual)
result_test = simulation_func(df_test, best_individual)

In [85]:
# Per-trade return tables: one row per completed trade, columns are the
# long-short total and its long and short legs (items 3..5 of the result tuple).
leg_labels = ['ls', 'long', 'short']
ret_train = pd.DataFrame(list(result_train[3:6]), index = leg_labels).T
ret_test = pd.DataFrame(list(result_test[3:6]), index = leg_labels).T

In [86]:
# Per-trade returns of the best individual on the training panel.
ret_train

Unnamed: 0,ls,long,short
0,0.209713,0.183205,0.026508
1,0.428643,0.38046,0.048184
2,2.00715,1.964231,0.04292
3,0.553216,0.482983,0.070233
4,-0.111334,-0.241592,0.130258


In [87]:
# Per-trade returns of the best individual on the test panel.
# NOTE(review): the saved output below has no rows, i.e. no trade completed on
# the test panel for this individual - confirm this is expected.
ret_test

Unnamed: 0,ls,long,short
