In [100]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline

# 데이터 정리

In [101]:
stocks = pd.read_csv('./trade_train_set/stocks.csv')
trade_train = pd.read_csv('./trade_train_set/trade_train.csv')
stocks = stocks.drop(['index'], axis=1)
trade_train = trade_train.drop(['Unnamed: 0'], axis=1)

In [104]:
stocks_copy = stocks.copy()
stocks['datetime'] = stocks['기준일자'].apply(lambda x: pd.to_datetime(str(x), format='%Y%m%d'))
stocks.set_index(stocks['datetime'], inplace=True)

stocks = stocks.drop(['datetime','기준일자', '종목시가','종목고가','종목저가','종목종가'], 1)

In [105]:
numeric = stocks.select_dtypes('number').columns
non_num = stocks.columns.difference(numeric)
print(numeric)
print(non_num)

Index(['거래량', '거래금액_만원단위'], dtype='object')
Index(['20년7월TOP3대상여부', '시장구분', '종목명', '종목번호', '표준산업구분코드_대분류', '표준산업구분코드_소분류',
       '표준산업구분코드_중분류'],
      dtype='object')


In [106]:
#종목별로 찾아서, 월별로 묶음. 반복되는 데이터(종목코드 등)는 그대로 유지.

d = {**{x: 'mean' for x in numeric}, **{x: 'first' for x in non_num}}

monthly = None

for i, name in enumerate(stocks['종목명'].unique()):
    tmp =stocks[stocks['종목명'] == name].resample('M').agg(d)
    
    if i == 0:
        monthly = tmp
    else:
        monthly= pd.concat([monthly, tmp])

        
#평균가격을 계산
monthly['평균가격']= monthly['거래금액_만원단위']/monthly['거래량'] * 10000

#날짜를 인덱스에서 제거
monthly.reset_index(level=0, inplace=True)

#기준연월로 변경
monthly['기준년월'] = monthly['datetime'].dt.strftime('%Y%m')
monthly['기준년월'] = pd.to_numeric(monthly['기준년월'])

#datetime 삭제
monthly = monthly.drop(['datetime'], axis = 1)

monthly.to_csv("./month_stock.csv", mode='w', header=True, index=False)
monthly

Unnamed: 0,거래량,거래금액_만원단위,20년7월TOP3대상여부,시장구분,종목명,종목번호,표준산업구분코드_대분류,표준산업구분코드_소분류,표준산업구분코드_중분류,평균가격,기준년월
0,5.205826e+04,4.908391e+04,N,코스피,동화약품,A000020,제조업,의약품 제조업,의료용 물질 및 의약품 제조업,9428.650175,201907
1,1.573492e+05,1.357825e+05,N,코스피,동화약품,A000020,제조업,의약품 제조업,의료용 물질 및 의약품 제조업,8629.372818,201908
2,1.065910e+05,8.636921e+04,N,코스피,동화약품,A000020,제조업,의약품 제조업,의료용 물질 및 의약품 제조업,8102.861454,201909
3,1.254416e+05,9.880295e+04,N,코스피,동화약품,A000020,제조업,의약품 제조업,의료용 물질 및 의약품 제조업,7876.412202,201910
4,1.169308e+05,9.609957e+04,N,코스피,동화약품,A000020,제조업,의약품 제조업,의료용 물질 및 의약품 제조업,8218.502117,201911
...,...,...,...,...,...,...,...,...,...,...,...
13907,1.032519e+05,5.622853e+05,Y,코스닥,이지바이오,A353810,제조업,동물용 사료 및 조제식품 제조업,식료품 제조업,54457.601043,202007
13908,3.955931e+06,1.349480e+07,N,코스닥,에스씨엠생명과학,A298060,제조업,의약품 제조업,의료용 물질 및 의약품 제조업,34112.827549,202006
13909,7.000737e+05,1.804244e+06,N,코스닥,에스씨엠생명과학,A298060,제조업,의약품 제조업,의료용 물질 및 의약품 제조업,25772.208396,202007
13910,7.819787e+05,1.669942e+06,N,코스닥,엘이티,A297890,제조업,특수 목적용 기계 제조업,기타 기계 및 장비 제조업,21355.343797,202006


In [107]:
# trade에 월별 stocks데이터를 합침.
total = pd.merge(trade_train, monthly, on=['기준년월', '종목번호'])
total.to_csv("./total_data.csv", mode='w', header=True, index=False)

# 훈련데이터 생성

In [108]:
data = total[total['20년7월TOP3대상여부'] == 'Y']

data = data.drop(['그룹내_매수여부', '그룹내_매도여부', '평균매수수량', '평균매도수량', 
                  '매수가격_중앙값', '매도가격_중앙값', '20년7월TOP3대상여부', '종목명'], axis=1)

In [109]:
a = trade_train[['기준년월', '그룹번호', '종목번호', '매수고객수']].groupby(['기준년월', '그룹번호',
                                                            '종목번호']).sum().sort_values(by = [
    '기준년월','그룹번호','매수고객수'], ascending=[True, True, False]).reset_index(drop=False)

a = a.groupby(['기준년월', '그룹번호']).head(3)

prev_month_list = list(a[a['기준년월'] == 202006]['종목번호'])

data['직전달TOP3여부'] = 0
for i in range(data.shape[0]):
    if data.iloc[i,3] in prev_month_list:
        data.iloc[i,-1] = 1

In [110]:
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
def dummy(data,col):
    lab=LabelEncoder()
    aa=lab.fit_transform(data[col]).reshape(-1,1)
    ohe=OneHotEncoder(sparse=False)
    column_names=[col+'_'+ str(i) for i in lab.classes_]
    return(pd.DataFrame(ohe.fit_transform(aa),columns=column_names))

In [111]:
categ_columns = ['그룹번호', '종목번호','표준산업구분코드_대분류', '표준산업구분코드_중분류', 
                 '표준산업구분코드_소분류', '시장구분']
n_columns = ['기준년월','그룹내고객수', '매수고객수', '거래량', '거래금액_만원단위', '평균가격', '직전달TOP3여부']

df = data[n_columns]
df.index = range(df.shape[0])

In [112]:
lab = LabelEncoder()
for col in categ_columns:
    temp_df = pd.DataFrame(lab.fit_transform(data[col]))
    temp_df.columns = [col]
    df = pd.concat([df, temp_df], axis=1)

df.to_csv('./df.csv',mode='w', header=True, index=False)
df

# 훈련데이터 템플릿 만들기

In [114]:
df_temp = pd.DataFrame()

기준년월 = pd.Series(df['기준년월'].unique())
그룹번호 = pd.Series(df['그룹번호'].unique())
종목번호 = pd.Series(df['종목번호'].unique())

aa = []
for k in 기준년월:
    for i in 그룹번호:
        for j in 종목번호:
            aa.append([k,i,j])
        
aa = pd.DataFrame(data=aa, columns=['기준년월','그룹번호', '종목번호'])        
df_temp[['기준년월', '그룹번호', '종목번호']] = aa[['기준년월', '그룹번호', '종목번호']]

In [115]:
df_temp = pd.merge(df_temp, df[['종목번호', '직전달TOP3여부', '표준산업구분코드_대분류', '표준산업구분코드_중분류', 
                 '표준산업구분코드_소분류', '시장구분']].drop_duplicates(), on='종목번호')

df_temp = pd.merge(df_temp, df[['그룹번호','그룹내고객수']].drop_duplicates(), on='그룹번호')

df_temp = pd.merge(df_temp, df[['매수고객수','기준년월', '그룹번호', '종목번호', '거래량', 
                                '거래금액_만원단위', '평균가격']], 
        on = ['기준년월', '그룹번호', '종목번호'], how='outer')

df_temp = df_temp.fillna(0)


df_temp = df_temp[['기준년월', '그룹내고객수', '매수고객수', '거래량', '거래금액_만원단위', '평균가격', 
                   '직전달TOP3여부', '그룹번호', '종목번호', '표준산업구분코드_대분류', '표준산업구분코드_중분류', 
                   '표준산업구분코드_소분류', '시장구분']]

In [117]:
# 테스트 데이터 표준화를 위한 정보 저장
dat_mean = df_temp[['거래금액_만원단위', '평균가격', '거래량']].mean(axis=0)
dat_std = df_temp[['거래금액_만원단위', '평균가격', '거래량']].std()

# 훈련데이터 표준화
from sklearn.preprocessing import StandardScaler
df_temp[['거래금액_만원단위', '평균가격', '거래량']] = StandardScaler(
).fit_transform(df_temp[['거래금액_만원단위', '평균가격', '거래량']])

In [118]:
'''# 훈련데이터 기준년월 변환
def grap_year(data):
    """
    returns year from REG_YYMM
    """
    data = str(data)
    return int(data[:4])

def grap_month(data):
    """
    returns month from REG_YYMM
    """
    data = str(data)
    return int(data[4:])

df_temp['연도'] = (df_temp["기준년월"].apply(lambda x: grap_year(x)))
df_temp['월'] = (df_temp["기준년월"].apply(lambda x: grap_month(x)))
df_temp = df_temp.drop(["기준년월"], axis = 1)'''

'# 훈련데이터 기준년월 변환\ndef grap_year(data):\n    """\n    returns year from REG_YYMM\n    """\n    data = str(data)\n    return int(data[:4])\n\ndef grap_month(data):\n    """\n    returns month from REG_YYMM\n    """\n    data = str(data)\n    return int(data[4:])\n\ndf_temp[\'연도\'] = (df_temp["기준년월"].apply(lambda x: grap_year(x)))\ndf_temp[\'월\'] = (df_temp["기준년월"].apply(lambda x: grap_month(x)))\ndf_temp = df_temp.drop(["기준년월"], axis = 1)'

In [119]:
df_temp = df_temp[['기준년월', '그룹내고객수', '매수고객수', '거래량', '거래금액_만원단위', '평균가격', '직전달TOP3여부', '그룹번호',
       '종목번호', '표준산업구분코드_대분류', '표준산업구분코드_중분류', '표준산업구분코드_소분류', '시장구분']]

df_temp.to_csv('./df_temp.csv',mode='w', header=True, index=False)

## 분류 훈련 데이터 만들 때 아래 코드 사용

In [120]:
'''a = df_temp.groupby(['기준년월', '그룹번호', '종목번호']).sum().sort_values(
    by='매수고객수', ascending=False).reset_index(
    drop=False)[['기준년월', '그룹번호', '종목번호', '매수고객수']]

b = a.groupby(['기준년월', '그룹번호']).head(3)
b['top3'] = 1
b = b.drop('매수고객수', axis=1)

df_temp = pd.merge(df_temp, b, on=['기준년월', '그룹번호', '종목번호'], how='outer')
df_temp = df_temp.fillna(0)
df_temp = df_temp.drop('매수고객수', axis=1)

df_temp.to_csv('./df_temp.csv',mode='w', header=True, index=False)'''

"a = df_temp.groupby(['기준년월', '그룹번호', '종목번호']).sum().sort_values(\n    by='매수고객수', ascending=False).reset_index(\n    drop=False)[['기준년월', '그룹번호', '종목번호', '매수고객수']]\n\nb = a.groupby(['기준년월', '그룹번호']).head(3)\nb['top3'] = 1\nb = b.drop('매수고객수', axis=1)\n\ndf_temp = pd.merge(df_temp, b, on=['기준년월', '그룹번호', '종목번호'], how='outer')\ndf_temp = df_temp.fillna(0)\ndf_temp = df_temp.drop('매수고객수', axis=1)\n\ndf_temp.to_csv('./df_temp.csv',mode='w', header=True, index=False)"

### SMOTE and ADASYN

In [121]:
'''from imblearn.over_sampling import SMOTE, ADASYN
from collections import Counter

ada = ADASYN(random_state=42, n_neighbors=3)
X_syn, y_syn = ada.fit_resample(df_temp.drop('top3', axis=1),df_temp['top3'])
print('Original dataset shape %s' % Counter(df_temp['top3']))
print('ADASYN n_neighbors 5: Resampled dataset shape %s' % Counter(y_syn))

classif_train = pd.concat([X_syn, y_syn], axis=1)
classif_train

classif_train.to_csv('./classif_train2.csv',mode='w', header=True, index=False)'''

"from imblearn.over_sampling import SMOTE, ADASYN\nfrom collections import Counter\n\nada = ADASYN(random_state=42, n_neighbors=3)\nX_syn, y_syn = ada.fit_resample(df_temp.drop('top3', axis=1),df_temp['top3'])\nprint('Original dataset shape %s' % Counter(df_temp['top3']))\nprint('ADASYN n_neighbors 5: Resampled dataset shape %s' % Counter(y_syn))\n\nclassif_train = pd.concat([X_syn, y_syn], axis=1)\nclassif_train\n\nclassif_train.to_csv('./classif_train2.csv',mode='w', header=True, index=False)"

# 예측 템플릿 (테스트 데이터)

In [122]:
# 예측 템플릿 만들기
그룹번호 = pd.Series(df['그룹번호'].unique())
종목번호 = pd.Series(df['종목번호'].unique())


temp = []
for i in 그룹번호:
    for j in 종목번호:
        temp.append([i, j])

                                
temp = pd.DataFrame(data=temp, columns = ['그룹번호', '종목번호'])

In [123]:
a = trade_train[['그룹번호','그룹내고객수']].groupby('그룹번호').first().reset_index(drop=False)
a['그룹번호'] = lab.fit_transform(a['그룹번호'])

temp = pd.merge(temp, a, on='그룹번호')

temp = pd.merge(temp, df[['표준산업구분코드_대분류','표준산업구분코드_중분류', '표준산업구분코드_소분류', 
                          '종목번호', '직전달TOP3여부', '시장구분']].drop_duplicates(), on='종목번호')

In [124]:
a = stocks_copy[stocks_copy['기준일자'] > 20200700].groupby(['종목번호']).sum().reset_index(drop=False)[[
    '종목번호', '거래금액_만원단위', '거래량']]
a['종목번호'] = lab.fit_transform(a['종목번호'])

a['종목번호'] = lab.fit_transform(a['종목번호'])

a['거래금액_만원단위'] = a['거래금액_만원단위']
a['평균가격'] = (a['거래금액_만원단위'] / a['거래량']) * 10000


In [125]:
temp = pd.merge(temp, a, on='종목번호')

temp['거래금액_만원단위'] = (temp['거래금액_만원단위'] - dat_mean[0]) / dat_std[0]
temp['평균가격'] = (temp['평균가격'] - dat_mean[1]) / dat_std[1]
temp['거래량'] = (temp['거래량'] - dat_mean[2]) / dat_std[2]

In [126]:
temp['기준년월'] = 202007

In [127]:
'''temp['연도'] = 2020
temp['월'] = 7'''

"temp['연도'] = 2020\ntemp['월'] = 7"

In [128]:
temp = temp[['기준년월', '그룹내고객수','거래량','거래금액_만원단위','평균가격','직전달TOP3여부', '그룹번호', 
      '종목번호','표준산업구분코드_대분류', '표준산업구분코드_중분류', '표준산업구분코드_소분류', '시장구분']]

temp.to_csv('./template.csv',mode='w', header=True, index=False)

# 그룹번호 & 종목번호 dictionary 만들기

In [129]:
df_dict = pd.DataFrame()
df_dict['종목번호'] = temp['종목번호']
df_dict['종목번호_원본'] = 0
df_dict
for i in range(df_dict.shape[0]):
    df_dict['종목번호_원본'][i] =  lab2.inverse_transform([df_dict['종목번호'][i]])[0]
        

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_dict['종목번호_원본'][i] =  lab2.inverse_transform([df_dict['종목번호'][i]])[0]


In [130]:
df_dict = df_dict.drop_duplicates()

df_dict = df_dict.sort_values(by = ['종목번호'], ascending=[True])
df_dict.index = range(df_dict.shape[0])

df_dict.to_csv('./종목변환.csv',mode='w', header=True, index=False)

In [131]:
df_dict2 = pd.DataFrame()
df_dict2['그룹번호'] = temp['그룹번호']
df_dict2['그룹번호_원본'] = 0

for i in range(df_dict2.shape[0]):
    df_dict2['그룹번호_원본'][i] =  lab1.inverse_transform([df_dict2['그룹번호'][i]])[0]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_dict2['그룹번호_원본'][i] =  lab1.inverse_transform([df_dict2['그룹번호'][i]])[0]


In [132]:
df_dict2 = df_dict2.drop_duplicates()

df_dict2 = df_dict2.sort_values(by = ['그룹번호'], ascending=[True])
df_dict2.index = range(df_dict2.shape[0])

df_dict2.to_csv('./그룹변환.csv',mode='w', header=True, index=False)

# 매수 연속성을 통한 점수(Score)화

In [21]:
import pandas as pd
import numpy as np

path = 'C:/Users/elris/Dropbox/FBA/2020MiraeAsset/woogie/answer_sheet.csv'
answer_sheet = pd.read_csv(path)

path = 'C:/Users/elris/Dropbox/FBA/2020MiraeAsset/woogie/stocks.csv'
stocks = pd.read_csv(path, index_col = 0)

path = 'C:/Users/elris/Dropbox/FBA/2020MiraeAsset/woogie/trade_train.csv'
trade_train = pd.read_csv(path, index_col = 0)

In [22]:
group_list = list(set(trade_train['그룹번호']))
group_list = sorted(group_list)

In [23]:
# is_top3 = set(trade_train['종목번호']).intersection(stocks[stocks['20년7월TOP3대상여부']=='Y']['종목번호'].unique())
# for i in range(len(trade_train)) : 
#     if trade_train['종목번호'][i] in is_top3 : 
#         pass
#     else : 
#         trade_train = trade_train.drop(i, axis=0)

In [24]:
df=pd.DataFrame()
tmp = 0
for group in group_list :
    for day in set(trade_train['기준년월']) :
        condition1 = trade_train['그룹번호'] == group
        condition2 = trade_train['기준년월'] == day

        # 각 그룹별 월별 상위 매수 5종목을 뽑아 5점만점에서 1점씩 차등분배
        df_ = trade_train[condition1 & condition2]
        df_ = df_.sort_values(by=['매수고객수','평균매수수량'], ascending =False)
        df_['점수'] = df_[:5]['매수고객수'].rank()
        df_ = df_[:5]
        if tmp == 0 :
            df = df_
        else : 
            df = pd.merge(df,df_, how='outer')
        tmp += 1
score_df = df.sort_values(by=['그룹번호','기준년월'], ascending=True)

In [25]:
# 그룹별로 루프를 돌려 기준년월과 종목번호를 행렬로 갖는 그룹별 매수 Score 데이터 형성
for group in group_list :
    group_df = score_df[score_df['그룹번호'] == group]
    check_df = pd.DataFrame(index=set(group_df['기준년월']), columns=set(group_df['종목번호']))
    for day in set(group_df['기준년월']) :
        for stock in set(group_df['종목번호']) : 
            condition1= group_df['기준년월'] == day
            condition2= group_df['종목번호'] == stock
            try : 
                check_df[stock][day] = group_df[condition1 & condition2]['점수'].values[0]
            except IndexError : 
                pass
    check_df = check_df.sort_index()
    check_df.to_csv('C:/Users/elris/Dropbox/FBA/2020MiraeAsset/woogie/group/'+group+'_score.csv', index=True)

# 그룹번호 & 종목번호를 통한 점수 컬럼 추가

### 훈련데이터

In [1]:
import pandas as pd
import numpy as np

path = 'C:/Users/elris/Dropbox/FBA/2020MiraeAsset/woogie/answer_sheet.csv'
answer_sheet = pd.read_csv(path)

path = 'C:/Users/elris/Dropbox/FBA/2020MiraeAsset/woogie/stocks.csv'
stocks = pd.read_csv(path, index_col = 0)

path = 'C:/Users/elris/Dropbox/FBA/2020MiraeAsset/woogie/trade_train.csv'
trade_train = pd.read_csv(path, index_col = 0)

path = 'C:/Users/elris/Dropbox/FBA/2020MiraeAsset/woogie/df_temp.csv'
train_df = pd.read_csv(path, index_col = 0)

path = 'C:/Users/elris/Dropbox/FBA/2020MiraeAsset/woogie/종목변환.csv'
item_num = pd.read_csv(path)

In [5]:
time_dict = {}
for num, time in enumerate(sorted(set(train_df.index))) : 
    time_dict[time] = num + 1
    
item_dict = dict(zip(item_num['종목번호_원본'], item_num['종목번호']))

group_num = pd.DataFrame(columns=['그룹명','그룹번호'])
group_num['그룹명'] = answer_sheet['그룹명']
group_num['그룹번호'] = range(0,48)
group_dict = dict(zip(group_num['그룹명'], group_num['그룹번호']))

In [8]:
score_list = []

for i in range(len(train_df)) :
    try : 
        time = train_df.iloc[i].name
        group = train_df.iloc[i]['그룹번호']
        item = train_df.iloc[i]['종목번호']

        serch_group = [key for key, value in group_dict.items() if value == group]
        serch_item = [key for key, value in item_dict.items() if value == item]

        path = 'C:/Users/elris/Dropbox/FBA/2020MiraeAsset/woogie/score/'+serch_group[0]+'_score.csv'
        score_df = pd.read_csv(path, index_col=0)
        score_df = score_df.fillna(0)
        score_df = score_df.sort_index(ascending = False)

        score_array = score_df[-time_dict[time]:][serch_item[0]]

        # 단순지수평활(simple exponential smoothing) 가중치
        a = 0.8
        weight = []
        for i in range(len(score_array)) : 
            if i == 0 : 
                alpha = a
                weight.append(alpha)
            else : 
                alpha = alpha*(1-a)
                weight.append(alpha)

        score = score_array.values@np.array(weight)
        if time == 201907 : 
            score_list.append(0)
            score_list.append(score)
        elif time == 202006 : 
            pass
        else : 
            score_list.append(score)    
    
    except KeyError : 
        score_list.append(0)

In [9]:
train_df['점수'] = score_list
train_df.to_csv('C:/Users/elris/Dropbox/FBA/2020MiraeAsset/woogie/train_df_score.csv', index=True)

### 테스트 데이터

In [10]:
path = 'C:/Users/elris/Dropbox/FBA/2020MiraeAsset/woogie/template.csv'
test_df = pd.read_csv(path,index_col = 0)

In [11]:
group_list = sorted(list(set(trade_train['그룹번호'])))

path = 'C:/Users/elris/Dropbox/FBA/2020MiraeAsset/woogie/score/'+group_list[0]+'_score.csv'
score_df = pd.read_csv(path, index_col=0)

time_dict = {}
for num, time in enumerate(score_df.index) : 
    time_dict[time] = num + 1
    
item_dict = dict(zip(item_num['종목번호_원본'], item_num['종목번호']))

group_num = pd.DataFrame(columns=['그룹명','그룹번호'])
group_num['그룹명'] = answer_sheet['그룹명']
group_num['그룹번호'] = range(0,48)

group_dict = dict(zip(group_num['그룹명'], group_num['그룹번호']))

In [13]:
score_list = []

for i in range(len(test_df)) :
    try : 
        time = test_df.iloc[i].name
        group = test_df.iloc[i]['그룹번호']
        item = test_df.iloc[i]['종목번호']

        serch_group = [key for key, value in group_dict.items() if value == group]
        serch_item = [key for key, value in item_dict.items() if value == item]

        path = 'C:/Users/elris/Dropbox/FBA/2020MiraeAsset/woogie/score/'+serch_group[0]+'_score.csv'
        score_df = pd.read_csv(path, index_col=0)
        score_df = score_df.fillna(0)
        score_df = score_df.sort_index(ascending = False)

        # 단순지수평활(simple exponential smoothing) 가중치
        a = 0.8
        weight = []
        for i in range(len(score_df)) : 
            if i == 0 : 
                alpha = a
                weight.append(alpha)
            else : 
                alpha = alpha*(1-a)
                weight.append(alpha)

        score = score_df[serch_item[0]]@np.array(weight)
        score_list.append(score)    
    
    except KeyError : 
        score_list.append(0)


In [14]:
test_df['점수'] = score_list
test_df.to_csv('C:/Users/elris/Dropbox/FBA/2020MiraeAsset/woogie/test_df_score.csv', index=True)

# 모델 훈련

In [142]:
#Train, test split
train_mask = (df_temp['기준년월'] != 202006)
X_train = df_temp.loc[train_mask, :]
X_test = df_temp.loc[~train_mask, :]


print("학습사이즈 : ", len(X_train))
print("테스트사이즈 : ", len(X_test))

# Y split
X_train_x = X_train.drop(['매수고객수'], axis = 1)
X_train_y = X_train['매수고객수']
X_test_x = X_test.drop(['매수고객수'], axis = 1)
X_test_y = X_test['매수고객수']

학습사이즈 :  70224
테스트사이즈 :  6384


In [145]:
categ = [6, 7, 8, 9, 10]

from catboost import CatBoostClassifier, CatBoostRegressor

cb_model = CatBoostRegressor(iterations=1300,
                             learning_rate=0.02,
                             depth=12,
                             eval_metric='RMSE',
                             random_seed = 23,
                             bagging_temperature = 0.2,
                             od_type='Iter',
                             od_wait=100)

cb_model.fit(X_train_x, X_train_y,
             eval_set=(X_test_x,X_test_y),
             cat_features=categ,
             use_best_model=True,
             verbose=True)

0:	learn: 9.5300781	test: 20.7914036	best: 20.7914036 (0)	total: 150ms	remaining: 3m 15s
1:	learn: 9.4018995	test: 20.6786985	best: 20.6786985 (1)	total: 249ms	remaining: 2m 41s
2:	learn: 9.2800367	test: 20.4725428	best: 20.4725428 (2)	total: 303ms	remaining: 2m 10s
3:	learn: 9.1616963	test: 20.2858959	best: 20.2858959 (3)	total: 406ms	remaining: 2m 11s
4:	learn: 9.0410981	test: 20.0723741	best: 20.0723741 (4)	total: 519ms	remaining: 2m 14s
5:	learn: 8.9284887	test: 19.8846909	best: 19.8846909 (5)	total: 635ms	remaining: 2m 16s
6:	learn: 8.8140471	test: 19.7828957	best: 19.7828957 (6)	total: 751ms	remaining: 2m 18s
7:	learn: 8.6993019	test: 19.5892862	best: 19.5892862 (7)	total: 867ms	remaining: 2m 20s
8:	learn: 8.5878779	test: 19.4063365	best: 19.4063365 (8)	total: 987ms	remaining: 2m 21s
9:	learn: 8.4847503	test: 19.2598557	best: 19.2598557 (9)	total: 1.1s	remaining: 2m 22s
10:	learn: 8.3764329	test: 19.0815320	best: 19.0815320 (10)	total: 1.22s	remaining: 2m 23s
11:	learn: 8.2683773

91:	learn: 3.9798739	test: 11.9886416	best: 11.9886416 (91)	total: 10.1s	remaining: 2m 12s
92:	learn: 3.9536578	test: 11.9567092	best: 11.9567092 (92)	total: 10.2s	remaining: 2m 12s
93:	learn: 3.9297046	test: 11.9187644	best: 11.9187644 (93)	total: 10.3s	remaining: 2m 12s
94:	learn: 3.9008380	test: 11.8595661	best: 11.8595661 (94)	total: 10.4s	remaining: 2m 12s
95:	learn: 3.8781778	test: 11.8191623	best: 11.8191623 (95)	total: 10.6s	remaining: 2m 12s
96:	learn: 3.8527817	test: 11.7800172	best: 11.7800172 (96)	total: 10.7s	remaining: 2m 12s
97:	learn: 3.8293786	test: 11.7438996	best: 11.7438996 (97)	total: 10.8s	remaining: 2m 12s
98:	learn: 3.8068542	test: 11.7117688	best: 11.7117688 (98)	total: 10.9s	remaining: 2m 12s
99:	learn: 3.7860600	test: 11.6885521	best: 11.6885521 (99)	total: 11s	remaining: 2m 11s
100:	learn: 3.7632270	test: 11.6532210	best: 11.6532210 (100)	total: 11.1s	remaining: 2m 11s
101:	learn: 3.7423035	test: 11.6230300	best: 11.6230300 (101)	total: 11.2s	remaining: 2m 1

182:	learn: 2.6948365	test: 10.0512272	best: 10.0512272 (182)	total: 20s	remaining: 2m 2s
183:	learn: 2.6897595	test: 10.0494042	best: 10.0494042 (183)	total: 20.1s	remaining: 2m 2s
184:	learn: 2.6803343	test: 10.0351272	best: 10.0351272 (184)	total: 20.2s	remaining: 2m 2s
185:	learn: 2.6736175	test: 10.0280042	best: 10.0280042 (185)	total: 20.4s	remaining: 2m 1s
186:	learn: 2.6682576	test: 10.0258733	best: 10.0258733 (186)	total: 20.5s	remaining: 2m 1s
187:	learn: 2.6594345	test: 10.0121353	best: 10.0121353 (187)	total: 20.6s	remaining: 2m 1s
188:	learn: 2.6531704	test: 10.0042214	best: 10.0042214 (188)	total: 20.7s	remaining: 2m 1s
189:	learn: 2.6484577	test: 10.0005624	best: 10.0005624 (189)	total: 20.8s	remaining: 2m 1s
190:	learn: 2.6432498	test: 9.9971076	best: 9.9971076 (190)	total: 20.9s	remaining: 2m 1s
191:	learn: 2.6350475	test: 9.9848879	best: 9.9848879 (191)	total: 21s	remaining: 2m 1s
192:	learn: 2.6274357	test: 9.9679896	best: 9.9679896 (192)	total: 21.1s	remaining: 2m 1

277:	learn: 2.2390281	test: 9.4119204	best: 9.4119204 (277)	total: 28.8s	remaining: 1m 45s
278:	learn: 2.2367412	test: 9.4081710	best: 9.4081710 (278)	total: 28.9s	remaining: 1m 45s
279:	learn: 2.2352322	test: 9.4069048	best: 9.4069048 (279)	total: 29s	remaining: 1m 45s
280:	learn: 2.2324966	test: 9.4047209	best: 9.4047209 (280)	total: 29.2s	remaining: 1m 45s
281:	learn: 2.2311289	test: 9.4045805	best: 9.4045805 (281)	total: 29.2s	remaining: 1m 45s
282:	learn: 2.2285280	test: 9.4027555	best: 9.4027555 (282)	total: 29.3s	remaining: 1m 45s
283:	learn: 2.2274651	test: 9.4026508	best: 9.4026508 (283)	total: 29.3s	remaining: 1m 44s
284:	learn: 2.2257411	test: 9.4016196	best: 9.4016196 (284)	total: 29.4s	remaining: 1m 44s
285:	learn: 2.2218530	test: 9.3970666	best: 9.3970666 (285)	total: 29.5s	remaining: 1m 44s
286:	learn: 2.2163657	test: 9.3887930	best: 9.3887930 (286)	total: 29.7s	remaining: 1m 44s
287:	learn: 2.2126052	test: 9.3821371	best: 9.3821371 (287)	total: 29.8s	remaining: 1m 44s
2

369:	learn: 1.9937697	test: 9.0125138	best: 9.0125138 (369)	total: 37.3s	remaining: 1m 33s
370:	learn: 1.9905738	test: 9.0067313	best: 9.0067313 (370)	total: 37.5s	remaining: 1m 33s
371:	learn: 1.9898237	test: 9.0038341	best: 9.0038341 (371)	total: 37.6s	remaining: 1m 33s
372:	learn: 1.9869738	test: 8.9992402	best: 8.9992402 (372)	total: 37.7s	remaining: 1m 33s
373:	learn: 1.9851445	test: 8.9937321	best: 8.9937321 (373)	total: 37.8s	remaining: 1m 33s
374:	learn: 1.9824131	test: 8.9899307	best: 8.9899307 (374)	total: 37.9s	remaining: 1m 33s
375:	learn: 1.9790305	test: 8.9811383	best: 8.9811383 (375)	total: 38s	remaining: 1m 33s
376:	learn: 1.9762474	test: 8.9762442	best: 8.9762442 (376)	total: 38s	remaining: 1m 33s
377:	learn: 1.9752091	test: 8.9756719	best: 8.9756719 (377)	total: 38.1s	remaining: 1m 32s
378:	learn: 1.9746964	test: 8.9750624	best: 8.9750624 (378)	total: 38.2s	remaining: 1m 32s
379:	learn: 1.9712993	test: 8.9693545	best: 8.9693545 (379)	total: 38.4s	remaining: 1m 32s
380

460:	learn: 1.7462765	test: 8.5192327	best: 8.5192327 (460)	total: 46.3s	remaining: 1m 24s
461:	learn: 1.7444874	test: 8.5146462	best: 8.5146462 (461)	total: 46.4s	remaining: 1m 24s
462:	learn: 1.7429610	test: 8.5104037	best: 8.5104037 (462)	total: 46.5s	remaining: 1m 24s
463:	learn: 1.7427537	test: 8.5103658	best: 8.5103658 (463)	total: 46.6s	remaining: 1m 23s
464:	learn: 1.7397696	test: 8.5054722	best: 8.5054722 (464)	total: 46.7s	remaining: 1m 23s
465:	learn: 1.7395203	test: 8.5054423	best: 8.5054423 (465)	total: 46.7s	remaining: 1m 23s
466:	learn: 1.7375972	test: 8.5029238	best: 8.5029238 (466)	total: 46.8s	remaining: 1m 23s
467:	learn: 1.7373591	test: 8.5017476	best: 8.5017476 (467)	total: 46.8s	remaining: 1m 23s
468:	learn: 1.7371650	test: 8.5017206	best: 8.5017206 (468)	total: 46.8s	remaining: 1m 23s
469:	learn: 1.7350407	test: 8.4982283	best: 8.4982283 (469)	total: 47s	remaining: 1m 22s
470:	learn: 1.7320107	test: 8.4943250	best: 8.4943250 (470)	total: 47.1s	remaining: 1m 22s
4

551:	learn: 1.6048476	test: 8.2663304	best: 8.2663304 (551)	total: 54.7s	remaining: 1m 14s
552:	learn: 1.6047491	test: 8.2655773	best: 8.2655773 (552)	total: 54.7s	remaining: 1m 13s
553:	learn: 1.6029931	test: 8.2615065	best: 8.2615065 (553)	total: 54.8s	remaining: 1m 13s
554:	learn: 1.6020498	test: 8.2607877	best: 8.2607877 (554)	total: 54.9s	remaining: 1m 13s
555:	learn: 1.6001816	test: 8.2581202	best: 8.2581202 (555)	total: 55.1s	remaining: 1m 13s
556:	learn: 1.5973341	test: 8.2532640	best: 8.2532640 (556)	total: 55.2s	remaining: 1m 13s
557:	learn: 1.5941694	test: 8.2484305	best: 8.2484305 (557)	total: 55.3s	remaining: 1m 13s
558:	learn: 1.5927601	test: 8.2467490	best: 8.2467490 (558)	total: 55.4s	remaining: 1m 13s
559:	learn: 1.5908802	test: 8.2440340	best: 8.2440340 (559)	total: 55.5s	remaining: 1m 13s
560:	learn: 1.5881999	test: 8.2406306	best: 8.2406306 (560)	total: 55.6s	remaining: 1m 13s
561:	learn: 1.5872330	test: 8.2396471	best: 8.2396471 (561)	total: 55.7s	remaining: 1m 13s

642:	learn: 1.4779588	test: 8.0468669	best: 8.0468669 (642)	total: 1m 4s	remaining: 1m 5s
643:	learn: 1.4753220	test: 8.0433814	best: 8.0433814 (643)	total: 1m 4s	remaining: 1m 5s
644:	learn: 1.4740055	test: 8.0400543	best: 8.0400543 (644)	total: 1m 4s	remaining: 1m 5s
645:	learn: 1.4729163	test: 8.0389417	best: 8.0389417 (645)	total: 1m 4s	remaining: 1m 5s
646:	learn: 1.4725392	test: 8.0382066	best: 8.0382066 (646)	total: 1m 4s	remaining: 1m 5s
647:	learn: 1.4707345	test: 8.0347828	best: 8.0347828 (647)	total: 1m 4s	remaining: 1m 5s
648:	learn: 1.4691162	test: 8.0319644	best: 8.0319644 (648)	total: 1m 4s	remaining: 1m 5s
649:	learn: 1.4677579	test: 8.0285961	best: 8.0285961 (649)	total: 1m 4s	remaining: 1m 4s
650:	learn: 1.4659163	test: 8.0259130	best: 8.0259130 (650)	total: 1m 5s	remaining: 1m 4s
651:	learn: 1.4635719	test: 8.0237421	best: 8.0237421 (651)	total: 1m 5s	remaining: 1m 4s
652:	learn: 1.4631543	test: 8.0228485	best: 8.0228485 (652)	total: 1m 5s	remaining: 1m 4s
653:	learn

735:	learn: 1.3707092	test: 7.8568954	best: 7.8568954 (735)	total: 1m 13s	remaining: 56.5s
736:	learn: 1.3705313	test: 7.8561867	best: 7.8561867 (736)	total: 1m 13s	remaining: 56.4s
737:	learn: 1.3703570	test: 7.8554850	best: 7.8554850 (737)	total: 1m 13s	remaining: 56.3s
738:	learn: 1.3680446	test: 7.8513063	best: 7.8513063 (738)	total: 1m 14s	remaining: 56.2s
739:	learn: 1.3677416	test: 7.8496039	best: 7.8496039 (739)	total: 1m 14s	remaining: 56.1s
740:	learn: 1.3652381	test: 7.8442170	best: 7.8442170 (740)	total: 1m 14s	remaining: 56s
741:	learn: 1.3649392	test: 7.8425394	best: 7.8425394 (741)	total: 1m 14s	remaining: 55.9s
742:	learn: 1.3638849	test: 7.8404150	best: 7.8404150 (742)	total: 1m 14s	remaining: 55.8s
743:	learn: 1.3614531	test: 7.8351438	best: 7.8351438 (743)	total: 1m 14s	remaining: 55.7s
744:	learn: 1.3595074	test: 7.8341024	best: 7.8341024 (744)	total: 1m 14s	remaining: 55.7s
745:	learn: 1.3575916	test: 7.8301376	best: 7.8301376 (745)	total: 1m 14s	remaining: 55.6s
7

827:	learn: 1.2600982	test: 7.6996963	best: 7.6996963 (827)	total: 1m 24s	remaining: 48s
828:	learn: 1.2595249	test: 7.6989502	best: 7.6989502 (828)	total: 1m 24s	remaining: 47.9s
829:	learn: 1.2593748	test: 7.6979368	best: 7.6979368 (829)	total: 1m 24s	remaining: 47.8s
830:	learn: 1.2582523	test: 7.6967305	best: 7.6967305 (830)	total: 1m 24s	remaining: 47.7s
831:	learn: 1.2569713	test: 7.6952875	best: 7.6952875 (831)	total: 1m 24s	remaining: 47.6s
832:	learn: 1.2558508	test: 7.6939442	best: 7.6939442 (832)	total: 1m 24s	remaining: 47.5s
833:	learn: 1.2556018	test: 7.6933353	best: 7.6933353 (833)	total: 1m 24s	remaining: 47.4s
834:	learn: 1.2546667	test: 7.6917366	best: 7.6917366 (834)	total: 1m 24s	remaining: 47.3s
835:	learn: 1.2537001	test: 7.6913590	best: 7.6913590 (835)	total: 1m 25s	remaining: 47.2s
836:	learn: 1.2534580	test: 7.6907544	best: 7.6907544 (836)	total: 1m 25s	remaining: 47.1s
837:	learn: 1.2532529	test: 7.6896400	best: 7.6896400 (837)	total: 1m 25s	remaining: 47s
838

918:	learn: 1.1721335	test: 7.5766122	best: 7.5766122 (918)	total: 1m 33s	remaining: 38.9s
919:	learn: 1.1708286	test: 7.5750519	best: 7.5750519 (919)	total: 1m 33s	remaining: 38.8s
920:	learn: 1.1700808	test: 7.5741725	best: 7.5741725 (920)	total: 1m 34s	remaining: 38.7s
921:	learn: 1.1686462	test: 7.5731009	best: 7.5731009 (921)	total: 1m 34s	remaining: 38.6s
922:	learn: 1.1683814	test: 7.5725660	best: 7.5725660 (922)	total: 1m 34s	remaining: 38.5s
923:	learn: 1.1675667	test: 7.5718363	best: 7.5718363 (923)	total: 1m 34s	remaining: 38.4s
924:	learn: 1.1662968	test: 7.5712828	best: 7.5712828 (924)	total: 1m 34s	remaining: 38.3s
925:	learn: 1.1655131	test: 7.5709816	best: 7.5709816 (925)	total: 1m 34s	remaining: 38.2s
926:	learn: 1.1643301	test: 7.5699658	best: 7.5699658 (926)	total: 1m 34s	remaining: 38.1s
927:	learn: 1.1635920	test: 7.5691091	best: 7.5691091 (927)	total: 1m 34s	remaining: 38s
928:	learn: 1.1629503	test: 7.5687166	best: 7.5687166 (928)	total: 1m 34s	remaining: 37.9s
9

1010:	learn: 1.1100624	test: 7.5136374	best: 7.5136374 (1010)	total: 1m 43s	remaining: 29.7s
1011:	learn: 1.1095791	test: 7.5130961	best: 7.5130961 (1011)	total: 1m 43s	remaining: 29.6s
1012:	learn: 1.1090350	test: 7.5128483	best: 7.5128483 (1012)	total: 1m 44s	remaining: 29.5s
1013:	learn: 1.1082653	test: 7.5123442	best: 7.5123442 (1013)	total: 1m 44s	remaining: 29.4s
1014:	learn: 1.1072674	test: 7.5115667	best: 7.5115667 (1014)	total: 1m 44s	remaining: 29.3s
1015:	learn: 1.1065533	test: 7.5106212	best: 7.5106212 (1015)	total: 1m 44s	remaining: 29.2s
1016:	learn: 1.1055810	test: 7.5098603	best: 7.5098603 (1016)	total: 1m 44s	remaining: 29.1s
1017:	learn: 1.1048832	test: 7.5089261	best: 7.5089261 (1017)	total: 1m 44s	remaining: 29s
1018:	learn: 1.1036628	test: 7.5073490	best: 7.5073490 (1018)	total: 1m 44s	remaining: 28.8s
1019:	learn: 1.1034322	test: 7.5068144	best: 7.5068144 (1019)	total: 1m 44s	remaining: 28.7s
1020:	learn: 1.1032072	test: 7.5062856	best: 7.5062856 (1020)	total: 1m 

1099:	learn: 1.0510986	test: 7.4575338	best: 7.4575338 (1099)	total: 1m 53s	remaining: 20.6s
1100:	learn: 1.0503015	test: 7.4566479	best: 7.4566479 (1100)	total: 1m 53s	remaining: 20.5s
1101:	learn: 1.0496604	test: 7.4564060	best: 7.4564060 (1101)	total: 1m 53s	remaining: 20.4s
1102:	learn: 1.0487238	test: 7.4559555	best: 7.4559555 (1102)	total: 1m 53s	remaining: 20.3s
1103:	learn: 1.0482458	test: 7.4554610	best: 7.4554610 (1103)	total: 1m 53s	remaining: 20.2s
1104:	learn: 1.0473020	test: 7.4540224	best: 7.4540224 (1104)	total: 1m 53s	remaining: 20.1s
1105:	learn: 1.0464183	test: 7.4528283	best: 7.4528283 (1105)	total: 1m 53s	remaining: 20s
1106:	learn: 1.0463981	test: 7.4527649	best: 7.4527649 (1106)	total: 1m 53s	remaining: 19.9s
1107:	learn: 1.0452577	test: 7.4523946	best: 7.4523946 (1107)	total: 1m 54s	remaining: 19.8s
1108:	learn: 1.0442786	test: 7.4520110	best: 7.4520110 (1108)	total: 1m 54s	remaining: 19.7s
1109:	learn: 1.0437203	test: 7.4513823	best: 7.4513823 (1109)	total: 1m 

1188:	learn: 0.9916997	test: 7.4104569	best: 7.4104569 (1188)	total: 2m 2s	remaining: 11.5s
1189:	learn: 0.9909623	test: 7.4103770	best: 7.4103770 (1189)	total: 2m 2s	remaining: 11.4s
1190:	learn: 0.9903419	test: 7.4094396	best: 7.4094396 (1190)	total: 2m 3s	remaining: 11.3s
1191:	learn: 0.9897052	test: 7.4096077	best: 7.4094396 (1190)	total: 2m 3s	remaining: 11.2s
1192:	learn: 0.9887255	test: 7.4093379	best: 7.4093379 (1192)	total: 2m 3s	remaining: 11.1s
1193:	learn: 0.9878922	test: 7.4090473	best: 7.4090473 (1193)	total: 2m 3s	remaining: 11s
1194:	learn: 0.9872693	test: 7.4091467	best: 7.4090473 (1193)	total: 2m 3s	remaining: 10.9s
1195:	learn: 0.9863230	test: 7.4088852	best: 7.4088852 (1195)	total: 2m 3s	remaining: 10.8s
1196:	learn: 0.9855200	test: 7.4086033	best: 7.4086033 (1196)	total: 2m 3s	remaining: 10.6s
1197:	learn: 0.9848070	test: 7.4085176	best: 7.4085176 (1197)	total: 2m 3s	remaining: 10.5s
1198:	learn: 0.9843412	test: 7.4080275	best: 7.4080275 (1198)	total: 2m 3s	remaini

1277:	learn: 0.9462510	test: 7.3787739	best: 7.3787739 (1277)	total: 2m 12s	remaining: 2.28s
1278:	learn: 0.9459070	test: 7.3786461	best: 7.3786461 (1278)	total: 2m 12s	remaining: 2.18s
1279:	learn: 0.9455227	test: 7.3781848	best: 7.3781848 (1279)	total: 2m 12s	remaining: 2.08s
1280:	learn: 0.9453124	test: 7.3781232	best: 7.3781232 (1280)	total: 2m 13s	remaining: 1.97s
1281:	learn: 0.9449909	test: 7.3779928	best: 7.3779928 (1281)	total: 2m 13s	remaining: 1.87s
1282:	learn: 0.9446003	test: 7.3779519	best: 7.3779519 (1282)	total: 2m 13s	remaining: 1.76s
1283:	learn: 0.9443372	test: 7.3777602	best: 7.3777602 (1283)	total: 2m 13s	remaining: 1.66s
1284:	learn: 0.9440290	test: 7.3776536	best: 7.3776536 (1284)	total: 2m 13s	remaining: 1.56s
1285:	learn: 0.9437761	test: 7.3771813	best: 7.3771813 (1285)	total: 2m 13s	remaining: 1.45s
1286:	learn: 0.9431629	test: 7.3767768	best: 7.3767768 (1286)	total: 2m 13s	remaining: 1.35s
1287:	learn: 0.9428089	test: 7.3767433	best: 7.3767433 (1287)	total: 2

<catboost.core.CatBoostRegressor at 0x7fd41d343ca0>

# 결과 예측

In [146]:
y_test = cb_model.predict(temp)
a = pd.DataFrame(y_test, columns=['매수고객수'])
result = pd.concat([temp, a], axis = 1)

out = result[['그룹번호', '종목번호', '매수고객수']].sort_values(by=['그룹번호','매수고객수'], 
                                                    ascending = [True, False])

result = out.groupby(['그룹번호']).head(3)
result

Unnamed: 0,그룹번호,종목번호,매수고객수
144,0,17,27.849807
192,0,18,20.619120
0,0,3,16.904124
4656,0,123,16.525934
5808,0,114,16.025241
...,...,...,...
5401,47,93,1.841982
2617,47,96,1.815905
1993,47,5,1.651924
3577,47,8,1.576841


In [149]:
group_name = df_dict2
stock_name = df_dict
#result
output = pd.DataFrame(columns=['그룹번호','Top1','Top2','Top3'])

top_list = []
top = []
count = 0
for idx, col in result.iterrows():
    if count % 3 == 0:
        count +=1
        top.append(group_name.iloc[col['그룹번호'].astype(int), 1])
        top.append(stock_name.iloc[col['종목번호'].astype(int), 1])
    elif count % 3 == 1:
        count +=1
        top.append(stock_name.iloc[col['종목번호'].astype(int), 1])
    else:
        top.append(stock_name.iloc[col['종목번호'].astype(int), 1])
        top_list.append(top)
        #clear
        top = []
        count = 0
        
for idx, top in enumerate(top_list):
    output.loc[idx] = top

output.to_csv("result.csv", mode='w', header=True, index=False)
output

Unnamed: 0,그룹번호,Top1,Top2,Top3
0,MAD01,A005930,A005935,A000660
1,MAD02,A272210,A004370,A005930
2,MAD03,A228760,A272210,A257370
3,MAD04,A005930,A272210,A174880
4,MAD05,A272210,A218410,A174880
5,MAD06,A272210,A218410,A174880
6,MAD07,A272210,A004370,A096770
7,MAD08,A272210,A007570,A228760
8,MAD09,A272210,A004370,A174880
9,MAD10,A272210,A015760,A004370
