In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set up a Korean-capable font so Hangul labels render in matplotlib figures.
import os
import warnings
from matplotlib import font_manager, rc

font_path = "C:/Windows/Fonts/malgun.ttf"
if os.path.exists(font_path):
    # Resolve the family name from the font file and make it the default.
    font = font_manager.FontProperties(fname=font_path).get_name()
    rc('font', family=font)
else:
    # The path above only exists on Windows. Keep matplotlib's default font
    # instead of aborting the whole import cell on other machines.
    font = None
    warnings.warn(
        f"Korean font not found at {font_path}; Hangul text may not render in plots."
    )

import warnings 
# Silence every warning category for the rest of the session. This also hides
# diagnostics raised by pandas and the model libraries, so disable this line
# while debugging.
warnings.filterwarnings('ignore')
import os 
import re
import FinanceDataReader as fdr
import time

# Show up to 70 characters per cell so the long model paths stay readable.
# Use the canonical option name: the previous 'display.max.colwidth' spelling
# only worked because pandas treats the key as a regex pattern.
pd.set_option('display.max_colwidth', 70)

In [4]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import classification_report, f1_score, roc_auc_score ,accuracy_score, precision_score, recall_score, confusion_matrix
import joblib 
from sklearn.preprocessing import MinMaxScaler

# One default-configured instance of each classifier family, kept both under
# its own name and together in `models`.
rfc, xgb, cat = RandomForestClassifier(), XGBClassifier(), CatBoostClassifier()
models = [rfc, xgb, cat]

# 함수

In [5]:
# Listing table read by corp_code(); the first CSV column becomes the index.
df_kospi = pd.read_csv(
    './data/recent_kospi_list.csv',
    index_col=0,
)
# Return the ticker code
def corp_code(corp_name):
    """Look up the ticker code of ``corp_name`` in the global ``df_kospi`` table.

    The ``Symbol`` value of the first row whose ``Name`` equals ``corp_name``
    is converted to a string and left-padded with zeros to at least six
    characters.

    Raises:
        IndexError: if no row of ``df_kospi`` has that name.
    """
    symbols = df_kospi.loc[df_kospi['Name'] == corp_name, 'Symbol']
    return str(symbols.iloc[0]).zfill(6)

# Stock price data
def stock_price(code, bgn_date='2016-01-01', end_date='2022-03-31'):
    """Fetch price data for ``code`` and index it by a column named '날짜'.

    Args:
        code: ticker code passed straight to ``fdr.DataReader``.
        bgn_date: start date passed to the reader.
        end_date: end date passed to the reader.

    Returns:
        The reader's frame with its ``Date`` column/index renamed to '날짜'
        and used as the index.
    """
    prices = fdr.DataReader(code, bgn_date, end_date)
    return (
        prices.reset_index()
        .rename(columns={'Date': '날짜'})
        .set_index('날짜', drop=True)
    )

# Join helper
def merge(df_count, df_p):
    """Right-join ``df_count`` onto the ``Close`` column of ``df_p`` by index.

    Every index label of ``df_p`` is kept; rows missing from ``df_count`` get
    NaN in the ``df_count`` columns.
    """
    close = df_p['Close']
    return pd.merge(
        df_count,
        close,
        how='right',
        left_index=True,
        right_index=True,
    )

from sklearn.preprocessing import MinMaxScaler

def mscaler(df):
    """Return a copy of ``df`` with every column rescaled by ``MinMaxScaler``.

    Side effect: the index of the frame passed in is replaced with a
    ``DatetimeIndex`` built from its current index, so the caller's object is
    modified as well.
    """
    # Re-index the caller's frame in place before copying it.
    df.index = pd.DatetimeIndex(df.index)

    columns = df.columns
    df_scaled = df.copy()
    # A fresh scaler is fitted on each call, on all columns at once.
    df_scaled[columns] = MinMaxScaler().fit_transform(df[columns])
    return df_scaled

def add_target(df_merge, window_size, period_rate):
    """Append a binary ``target`` column based on the forward return of the last column.

    For each row ``i`` that has a row ``window_size`` positions later, the
    relative change of the LAST column between row ``i`` and row
    ``i + window_size`` is computed; ``target`` is 1 when that change is
    greater than or equal to ``period_rate`` and 0 otherwise (NaN changes
    count as 0).

    Args:
        df_merge: frame whose last column holds the price series.
        window_size: non-negative number of rows to look ahead.
        period_rate: minimum relative change that is labelled 1.

    Returns:
        A new frame holding the first ``len(df_merge) - window_size`` rows of
        ``df_merge`` plus the ``target`` column. ``df_merge`` is not modified.

    Raises:
        ValueError: if ``window_size`` is negative.
    """
    if window_size < 0:
        raise ValueError("window_size must be non-negative")

    # Rows that still have a value `window_size` rows ahead. Counting from the
    # front (rather than slicing with `[:-window_size]`) keeps window_size == 0
    # working, where `[:-0]` would select nothing.
    n_rows = max(len(df_merge) - window_size, 0)

    if n_rows:
        prices = df_merge.iloc[:, -1].to_numpy(dtype=float)
        base = prices[:n_rows]
        future = prices[window_size:window_size + n_rows]
        earning_rate = (future - base) / base
        target = (earning_rate >= period_rate).astype('int64')
    else:
        target = []

    # Copy so the new column is written to an independent frame, not a slice
    # of the caller's data.
    df_model = df_merge.iloc[:n_rows].copy()
    df_model['target'] = target

    return df_model

def feature_visualization(save_path, num, feature_names=None):
    """Plot and return the ``num`` largest feature importances of a saved model.

    Args:
        save_path: path of a model file written with joblib; the loaded
            object must expose ``feature_importances_``.
        num: how many of the most important features to keep.
        feature_names: labels for the importances, in model feature order.
            When omitted, the notebook-level ``x_test.columns`` is used, which
            requires ``x_test`` to be defined before the call.

    Returns:
        A Series of the top importances, sorted in descending order and
        indexed by feature name.
    """
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code.
    # Only load model files produced by this project.
    model = joblib.load(save_path)
    # Importances come back as a plain array
    ft_importance_values = model.feature_importances_

    if feature_names is None:
        # Backward-compatible fallback to the global test feature frame.
        feature_names = x_test.columns

    # Convert to a Series to make sorting and plotting easy
    ft_series = pd.Series(ft_importance_values, index=feature_names)
    ft_top = ft_series.sort_values(ascending=False)[:num]

    # Plot; the title reports how many features are actually shown instead of
    # a fixed "20".
    plt.figure(figsize=(8,6))
    plt.title(f'Feature Importance Top {len(ft_top)}')
    sns.barplot(x=ft_top, y=ft_top.index)
    plt.show()

    return ft_top

# 모델결과 보기

## 7일 5%

In [8]:
# Saved evaluation results for the 7-day / 0.05 run; first CSV column is the index.
result_7_5 = pd.read_csv(
    './data/model_result/machine_model2_7일_0.05.csv',
    index_col=0,
)
result_7_5

Unnamed: 0,회사이름,모델이름,accuracy,precision,recall,roc_auc,모델주소
0,LG이노텍,RandomForestClassifier,0.53913,0.666667,0.140351,0.608742,./data/machine_model2_7일_0.05/LG이노텍_RandomForestClassifier.pkl
1,LG이노텍,XGBClassifier,0.513043,0.666667,0.035088,0.57441,./data/machine_model2_7일_0.05/LG이노텍_XGBClassifier.pkl
2,LG이노텍,CatBoostClassifier,0.521739,0.75,0.052632,0.557623,./data/machine_model2_7일_0.05/LG이노텍_CatBoostClassifier.pkl
3,S-Oil,RandomForestClassifier,0.817391,0.75,0.130435,0.579395,./data/machine_model2_7일_0.05/S-Oil_RandomForestClassifier.pkl
4,S-Oil,CatBoostClassifier,0.808696,0.6,0.130435,0.512287,./data/machine_model2_7일_0.05/S-Oil_CatBoostClassifier.pkl
5,SKC,XGBClassifier,0.791304,0.571429,0.16,0.586889,./data/machine_model2_7일_0.05/SKC_XGBClassifier.pkl
6,STX,RandomForestClassifier,0.773913,0.666667,0.074074,0.436448,./data/machine_model2_7일_0.05/STX_RandomForestClassifier.pkl
7,TCC스틸,RandomForestClassifier,0.652174,0.666667,0.139535,0.541667,./data/machine_model2_7일_0.05/TCC스틸_RandomForestClassifier.pkl
8,TCC스틸,XGBClassifier,0.643478,0.6,0.139535,0.558947,./data/machine_model2_7일_0.05/TCC스틸_XGBClassifier.pkl
9,TCC스틸,CatBoostClassifier,0.643478,0.625,0.116279,0.5927,./data/machine_model2_7일_0.05/TCC스틸_CatBoostClassifier.pkl


## 7일 10%

In [6]:
# Saved evaluation results for the 7-day / 0.1 run; first CSV column is the index.
result_7_10 = pd.read_csv(
    './data/model_result/machine_model2_7일_0.1.csv',
    index_col=0,
)
result_7_10

Unnamed: 0,회사이름,모델이름,accuracy,precision,recall,roc_auc,모델주소
0,대유에이텍,CatBoostClassifier,0.965217,0.666667,0.4,0.508182,./data/machine_model2_7일_0.1/대유에이텍_CatBoostClassifier.pkl
1,동양,RandomForestClassifier,0.947826,0.666667,0.285714,0.564153,./data/machine_model2_7일_0.1/동양_RandomForestClassifier.pkl
2,일진머티리얼즈,CatBoostClassifier,0.913043,0.666667,0.181818,0.597028,./data/machine_model2_7일_0.1/일진머티리얼즈_CatBoostClassifier.pkl
3,진원생명과학,RandomForestClassifier,0.843478,0.6,0.157895,0.593202,./data/machine_model2_7일_0.1/진원생명과학_RandomForestClassifier.pkl
4,화신,RandomForestClassifier,0.93913,0.666667,0.25,0.754673,./data/machine_model2_7일_0.1/화신_RandomForestClassifier.pkl


## 7일 15%

In [7]:
# Saved evaluation results for the 7-day / 0.15 run; first CSV column is the index.
result_7_15 = pd.read_csv(
    './data/model_result/machine_model2_7일_0.15.csv',
    index_col=0,
)
result_7_15

Unnamed: 0,회사이름,모델이름,accuracy,precision,recall,roc_auc,모델주소


## 30일 5%

In [10]:
# Saved evaluation results for the 30-day / 0.05 run; first CSV column is the index.
result_30_5 = pd.read_csv(
    './data/model_result/machine_model2_30일_0.05.csv',
    index_col=0,
)
result_30_5

Unnamed: 0,회사이름,모델이름,accuracy,precision,recall,roc_auc,모델주소
0,AJ네트웍스,RandomForestClassifier,0.717391,0.800000,0.137931,0.568966,./data/machine_model2_30일_0.05/AJ네트웍스_RandomForestClassifier.pkl
1,AJ네트웍스,CatBoostClassifier,0.717391,0.800000,0.137931,0.582649,./data/machine_model2_30일_0.05/AJ네트웍스_CatBoostClassifier.pkl
2,DB하이텍,RandomForestClassifier,0.434783,0.600000,0.214286,0.464782,./data/machine_model2_30일_0.05/DB하이텍_RandomForestClassifier.pkl
3,DB하이텍,CatBoostClassifier,0.413043,0.583333,0.125000,0.425843,./data/machine_model2_30일_0.05/DB하이텍_CatBoostClassifier.pkl
4,LG디스플레이,RandomForestClassifier,0.489130,0.529412,0.187500,0.545691,./data/machine_model2_30일_0.05/LG디스플레이_RandomForestClassifier.pkl
...,...,...,...,...,...,...,...
80,한미글로벌,CatBoostClassifier,0.467391,0.666667,0.040000,0.502857,./data/machine_model2_30일_0.05/한미글로벌_CatBoostClassifier.pkl
81,한미반도체,XGBClassifier,0.597826,0.666667,0.102564,0.523948,./data/machine_model2_30일_0.05/한미반도체_XGBClassifier.pkl
82,한미반도체,CatBoostClassifier,0.586957,0.555556,0.128205,0.444122,./data/machine_model2_30일_0.05/한미반도체_CatBoostClassifier.pkl
83,한화에어로스페이스,XGBClassifier,0.489130,0.600000,0.062500,0.702888,./data/machine_model2_30일_0.05/한화에어로스페이스_XGBClassifier.pkl


## 30일 10%

In [11]:
# Saved evaluation results for the 30-day / 0.1 run; first CSV column is the index.
result_30_10 = pd.read_csv(
    './data/model_result/machine_model2_30일_0.1.csv',
    index_col=0,
)
result_30_10

Unnamed: 0,회사이름,모델이름,accuracy,precision,recall,roc_auc,모델주소
0,GS건설,XGBClassifier,0.771739,0.571429,0.181818,0.624675,./data/machine_model2_30일_0.1/GS건설_XGBClassifier.pkl
1,LG이노텍,RandomForestClassifier,0.402174,0.692308,0.15,0.511458,./data/machine_model2_30일_0.1/LG이노텍_RandomForestClassifier.pkl
2,SGC에너지,RandomForestClassifier,0.73913,0.545455,0.24,0.565373,./data/machine_model2_30일_0.1/SGC에너지_RandomForestClassifier.pkl
3,SK하이닉스,CatBoostClassifier,0.608696,0.6,0.081081,0.497543,./data/machine_model2_30일_0.1/SK하이닉스_CatBoostClassifier.pkl
4,SPC삼립,RandomForestClassifier,0.652174,0.8,0.114286,0.561153,./data/machine_model2_30일_0.1/SPC삼립_RandomForestClassifier.pkl
5,SPC삼립,CatBoostClassifier,0.652174,0.8,0.114286,0.554386,./data/machine_model2_30일_0.1/SPC삼립_CatBoostClassifier.pkl
6,고려아연,XGBClassifier,0.847826,0.666667,0.133333,0.739394,./data/machine_model2_30일_0.1/고려아연_XGBClassifier.pkl
7,국제약품,XGBClassifier,0.815217,0.666667,0.111111,0.547297,./data/machine_model2_30일_0.1/국제약품_XGBClassifier.pkl
8,모나미,CatBoostClassifier,0.945652,0.666667,0.333333,0.631783,./data/machine_model2_30일_0.1/모나미_CatBoostClassifier.pkl
9,무학,RandomForestClassifier,0.717391,0.571429,0.148148,0.47208,./data/machine_model2_30일_0.1/무학_RandomForestClassifier.pkl


## 30일 15%

In [12]:
# Saved evaluation results for the 30-day / 0.15 run; first CSV column is the index.
# NOTE(review): this path starts with `machine_model_`, while every other
# result cell reads a `machine_model2_` file - confirm this is the intended
# run before comparing its scores with the other tables.
result_30_15 = pd.read_csv(
    './data/model_result/machine_model_30일_0.15.csv',
    index_col=0,
)
result_30_15

Unnamed: 0,회사이름,모델이름,accuracy,precision,recall,roc_auc,모델주소
0,대우조선해양,RandomForestClassifier,0.858696,0.642857,0.529412,0.823137,./data/machine_model_30일_0.15/대우조선해양_RandomForestClassifier.pkl
1,대우조선해양,XGBClassifier,0.880435,0.6875,0.647059,0.861961,./data/machine_model_30일_0.15/대우조선해양_XGBClassifier.pkl
2,대우조선해양,CatBoostClassifier,0.902174,0.9,0.529412,0.841569,./data/machine_model_30일_0.15/대우조선해양_CatBoostClassifier.pkl
3,대웅,RandomForestClassifier,0.902174,0.6875,0.733333,0.937229,./data/machine_model_30일_0.15/대웅_RandomForestClassifier.pkl
4,대웅,XGBClassifier,0.923913,0.785714,0.733333,0.934199,./data/machine_model_30일_0.15/대웅_XGBClassifier.pkl
5,디아이씨,RandomForestClassifier,0.826087,0.644444,1.0,0.951834,./data/machine_model_30일_0.15/디아이씨_RandomForestClassifier.pkl
6,디아이씨,XGBClassifier,0.913043,0.783784,1.0,0.973727,./data/machine_model_30일_0.15/디아이씨_XGBClassifier.pkl
7,디아이씨,CatBoostClassifier,0.880435,0.725,1.0,0.998084,./data/machine_model_30일_0.15/디아이씨_CatBoostClassifier.pkl
8,삼성엔지니어링,XGBClassifier,0.880435,0.666667,0.428571,0.826923,./data/machine_model_30일_0.15/삼성엔지니어링_XGBClassifier.pkl
9,일진다이아,RandomForestClassifier,0.98913,0.75,1.0,0.996255,./data/machine_model_30일_0.15/일진다이아_RandomForestClassifier.pkl
