In [None]:
# 금 가격, 국채 가격, 변동성 지수를 사용해 트리 모델을 먼저 훈련하고,
# 해당 모델에 기반해 다른 트리를 만듦.
# 이 과정을 순차적으로 계속하게 되면 편향이 적은 모델이 만들어짐.
# 만약 투자에 영향을 가장 많이 미치는 것이 신문이나 방송 등 뉴스 정보라고 생각한다면
# NLP 기술을 활용해 데이터를 정제할 수 있음.
# 핵심은 예측하고자 하는 시장의 움직임에 영향을 주거나 관련이 있다고 생각되는 자료를 수집,
# 가공하고 '이해를 바탕으로 한' 머신러닝 알고리즘을 적용해야 한다는 것임.

In [3]:
import warnings
warnings.filterwarnings('ignore')
import glob
import os
import datetime
import matplotlib.pyplot as plt 
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split 
from sklearn.model_selection import cross_validate
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression 
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from xgboost import plot_importance 
from sklearn.metrics import f1_score
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.metrics import accuracy_score
from sklearn import svm 
import seaborn as sns; sns.set()

In [4]:
# Load the ETF dataset from a CSV path relative to the notebook's working directory.
# NOTE(review): the preview of this frame shows an 'Unnamed: 0' column, which
# suggests the CSV was written with its index included — confirm whether that
# column is needed downstream.
df = pd.read_csv('./data/ETFs_main.csv')

In [9]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,Dates,CLOSE_SPY,OPEN,HIGH,LOW,VOLUME,CLOSE_GLD,CLOSE_FXY,CLOSE_T10Y2Y,CLOSE_TED,CLOSE_USO,CLOSE_UUP,CLOSE_VIX,CLOSE_VWO
0,2007-02-20,146.04,145.56,146.2,144.0,56909500.0,65.31,83.51,2.3263,0.31,48.67,25.07,10.24,40.055
1,2007-02-21,145.98,145.61,146.07,145.0,63971500.0,67.28,82.9,2.3653,0.32,49.86,25.12,10.2,39.975
2,2007-02-22,145.87,146.05,146.42,145.0,79067398.0,67.15,82.46,2.3871,0.31,50.33,25.12,10.18,40.22
3,2007-02-23,145.3,145.74,145.79,145.0,71962797.0,67.72,82.78,2.3809,0.31,50.46,25.04,10.58,40.035
4,2007-02-26,145.17,145.83,145.95,145.0,69320062.0,68.1,83.08,2.3795,0.31,50.9,25.04,11.15,39.96


In [11]:
# 기술 지표 만들기
def moving_average(df, n):
    """Append an n-period simple moving average of ``CLOSE_SPY`` to ``df``.

    :param df: pandas.DataFrame containing a ``CLOSE_SPY`` column
    :param n: window length; rows with fewer than ``n`` observations get NaN
    :return: new pandas.DataFrame with an extra ``MA_<n>`` column
             (the input frame is not modified)
    """
    column_name = f'MA_{n}'
    # min_periods=n keeps the warm-up rows as NaN instead of partial averages.
    rolling_mean = df['CLOSE_SPY'].rolling(window=n, min_periods=n).mean()
    return df.join(rolling_mean.rename(column_name))

def volume_moving_average(df,n):
    """Append an n-period simple moving average of ``VOLUME`` to ``df``.

    :param df: pandas.DataFrame containing a ``VOLUME`` column
    :param n: window length; rows with fewer than ``n`` observations get NaN
    :return: new pandas.DataFrame with an extra ``VMA_<n>`` column
             (the input frame is not modified)
    """
    volume_window = df['VOLUME'].rolling(window=n, min_periods=n)
    averaged_volume = volume_window.mean().rename(f'VMA_{n}')
    result = df.join(averaged_volume)
    return result

def relative_strength_index(df, n):
    """Append a directional-movement based strength index to ``df``.

    For each row the move from the previous row is measured on both sides:

    * up move   = HIGH[t] - HIGH[t-1]
    * down move = LOW[t-1] - LOW[t]

    The up move is kept only when it is positive and larger than the down
    move (otherwise 0), and symmetrically for the down move. Both series are
    smoothed with an exponentially weighted mean (``span=n``,
    ``min_periods=n``) and combined as ``pos / (pos + neg)``.

    The result lies in [0, 1] (it is not scaled to 0-100). It is NaN during
    the first ``n - 1`` rows and wherever both smoothed series are zero.

    :param df: pandas.DataFrame with ``HIGH`` and ``LOW`` columns
    :param n: span (and minimum number of observations) for the EWM smoothing
    :return: new pandas.DataFrame with an extra ``RSI_<n>`` column
             (the input frame is not modified)
    """
    # diff() yields NaN on the first row; every comparison against NaN is
    # False, so the first row falls through to 0 in both series below.
    up_move = df['HIGH'].diff()
    down_move = -df['LOW'].diff()

    # Keep only the dominant, strictly positive move on each side.
    up_strength = up_move.where((up_move > down_move) & (up_move > 0), 0.0)
    down_strength = down_move.where((down_move > up_move) & (down_move > 0), 0.0)

    pos_di = up_strength.ewm(span=n, min_periods=n).mean()
    neg_di = down_strength.ewm(span=n, min_periods=n).mean()

    # The series keep df's own index, so the join aligns row-for-row
    # regardless of the index type (integer range, dates, ...).
    rsi = (pos_di / (pos_di + neg_di)).rename('RSI_' + str(n))
    return df.join(rsi)