In [None]:
# Install FinanceDataReader, which is imported below as `fdr` and used to download index prices.
!pip install finance-datareader

In [None]:
import numpy as np 
import pandas as pd 
import os
import FinanceDataReader as fdr
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler




import warnings
warnings.filterwarnings(action='ignore')

In [None]:
# Display names and the matching FinanceDataReader ticker codes; the two lists are index-aligned.
# NOTE: the download loops in later cells overwrite the entries of `stock_name` with DataFrames.
stock_name = ['KOSPI200','SNP500','NASDAQ','KOSDAQ','DOWJONES']
stock_code = ['KS200','US500','IXIC','KQ11','DJI']

In [None]:
# Download the daily history of every index for the training period.
# Each slot of `stock_name` is replaced by the DataFrame fetched for the ticker at the same position.
start_date = '2011-01-01'
end_date = '2021-11-05'
for i, ticker in enumerate(stock_code):
    history = fdr.DataReader(ticker,start=start_date,end=end_date)
    stock_name[i] = history[['Close','Open','High','Low','Volume','Change']]
    

In [None]:
# Bind each downloaded frame to a readable name (order follows `stock_code`).
KOSPI200, SNP500, NASDAQ, KOSDAQ, US30 = (stock_name[pos] for pos in range(5))

In [None]:
# Print every downloaded frame as a quick sanity check of the data.
print('KOSPI200\n',KOSPI200)
print('S&P500\n',SNP500)
print('NASDAQ\n',NASDAQ)
print('KOSDAQ\n',KOSDAQ)
print('US30\n',US30)

In [None]:
# Heatmap of the pairwise correlations between the raw KOSPI200 price/volume columns.
# `corr_data` and `colormap` are reused by the following cell.
corr_data = KOSPI200[['Close', 'Open', 'High', 'Low', 'Volume', 'Change']]
colormap = plt.cm.PuBu  
f , ax = plt.subplots(figsize = (10,5)) 
plt.title('Correlation of Numeric Features with Close Indices',y=1,size=18)
sns.heatmap(corr_data.corr(),square = True, linewidths = 0.2,
            cmap = colormap, linecolor = "white", vmax=1)

In [None]:
# Order the six columns by their correlation with 'Close' (largest first) and draw an
# annotated heatmap of the correlation coefficients in that order.
cols = corr_data.corr().nlargest(6,'Close')['Close'].index 
print(cols) 
cm = np.corrcoef(KOSPI200[cols].values.T) 
f , ax = plt.subplots(figsize = (15,8)) 
heatmap = sns.heatmap(cm, vmax=1, linewidths=0.1,square=True,annot=True,cmap=colormap, linecolor="white",xticklabels = cols.values ,yticklabels = cols.values)

In [None]:
# Show the column labels currently present in the KOSPI200 frame.
KOSPI200.keys()

# Moving Average

In [None]:
def MA(data):
    """Return a copy of ``data`` extended with moving-average indicator columns.

    Added columns, in this order:
      * ``MA{w}``  - rolling mean of 'Close' for w in 5, 10, 20, 60, 120
      * ``VMA{w}`` - rolling mean of 'Volume' for the same windows
      * ``20_Upper`` / ``20_Lower`` - MA20 plus/minus two 20-row rolling standard deviations
      * ``Close_MA{w}`` / ``Volume_MA{w}`` - value divided by its own moving average

    Rows containing NaN in any column (including the leading rows for which the
    longest window is incomplete) are dropped before the ratio columns are built.

    Args:
        data: DataFrame with at least 'Close' and 'Volume' columns.

    Returns:
        A new DataFrame; ``data`` itself is left untouched, so calling the
        function again on the same input gives the same result.
    """
    windows = (5, 10, 20, 60, 120)
    # Work on a copy: the original implementation mutated the caller's frame,
    # which made re-running the calling cell trim the data a second time.
    out = data.copy()

    for w in windows:
        out[f'MA{w}'] = out['Close'].rolling(window=w).mean()
    for w in windows:
        out[f'VMA{w}'] = out['Volume'].rolling(window=w).mean()

    # Band half-width: two rolling standard deviations of the close.
    band = 2 * out['Close'].rolling(window=20).std()
    out['20_Upper'] = out['MA20'] + band
    out['20_Lower'] = out['MA20'] - band

    # Drop incomplete rows before computing ratios, so a NaN ratio (e.g. 0/0)
    # does not remove additional rows.
    out.dropna(inplace=True)

    for w in windows:
        out[f'Close_MA{w}'] = out['Close'] / out[f'MA{w}']
    for w in windows:
        out[f'Volume_MA{w}'] = out['Volume'] / out[f'VMA{w}']
    return out

In [None]:
# Add the moving-average / Bollinger / ratio columns to every index frame.
# MA() drops rows containing NaN, which includes the leading rows of the 120-row window.
KOSPI200 = MA(KOSPI200)
SNP500 = MA(SNP500)
KOSDAQ = MA(KOSDAQ)
US30 = MA(US30)
NASDAQ = MA(NASDAQ)

In [None]:
# Correlation heatmap over the raw columns plus every engineered indicator column.
corr_data = KOSPI200[['Close', 'Open', 'High', 'Low', 'Volume', 'Change', 'MA5', 'MA10',
       'MA20', 'MA60', 'MA120', 'VMA5', 'VMA10', 'VMA20', 'VMA60', 'VMA120',
       '20_Upper', '20_Lower', 'Close_MA5', 'Close_MA10', 'Close_MA20',
       'Close_MA60', 'Close_MA120', 'Volume_MA5', 'Volume_MA10', 'Volume_MA20',
       'Volume_MA60', 'Volume_MA120']]
colormap = plt.cm.PuBu  
f , ax = plt.subplots(figsize = (14,12)) 
# Title fixed: the data are index closes, not sale prices (matches the first heatmap's title).
plt.title('Correlation of Numeric Features with Close Indices',y=1,size=18)
sns.heatmap(corr_data.corr(),square = True, linewidths = 0.2,
            cmap = colormap, linecolor = "white", vmax=1)

In [None]:
# Rank columns by correlation with 'Close', keeping one fewer than the total number of
# columns, then save an annotated coefficient heatmap to heatmap.png.
k = len(KOSPI200.keys())-1
cols = corr_data.corr().nlargest(k,'Close')['Close'].index 
print(cols) 
cm = np.corrcoef(KOSPI200[cols].values.T) 
f , ax = plt.subplots(figsize = (20,20)) 
heatmap = sns.heatmap(cm, vmax=1, linewidths=0.1,square=True,annot=True,cmap=colormap, linecolor="white",xticklabels = cols.values ,yticklabels = cols.values)
plt.savefig('heatmap.png')

In [None]:
# Count missing values per column in the US30 frame after feature engineering.
US30.isna().sum()

In [None]:
def plot_MA(data):
    """Plot the close together with its 5/10/20/60/120-row moving averages.

    Args:
        data: DataFrame containing 'Close' and the 'MA5' ... 'MA120' columns.

    Returns:
        None (the value of ``plt.show()``).

    Note:
        ``plt.rc`` changes the global matplotlib font size, so it also affects
        figures drawn afterwards.
    """
    # The `%matplotlib inline` magic was removed from the function body: IPython
    # magics are not Python statements and do not belong inside a def.
    plt.rc('font',size=20)
    fig, ax = plt.subplots(figsize=(30,10))
    ax.plot(data['Close'],label='Close')
    for window in (5,10,20,60,120):
        ax.plot(data[f'MA{window}'],label=f'MA{window}')
    ax.set_xlabel('Year')
    ax.set_ylabel('Index')  # was misspelled 'Indics'
    ax.legend(fontsize=30)
    ax.grid(True)
    return plt.show()
    

In [None]:
def plot_VMA(data):
    """Plot the volume together with its 5/10/20/60/120-row moving averages.

    Args:
        data: DataFrame containing 'Volume' and the 'VMA5' ... 'VMA120' columns.

    Returns:
        None (the value of ``plt.show()``).

    Note:
        ``plt.rc`` changes the global matplotlib font size, so it also affects
        figures drawn afterwards.
    """
    # The `%matplotlib inline` magic was removed from the function body: IPython
    # magics are not Python statements and do not belong inside a def.
    plt.rc('font',size=20)
    fig, ax = plt.subplots(figsize=(30,10))
    ax.plot(data['Volume'],label='Volume')
    for window in (5,10,20,60,120):
        ax.plot(data[f'VMA{window}'],label=f'VMA{window}')
    ax.set_ylabel('Volume')
    ax.set_xlabel('year')
    ax.legend(fontsize=30)
    ax.grid(True)
    return plt.show()
    

In [None]:
def bol_plot(data):
    """Plot the close with its upper and lower Bollinger bands.

    Args:
        data: DataFrame containing 'Close', '20_Upper' and '20_Lower' columns.

    Returns:
        None (the value of ``plt.show()``).

    Note:
        ``plt.rc`` changes the global matplotlib font size, so it also affects
        figures drawn afterwards.
    """
    plt.rc('font',size=20)
    fig, ax = plt.subplots(figsize=(30,10))
    ax.plot(data['Close'],color='black',label='Close_price')
    ax.plot(data['20_Upper'],ls='--',color='red',label='Upper Band')  # was 'Uper Band'
    ax.plot(data['20_Lower'],ls='--',color='blue',label='Lower Band')
    ax.set_xlabel('Date')
    ax.set_ylabel('Index')  # was misspelled 'Indics'
    ax.legend()
    ax.grid(True)
    return plt.show()

In [None]:
# Plot only the rows from position 2000 onward so the recent period is readable.
plot_MA(KOSPI200[2000:])

In [None]:
# Volume moving averages for the same rows (position 2000 onward).
plot_VMA(KOSPI200[2000:])

In [None]:
# Bollinger bands for the same rows (position 2000 onward).
bol_plot(KOSPI200[2000:])

# ML Dataset 

In [None]:
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor
from sklearn.model_selection import train_test_split,StratifiedKFold,KFold
from sklearn.metrics import mean_squared_error
from lightgbm import LGBMRegressor
from sklearn.linear_model import Lasso
from xgboost import XGBRegressor

In [None]:
# Show the available columns before selecting the model features.
KOSPI200.keys()

In [None]:
def ML_data_preprocessing(data, test_size=5):
    """Drop the volume-related columns and split a frame into train / test parts.

    The volume columns and 'Change' are removed from ``data`` *in place*; this
    side effect is kept on purpose because later cells pass the same frames to
    the deep-learning pipeline and rely on the reduced column set.
    ``errors='ignore'`` makes the drop a no-op when the columns are already
    gone, so re-running the calling cell no longer raises ``KeyError``.

    Args:
        data: Feature frame containing a 'Close' column (the target).
        test_size: Number of trailing rows held out as the test part
            (must be positive). Defaults to 5, the original hard-coded value.

    Returns:
        Tuple ``(train_features, train_target, test_features, test_target)``,
        each with a fresh 0-based index; 'Close' is removed from the features.
    """
    data.drop(columns=['Volume','Volume_MA5','Volume_MA10','Volume_MA20','Volume_MA60','Volume_MA120',
                       'VMA5','VMA10','VMA20','VMA60','VMA120','Change'],
              inplace=True, errors='ignore')
    # NOTE(review): the remaining features (Open/High/Low/MAs) are taken from the
    # same row as the 'Close' target - confirm this is the intended setup.
    ML_dataname_train = data[:-test_size].reset_index(drop=True)
    ML_dataname_test = data[-test_size:].reset_index(drop=True)
    ML_dataname_train_target = ML_dataname_train['Close']
    ML_dataname_test_target = ML_dataname_test['Close']
    ML_dataname_train = ML_dataname_train.drop('Close',axis=1)
    ML_dataname_test = ML_dataname_test.drop('Close',axis=1)
    return ML_dataname_train, ML_dataname_train_target,ML_dataname_test, ML_dataname_test_target
    

In [None]:
# Build train/test features and targets for every index.
# The call also removes the volume columns from each source frame in place, so cells
# further down that reuse KOSPI200, SNP500, ... see the reduced column set.
ML_KOSPI200_train,ML_KOSPI200_target,ML_KOSPI200_test,ML_KOSPI200_test_target = ML_data_preprocessing(KOSPI200)
ML_SNP500_train,ML_SNP500_target,ML_SNP500_test,ML_SNP500_test_target = ML_data_preprocessing(SNP500)
ML_NASDAQ_train,ML_NASDAQ_target,ML_NASDAQ_test,ML_NASDAQ_test_target = ML_data_preprocessing(NASDAQ)
ML_KOSDAQ_train,ML_KOSDAQ_target,ML_KOSDAQ_test,ML_KOSDAQ_test_target = ML_data_preprocessing(KOSDAQ)
ML_US30_train,ML_US30_target,ML_US30_test,ML_US30_test_target = ML_data_preprocessing(US30)

In [None]:
# Sanity-check the shapes of the KOSPI200 train features, train target and test features.
print(ML_KOSPI200_train.shape,ML_KOSPI200_target.shape,ML_KOSPI200_test.shape)

In [None]:
# Confirm that the US30 training features contain no missing values.
ML_US30_train.isna().sum()

# ML models

In [None]:
def LGB_Model(x_train,y_train,x_test):
    """Train one LightGBM regressor per K-fold split and average their test predictions.

    Args:
        x_train: DataFrame of training features.
        y_train: Series of training targets aligned with ``x_train``.
        x_test: DataFrame of test features with the same columns as ``x_train``.

    Returns:
        numpy array with the mean prediction of all fold models for ``x_test``.
    """
    LGB_models = []
    kf = KFold(n_splits=5)
    for fold,(train_idx,valid_idx) in enumerate(kf.split(x_train,y_train),start=1):
        train_x = x_train.values[train_idx]
        train_y = y_train.values[train_idx]
        val_x = x_train.values[valid_idx]
        val_y = y_train.values[valid_idx]
        print(f"------------{fold}----------")
        lgb_model = LGBMRegressor(objective='regression',
                             max_depth=300,
                             n_estimators=8000,
                             learning_rate=0.005,
                             num_leaves=31)
        # NOTE(review): `early_stopping_rounds` / `verbose` are passed to fit();
        # newer LightGBM releases expect callbacks instead - confirm against the
        # installed version.
        lgb_model.fit(train_x,train_y,
                 eval_set=[(val_x,val_y)],
                 eval_metric=['rmse'],
                 early_stopping_rounds=500,
                 verbose=500
                 )
        LGB_models.append(lgb_model)
    LGB_result = [model.predict(x_test.values) for model in LGB_models]
    # Average over *all* fold models; the previous code summed only the first
    # four of the five predictions and silently ignored the last fold.
    LGB_pred = np.mean(LGB_result,axis=0)
    return LGB_pred

In [None]:
# Fit the LightGBM fold ensemble for every index and keep its test predictions.
print('-------KOSPI200----------')
LGB_KOSPI200_pred = LGB_Model(ML_KOSPI200_train,ML_KOSPI200_target,ML_KOSPI200_test)
print('-------S&P500------------')
LGB_SNP_500_pred = LGB_Model(ML_SNP500_train,ML_SNP500_target,ML_SNP500_test)
print('-------KOSDAQ------------')
LGB_KOSDAQ_pred = LGB_Model(ML_KOSDAQ_train,ML_KOSDAQ_target,ML_KOSDAQ_test)
print('----------NASDAQ----------')
LGB_NASDAQ_pred = LGB_Model(ML_NASDAQ_train,ML_NASDAQ_target,ML_NASDAQ_test)
print('----------US30----------')
LGB_US30_pred = LGB_Model(ML_US30_train,ML_US30_target,ML_US30_test)
print('END Training...')

In [None]:
def RF_model(x_train,y_train,x_test):
    """Train one random-forest regressor per K-fold split and average their test predictions.

    The validation RMSE of every fold is printed while training.

    Args:
        x_train: DataFrame of training features.
        y_train: Series of training targets aligned with ``x_train``.
        x_test: DataFrame of test features with the same columns as ``x_train``.

    Returns:
        numpy array with the mean prediction of all fold models for ``x_test``.
    """
    rf_models = []
    kf = KFold(n_splits=5)
    for fold,(train_idx,valid_idx) in enumerate(kf.split(x_train,y_train),start=1):
        train_x = x_train.values[train_idx]
        train_y = y_train.values[train_idx]
        val_x = x_train.values[valid_idx]
        val_y = y_train.values[valid_idx]
        print(f"--------------------fold:{fold}--------------------")
        rf_model = RandomForestRegressor(n_estimators=300,
                                         random_state=0)
        rf_model.fit(train_x,train_y)
        prediction = rf_model.predict(val_x)
        # Arguments in the documented (y_true, y_pred) order; the value is unchanged.
        score = np.sqrt(mean_squared_error(val_y,prediction))
        print(f"RMSE:{score}")
        rf_models.append(rf_model)
    rf_result = [model.predict(x_test.values) for model in rf_models]
    # Average over *all* fold models; the previous code summed only the first
    # four of the five predictions and silently ignored the last fold.
    rf_pred = np.mean(rf_result,axis=0)
    return rf_pred

In [None]:
# Fit the random-forest fold ensemble for every index and keep its test predictions.
print('------KOSPI200---------')
rf_KOSPI200_pred = RF_model(ML_KOSPI200_train,ML_KOSPI200_target,ML_KOSPI200_test)
print('-------S&P500----------')
rf_SNP500_pred = RF_model(ML_SNP500_train,ML_SNP500_target,ML_SNP500_test)
print('-------KOSDAQ----------')
rf_KOSDAQ_pred = RF_model(ML_KOSDAQ_train,ML_KOSDAQ_target,ML_KOSDAQ_test)
print('-------NASDAQ----------')
rf_NASDAQ_pred = RF_model(ML_NASDAQ_train,ML_NASDAQ_target,ML_NASDAQ_test)
print('-------US30----------')
rf_US30_pred = RF_model(ML_US30_train,ML_US30_target,ML_US30_test)
print('End Training')

In [None]:
def Make_final_pred(pred_1,pred_2):
    """Blend two predictions by taking their element-wise arithmetic mean."""
    combined = pred_1 + pred_2
    return combined / 2

In [None]:
# Blend the LightGBM and random-forest predictions of every index with equal weight.
KOSPI200_final_pred = Make_final_pred(LGB_KOSPI200_pred,rf_KOSPI200_pred)
SNP500_final_pred = Make_final_pred(LGB_SNP_500_pred,rf_SNP500_pred)
KOSDAQ_final_pred = Make_final_pred(LGB_KOSDAQ_pred,rf_KOSDAQ_pred)
NASDAQ_final_pred = Make_final_pred(LGB_NASDAQ_pred,rf_NASDAQ_pred)
US30_final_pred = Make_final_pred(LGB_US30_pred,rf_US30_pred)

# Compare to real_Indices

In [None]:
# Download the actual closes of the prediction week for every index.
# The slots of `stock_name` are overwritten again, this time with one-column frames.
start_date = '2021-11-08'
end_date = '2021-11-12'
for i, ticker in enumerate(stock_code):
    week = fdr.DataReader(ticker,start=start_date,end=end_date)
    stock_name[i] = week[['Close']]


In [None]:
# Bind the actual closes of the prediction week to readable names (order follows `stock_code`).
real_KOSPI200, real_SNP500, real_NASDAQ, real_KOSDAQ, real_US30 = (stock_name[pos] for pos in range(5))

In [None]:
def make_datetime(pred, start='2021-11-08', end='2021-11-12'):
    """Wrap predictions in a DataFrame indexed by business days.

    Args:
        pred: Sequence of predicted closes, one per business day between
            ``start`` and ``end`` (inclusive).
        start: First date of the prediction window. Defaults to the value that
            used to be hard-coded.
        end: Last date of the prediction window. Defaults to the value that
            used to be hard-coded.

    Returns:
        DataFrame with a business-day DatetimeIndex and a single
        'pred_Close_Indices' column.

    Raises:
        ValueError: if ``len(pred)`` does not match the number of business days.
    """
    business_days = pd.date_range(pd.to_datetime(start),pd.to_datetime(end),freq='B')
    name_pred = pd.DataFrame(index=business_days)
    name_pred['pred_Close_Indices'] = pred
    return name_pred

In [None]:
# Give every blended prediction the business-day index of the prediction week.
ML_KOSPI200_pred = make_datetime(KOSPI200_final_pred)
ML_SNP500_pred = make_datetime(SNP500_final_pred)
ML_KOSDAQ_pred = make_datetime(KOSDAQ_final_pred)
ML_NASDAQ_pred = make_datetime(NASDAQ_final_pred)
ML_US30_pred = make_datetime(US30_final_pred)

In [None]:
def plot_result(label,pred,real):
    """Draw predicted and actual index values on one titled figure.

    ``pred`` is drawn as a black dash-dot line, ``real`` as a red line. The
    global matplotlib font size is set to 20 as a side effect.
    """
    plt.rc('font',size=20)
    fig, ax = plt.subplots(figsize=(17,10))
    ax.set_title(label)
    ax.plot(pred,ls='-.',label='prediction_Indices',color='black')
    ax.plot(real,label='real_Indices',color='red')
    ax.legend(loc='upper right')
    plt.show()

In [None]:
# ML ensemble prediction vs. actual close: KOSPI200.
plot_result('KOSPI200',ML_KOSPI200_pred,real_KOSPI200)

In [None]:
# ML ensemble prediction vs. actual close: S&P500.
plot_result('S&P500',ML_SNP500_pred,real_SNP500)

In [None]:
# ML ensemble prediction vs. actual close: KOSDAQ.
plot_result('KOSDAQ',ML_KOSDAQ_pred,real_KOSDAQ)

In [None]:
# ML ensemble prediction vs. actual close: NASDAQ.
plot_result('NASDAQ',ML_NASDAQ_pred,real_NASDAQ)

In [None]:
# ML ensemble prediction vs. actual close: US30.
plot_result('US30',ML_US30_pred,real_US30)

# Deep Learning Dataset

In [None]:
def deeplr_data(data, test_size=10):
    """Split a feature frame into train / test parts for the deep-learning models.

    Args:
        data: Feature frame containing a 'Close' column (the target). It is not
            modified.
        test_size: Number of trailing rows held out as the test part
            (must be positive). Defaults to 10, the original hard-coded value.

    Returns:
        Tuple ``(train_features, train_target, test_features, test_target)``.
        All four carry a fresh 0-based index and the features exclude 'Close'.
    """
    name_train = data[:-test_size].reset_index(drop=True)
    # The previous code called reset_index() on the test slice but discarded the
    # result, and then dropped a column in place on a slice of `data`.
    name_test = data[-test_size:].reset_index(drop=True)
    name_target = name_train['Close']
    name_test_target = name_test['Close']
    name_train = name_train.drop('Close',axis=1)
    name_test = name_test.drop('Close',axis=1)
    return name_train ,name_target, name_test, name_test_target
    
    

In [None]:
# Train/test split of every index for the deep-learning models (last 10 rows are the test part).
KOSPI200_train,KOSPI200_target,KOSPI200_test, KOSPI200_test_target = deeplr_data(KOSPI200)
SNP500_train, SNP500_target, SNP500_test, SNP500_test_target = deeplr_data(SNP500)
NASDAQ_train, NASDAQ_target, NASDAQ_test, NASDAQ_test_target = deeplr_data(NASDAQ)
KOSDAQ_train, KOSDAQ_target, KOSDAQ_test, KOSDAQ_test_target = deeplr_data(KOSDAQ)
US30_train, US30_target, US30_test, US30_test_target = deeplr_data(US30)

In [None]:
def make_dataset(data, label, window_size=5):
    """Turn a feature frame into sliding windows with next-row labels.

    Window ``s`` covers rows ``s`` .. ``s + window_size - 1`` of ``data`` and is
    paired with ``label`` at position ``s + window_size``.

    Returns:
        ``(features, labels)`` as numpy arrays, one entry per window.
    """
    starts = range(len(data) - window_size)
    windows = [np.array(data.iloc[s:s+window_size]) for s in starts]
    targets = [np.array(label.iloc[s+window_size]) for s in starts]
    return np.array(windows), np.array(targets)

In [None]:
# Build 5-row sliding windows for every index.
# NOTE: the *_target / *_test_target names are rebound from pandas Series to numpy arrays
# here, so this cell cannot be re-run without re-running the split cell above it first
# (make_dataset indexes its label argument with .iloc).
# `feature` and `label` are not referenced by any later cell in this notebook section.
feature = [KOSPI200.keys()]
label = ['Close']
KOSPI200_trainset , KOSPI200_target = make_dataset(KOSPI200_train,KOSPI200_target)
KOSPI200_testset, KOSPI200_test_target = make_dataset(KOSPI200_test,KOSPI200_test_target)

SNP500_trainset, SNP500_target = make_dataset(SNP500_train,SNP500_target)
SNP500_testset, SNP500_test_target = make_dataset(SNP500_test,SNP500_test_target)

KOSDAQ_trainset, KOSDAQ_target = make_dataset(KOSDAQ_train,KOSDAQ_target)
KOSDAQ_testset, KOSDAQ_test_target = make_dataset(KOSDAQ_test,KOSDAQ_test_target)

NASDAQ_trainset, NASDAQ_target = make_dataset(NASDAQ_train,NASDAQ_target)
NASDAQ_testset, NASDAQ_test_target = make_dataset(NASDAQ_test,NASDAQ_test_target)

US30_trainset, US30_target = make_dataset(US30_train,US30_target)
US30_testset, US30_test_target = make_dataset(US30_test,US30_test_target)


In [None]:
# Sanity-check the windowed array shapes for KOSDAQ.
print(KOSDAQ_trainset.shape,KOSDAQ_target.shape)
print(KOSDAQ_testset.shape)

In [None]:
import torch
from torchvision import datasets
from torch.utils.data import DataLoader,Dataset

In [None]:
class CustomDataset(Dataset):
    """Torch dataset pairing pre-built feature arrays with regression targets.

    Args:
        data: Indexable collection of feature arrays (one entry per sample).
        label: Indexable collection of target values aligned with ``data``.
    """
    def __init__(self,data,label):
        self.data = data
        self.label = label

    def __len__(self):
        return len(self.data)

    def __getitem__(self,idx):
        """Return ``(features, target)`` for sample ``idx`` as tensors."""
        data = torch.tensor(self.data[idx],dtype=torch.float64)
        # The target is a continuous price: keep it floating point. The previous
        # int64 cast truncated the fractional part of every label.
        label = torch.tensor(self.label[idx],dtype=torch.float32)
        return data,label

# DNN Dataset

In [None]:
# KOSPI200_train_set = CustomDataset(KOSPI200_train.to_numpy(),KOSPI200_target.to_numpy())
# KOSPI200_test_set = CustomDataset(KOSPI200_test.to_numpy(),KOSPI200_test_target.to_numpy())

# SNP500_train_set = CustomDataset(SNP500_train.to_numpy(),SNP500_target.to_numpy())
# SNP500_test_set = CustomDataset(SNP500_test.to_numpy(),SNP500_test_target.to_numpy())

# KOSDAQ_train_set = CustomDataset(KOSDAQ_train.to_numpy(),KOSDAQ_target.to_numpy())
# KOSDAQ_test_set = CustomDataset(KOSDAQ_test.to_numpy(),KOSDAQ_test_target.to_numpy())

# NASDAQ_train_set = CustomDataset(NASDAQ_train.to_numpy(),NASDAQ_target.to_numpy())
# NASDAQ_test_set = CustomDataset(NASDAQ_test.to_numpy(),NASDAQ_test_target.to_numpy())

# US30_train_set = CustomDataset(US30_train.to_numpy(),US30_target.to_numpy())
# US30_test_set = CustomDataset(US30_test.to_numpy(),US30_test_target.to_numpy())

# CNN2RNN Dataset

In [None]:
# Wrap the windowed arrays of every index in torch datasets (train and test).
KOSPI200_train_set = CustomDataset(KOSPI200_trainset,KOSPI200_target)
KOSPI200_test_set = CustomDataset(KOSPI200_testset,KOSPI200_test_target)

SNP500_train_set = CustomDataset(SNP500_trainset,SNP500_target)
SNP500_test_set = CustomDataset(SNP500_testset,SNP500_test_target)

KOSDAQ_train_set = CustomDataset(KOSDAQ_trainset,KOSDAQ_target)
KOSDAQ_test_set = CustomDataset(KOSDAQ_testset,KOSDAQ_test_target)

NASDAQ_train_set = CustomDataset(NASDAQ_trainset,NASDAQ_target)
NASDAQ_test_set = CustomDataset(NASDAQ_testset,NASDAQ_test_target)

US30_train_set = CustomDataset(US30_trainset,US30_target)
US30_test_set = CustomDataset(US30_testset,US30_test_target)

# Deep Learning Model

In [None]:
import torch
import torch.nn as nn
from torch.utils.data.sampler import SubsetRandomSampler
import tensorflow
from tensorflow.keras.layers import LSTM,Dense,Dropout,Flatten,GRU,LeakyReLU,Conv2D,MaxPool1D
from tensorflow.keras.models import Sequential
from sklearn.model_selection import KFold
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning import Trainer

In [None]:
# class DNN(nn.Module):
#     def __init__(self):
#         super(DNN,self).__init__()
#         self.fc1 = nn.Linear(15,256)
#         self.bn1 = nn.BatchNorm1d(256)
#         self.fc2 = nn.Linear(256,512)
#         self.bn2 = nn.BatchNorm1d(512)
#         self.fc3 = nn.Linear(512,1024)
#         self.bn3 = nn.BatchNorm1d(1024)
#         self.fc4 = nn.Linear(1024,512)
#         self.bn4 = nn.BatchNorm1d(512)
#         self.fc5 = nn.Linear(512,1)
#         self.dropout = nn.Dropout(0.5)
#         self.swish = nn.Hardswish()
#     def forward(self,x):
#         x = self.dropout(self.swish(self.bn1(self.fc1(x))))
#         x = self.dropout(self.swish(self.bn2(self.fc2(x))))
#         x = self.dropout(self.swish(self.bn3(self.fc3(x))))
#         x = self.dropout(self.swish(self.bn4(self.fc4(x))))
#         output = self.fc5(x)
#         return output
# model = DNN()
# model

In [None]:
class CNN2GRU(nn.Module):
    """1-D convolution stack followed by stacked GRUs and a dense regression head.

    The layer sizes fix the expected input to ``(batch, 5, 15)``: ``conv1`` takes
    5 input channels, each of the three kernel-3 convolutions shortens the last
    axis by 2, and ``gru1`` expects 9 features (15 - 6). The convolution output
    and the GRU output are concatenated along the channel axis (256 + 256) and
    flattened to 512 * 9 = 4608 values for the dense head.

    ``forward`` returns a 1-D tensor with one prediction per sample.
    """
    def __init__(self):
        super(CNN2GRU,self).__init__()
        self.conv1 = nn.Conv1d(5,64,3)
        self.bnn1 = nn.BatchNorm1d(64)
        self.conv2 = nn.Conv1d(64,128,3)
        self.bnn2 = nn.BatchNorm1d(128)
        self.conv3 = nn.Conv1d(128,256,3)
        self.bnn3 = nn.BatchNorm1d(256)
        # batch_first=True: the tensors handed to the GRUs are (batch, 256, 9).
        # With the default (batch_first=False) the recurrence ran along the
        # batch axis, so the prediction for one sample depended on the other
        # samples that happened to share its mini-batch.
        self.gru1 = nn.GRU(9,256,3,batch_first=True)
        self.gru2 = nn.GRU(256,256,3,batch_first=True)
        self.gru3 = nn.GRU(256,9,3,batch_first=True)
        self.fc1 = nn.Linear(4608,1024)
        self.fc2 = nn.Linear(1024,512)
        self.fc3 = nn.Linear(512,256)
        self.fc4 = nn.Linear(256,1)
        self.flatten = nn.Flatten()
        self.gelu = nn.GELU()
        self.lrelu = nn.LeakyReLU(0.1)  # not used in forward(); kept as an attribute
        self.dropout = nn.Dropout(0.25)
        self.swish = nn.Hardswish()

    def forward(self,x):
        # Convolution stack: activation first, then batch norm.
        x = self.bnn1(self.swish(self.conv1(x)))
        x = self.bnn2(self.swish(self.conv2(x)))
        conv_out = self.bnn3(self.swish(self.conv3(x)))
        # Recurrent stack over the 256 channel positions of each sample.
        x,_ = self.gru1(conv_out)
        x = self.gelu(x)
        x,_ = self.gru2(x)
        x = self.gelu(x)
        x,_ = self.gru3(x)
        x = self.gelu(x)
        # Skip connection: join convolution and GRU features along the channel axis.
        x = torch.cat([conv_out,x],dim=1)
        x = self.flatten(x)
        x = self.dropout(self.swish(self.fc1(x)))
        x = self.dropout(self.swish(self.fc2(x)))
        x = self.dropout(self.swish(self.fc3(x)))
        output = self.fc4(x)
        output = output.view(-1)
        return output
# Instantiate the network once; the bare name on the last line makes the cell show its layers.
model = CNN2GRU()
model

# Train Session

In [None]:
def train(model,loader,device,optimizer):
    """Run one optimisation epoch and return the mean batch MAPE.

    Every batch is moved to ``device``, inputs and labels are cast to float32,
    the MAPE loss is back-propagated and the optimizer takes one step.
    """
    model.train()
    batch_losses = []
    for data,label in tqdm(loader):
        inputs = data.to(device).float()
        targets = label.to(device).to(torch.float32)
        optimizer.zero_grad()
        loss = MAPELoss(model(inputs),targets)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.detach().cpu().numpy())
    return np.mean(batch_losses)

In [None]:
def validation(model,loader,device):
    """Evaluate the model on ``loader`` without gradients and return the mean batch MAPE."""
    model.eval()
    batch_losses = []
    progress = tqdm(loader)
    with torch.no_grad():
        for data,label in progress:
            inputs = data.to(device).float()
            targets = label.to(device)
            loss = MAPELoss(model(inputs),targets)
            batch_losses.append(loss.detach().cpu().numpy())
    return np.mean(batch_losses)
        

In [None]:
def MAPELoss(output,target):
    """Mean absolute percentage error: mean of ``|target - output| / |target|`` as a fraction."""
    relative_error = (target - output) / target
    return relative_error.abs().mean()

In [None]:
def CNN2RNN_run(stock_name,model,data,lr,epochs):
    """Train one model per fold of a shuffled 3-fold split, with early stopping.

    For every fold a fresh copy of ``model`` is trained with Adamax. Whenever the
    validation loss improves, the whole model object is saved to
    ``{stock_name}_CNN2GRU_{fold}.pt`` (fold is 0-based). Training of a fold
    stops once the validation loss has failed to improve for more than 10
    consecutive epochs.

    Args:
        stock_name: Prefix for the saved model files.
        model: Untrained template network; it is deep-copied for every fold and
            is itself left untouched. (It used to be ignored in favour of a
            hard-coded ``CNN2GRU()``.)
        data: Dataset yielding ``(features, label)`` pairs.
        lr: Learning rate for Adamax.
        epochs: Maximum number of epochs per fold.

    Returns:
        ``(train_losses, valid_losses)`` - per-epoch losses of the *last* fold.
    """
    import copy  # local import: only needed for cloning the template model

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    name_train_loss, name_valid_loss = [], []
    kf = KFold(n_splits=3,shuffle=True)
    for fold ,(train_idx,valid_idx) in enumerate(kf.split(data)):
        print(f'Fold:{fold+1}')
        train_sampler_kfold = SubsetRandomSampler(train_idx)
        valid_sampler_kfold = SubsetRandomSampler(valid_idx)
        train_loader = torch.utils.data.DataLoader(data,batch_size=64,
                                                   num_workers=0,sampler=train_sampler_kfold,drop_last=True)
        valid_loader = torch.utils.data.DataLoader(data,batch_size=64,
                                              num_workers=0,sampler=valid_sampler_kfold,drop_last=True)
        valid_loss_min = np.inf  # np.Inf alias was removed in NumPy 2.0
        # Reset per fold; previously unbound if the first validation loss was NaN.
        patience = 0
        name_train_loss = []
        name_valid_loss = []
        fold_model = copy.deepcopy(model)
        fold_model.to(device)
        optimizer = torch.optim.Adamax(fold_model.parameters(),lr=lr)
        for e in range(epochs):
            train_loss = train(fold_model,train_loader,device,optimizer)
            valid_loss = validation(fold_model,valid_loader,device)

            name_train_loss.append(train_loss)
            name_valid_loss.append(valid_loss)

            print('Epoch:{}\t Training Loss:{:.3f}\t Validation Loss:{:3f}'.format(
                e+1,train_loss,valid_loss))
            if valid_loss < valid_loss_min:
                print('Validation Loss decreased:{:3f} --> {:3f} So saving the model'.format(
                    valid_loss_min,valid_loss))
                valid_loss_min = valid_loss
                torch.save(fold_model,f'{stock_name}_CNN2GRU_{fold}.pt')
                patience = 0
            else:
                patience += 1
                print(f'count patience:{patience} of 10')
                if patience > 10:
                    print(f"Meet Early Stopper so End Training...\n And best Valid_MSE:{valid_loss_min}")
                    break
    return name_train_loss, name_valid_loss
    

In [None]:
# def DNN_run(stock_name,model,data,lr,epochs):
    
#     if torch.cuda.is_available():
#         device = 'cuda'
#     else:
#         device = 'cpu'
#     device = ('cuda' if torch.cuda.is_available else 'cpu')
#     kf = KFold(n_splits=5,shuffle=True)
#     for fold ,(train_idx,valid_idx) in enumerate(kf.split(data)):
#         print(f'Fold:{fold+1}')
#         train_sampler_kfold = SubsetRandomSampler(train_idx)
#         valid_sampler_kfold = SubsetRandomSampler(valid_idx)
#         train_loader = torch.utils.data.DataLoader(data,batch_size=64,
#                                                    num_workers=0,sampler=train_sampler_kfold,drop_last=True)
#         valid_loader = torch.utils.data.DataLoader(data,batch_size=64,
#                                               num_workers=0,sampler=valid_sampler_kfold,drop_last=True)
#         valid_loss_min = np.Inf
#         name_train_loss = []
#         name_valid_loss = []
#         model = DNN()
#         model.to(device)
#         criterion = nn.MSELoss()
#         optimizer = torch.optim.Adamax(model.parameters(),lr=lr)
#         for e in range(epochs):
#             train_loss = train(model,train_loader,device,criterion,optimizer)
#             valid_loss = validation(model,valid_loader,device,criterion)
            
#             name_train_loss.append(train_loss)
#             name_valid_loss.append(valid_loss)
            
#             print('Epoch:{}\t Training Loss:{:.3f}\t Validation Loss:{:3f}'.format(
#                 e+1,train_loss,valid_loss))
#             if valid_loss < valid_loss_min:
#                 print('Validation Loss decreased:{:3f} --> {:3f} So saving the model'.format(
#                     valid_loss_min,valid_loss))
#                 valid_loss_min = valid_loss
#                 torch.save(model,f'{stock_name}_DNN_{fold}.pt')
#                 patience = 0
#             else:
#                 patience += 1
#                 print(f'count patience:{patience} of 10')
#                 if patience > 10:
#                     print(f"Meet Early Stopper so End Training...\n And best Valid_MSE:{valid_loss_min}")
#                     break
#     return name_train_loss, name_valid_loss
    

In [None]:
# Train the fold models for every index (up to 100 epochs per fold, lr 3e-4).
# The returned lists hold the per-epoch losses of the last fold only.
print('-------------------KOSPI200---------------------')
KOSPI200_train_loss, KOSPI200_valid_loss = CNN2RNN_run('KOSPI200',CNN2GRU(),KOSPI200_train_set,lr=0.0003,epochs=100)
print('-------------------S&P500---------------------')
SNP500_train_loss, SNP500_valid_loss = CNN2RNN_run('SNP500',CNN2GRU(),SNP500_train_set,lr=0.0003,epochs=100)
print('-------------------KOSDAQ---------------------')
KOSDAQ_train_loss, KOSDAQ_valid_loss = CNN2RNN_run('KOSDAQ',CNN2GRU(),KOSDAQ_train_set,lr=0.0003,epochs=100)
print('-------------------NASDAQ---------------------')
NASDAQ_train_loss, NASDAQ_valid_loss = CNN2RNN_run('NASDAQ',CNN2GRU(),NASDAQ_train_set,lr=0.0003,epochs=100)
print('-------------------US30---------------------')
US30_train_loss, US30_valid_loss = CNN2RNN_run('US30',CNN2GRU(),US30_train_set,lr=0.0003,epochs=100)

In [None]:
#  print('-------------------KOSPI200---------------------')
# DNN_KOSPI200_train_loss, DNN_KOSPI200_valid_loss = DNN_run('KOSPI200',DNN(),KOSPI200_train_set,lr=0.0003,epochs=200)
# print('-------------------S&P500---------------------')
# DNN_SNP500_train_loss, DNN_SNP500_valid_loss = DNN_run('SNP500',DNN(),SNP500_train_set,lr=0.0003,epochs=200)
# print('-------------------KOSDAQ---------------------')
# DNN_KOSDAQ_train_loss, DNN_KOSDAQ_valid_loss = DNN_run('KOSDAQ',DNN(),KOSDAQ_train_set,lr=0.0003,epochs=200)
# print('-------------------NASDAQ---------------------')
# DNN_NASDAQ_train_loss, DNN_NASDAQ_valid_loss = DNN_run('NASDAQ',DNN(),NASDAQ_train_set,lr=0.0003,epochs=200)
# print('-------------------US30---------------------')
# DNN_US30_train_loss, DNN_US30_valid_loss = DNN_run('US30',DNN(),US30_train_set,lr=0.0003,epochs=200)

In [None]:
def plot_loss(name,train_loss,val_loss):
    """Plot the per-epoch training and validation MAPE curves under the title ``name``."""
    fig, ax = plt.subplots(figsize=(20,10))
    curves = (('train_MAPE',train_loss),('valid_MAPE',val_loss))
    for curve_label, values in curves:
        ax.plot(values,label=curve_label)
    ax.set_title(name)
    ax.legend()
    ax.grid(True)
    return plt.show()

In [None]:
# Learning curves (last fold) for every index.
plot_loss('KOSPI200',KOSPI200_train_loss,KOSPI200_valid_loss)
plot_loss('SNP500',SNP500_train_loss,SNP500_valid_loss)
plot_loss('KOSDAQ',KOSDAQ_train_loss,KOSDAQ_valid_loss)
plot_loss('NASDAQ',NASDAQ_train_loss,NASDAQ_valid_loss)
plot_loss('US30',US30_train_loss,US30_valid_loss)

In [None]:
# One loader per index whose batch size equals the dataset size, i.e. the whole
# test set is delivered as a single batch.
KOSPI200_test_loader = DataLoader(KOSPI200_test_set,batch_size=len(KOSPI200_test_set),num_workers=0)
SNP500_test_loader = DataLoader(SNP500_test_set,batch_size=len(SNP500_test_set),num_workers=0)
KOSDAQ_test_loader = DataLoader(KOSDAQ_test_set,batch_size=len(KOSDAQ_test_set),num_workers=0)
NASDAQ_test_loader = DataLoader(NASDAQ_test_set,batch_size=len(NASDAQ_test_set),num_workers=0)
US30_test_loader = DataLoader(US30_test_set,batch_size=len(US30_test_set),num_workers=0)

In [None]:
def eval_model(stock_name,model_name,loader,n_folds=3):
    """Average the predictions of the saved fold models over ``loader``.

    Models are read from ``./{stock_name}_{model_name}_{i}.pt`` for
    ``i in range(n_folds)``.

    Args:
        stock_name: File-name prefix used when the models were saved.
        model_name: Model tag used in the file name (e.g. 'CNN2GRU').
        loader: Iterable of ``(features, label)`` batches; labels are ignored.
        n_folds: Number of saved fold models to load. Defaults to 3, the value
            that used to be hard-coded.

    Returns:
        numpy array: mean over the fold models of the predictions for all
        samples produced by ``loader``, in loader order.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    ensemble_pred = []
    for i in range(n_folds):
        path = f'./{stock_name}_{model_name}_{i}.pt'
        # SECURITY: the files hold fully pickled model objects; unpickling runs
        # arbitrary code, so only load checkpoints written by this notebook.
        # map_location lets a checkpoint saved on GPU be loaded on a CPU machine.
        try:
            # Recent PyTorch defaults to weights_only=True, which rejects
            # pickled model objects.
            model = torch.load(path,map_location=device,weights_only=False)
        except TypeError:
            # Older PyTorch without the weights_only keyword.
            model = torch.load(path,map_location=device)
        model.to(device)
        model.eval()
        fold_pred = []
        with torch.no_grad():
            for data,_ in loader:
                batch = data.to(device).float()
                fold_pred.append(model(batch).detach().cpu().numpy())
        # Join the batches of one model first, so averaging across models stays
        # correct even when the loader yields more than one batch.
        ensemble_pred.append(np.concatenate(fold_pred,axis=0))
    pred = np.mean(ensemble_pred,axis=0)
    return pred
        
    
        

# CNN2GRU_eval

In [None]:
# Ensemble prediction of the saved CNN2GRU fold models for every index.
deep_KOSPI200_pred = eval_model('KOSPI200','CNN2GRU',KOSPI200_test_loader)
deep_SNP500_pred = eval_model('SNP500','CNN2GRU',SNP500_test_loader)
deep_KOSDAQ_pred = eval_model('KOSDAQ','CNN2GRU',KOSDAQ_test_loader)
deep_NASDAQ_pred = eval_model('NASDAQ','CNN2GRU',NASDAQ_test_loader)
deep_US30_pred = eval_model('US30','CNN2GRU',US30_test_loader)

# DNN_eval

In [None]:
# deep_KOSPI200_pred = eval_model('KOSPI200','DNN',KOSPI200_test_loader)
# deep_SNP500_pred = eval_model('SNP500','DNN',SNP500_test_loader)
# deep_KOSDAQ_pred = eval_model('KOSDAQ','DNN',KOSDAQ_test_loader)
# deep_NASDAQ_pred = eval_model('NASDAQ','DNN',NASDAQ_test_loader)
# deep_US30_pred = eval_model('US30','DNN',US30_test_loader)

# Ensemble

In [None]:
# Give the deep-learning predictions the same business-day index as the ML predictions.
Deep_KOSPI200_pred = make_datetime(deep_KOSPI200_pred)
Deep_SNP500_pred = make_datetime(deep_SNP500_pred)
Deep_KOSDAQ_pred = make_datetime(deep_KOSDAQ_pred)
Deep_NASDAQ_pred = make_datetime(deep_NASDAQ_pred)
Deep_US30_pred = make_datetime(deep_US30_pred)

In [None]:
# Final ensemble: equal-weight mean of the deep-learning and ML prediction frames.
KOSPI200_ensemble = (Deep_KOSPI200_pred+ML_KOSPI200_pred)/2
SNP500_ensemble = (Deep_SNP500_pred+ML_SNP500_pred)/2
KOSDAQ_ensemble = (Deep_KOSDAQ_pred+ML_KOSDAQ_pred)/2
NASDAQ_ensemble = (Deep_NASDAQ_pred+ML_NASDAQ_pred)/2
US30_ensemble = (Deep_US30_pred+ML_US30_pred)/2

In [None]:
# Write every ensemble prediction to a CSV file in the working directory.
KOSPI200_ensemble.to_csv('week2_KOSPI200.csv')
KOSDAQ_ensemble.to_csv('week2_KOSDAQ.csv')
NASDAQ_ensemble.to_csv('week2_NASDAQ.csv')
SNP500_ensemble.to_csv('week2_S&P500.csv')
US30_ensemble.to_csv('week2_US30.csv')

# Plot result

In [None]:
def final_plot_result(real_indices,pred):
    """Draw the ensemble prediction (black dash-dot) against the actual values (red).

    The global matplotlib font size is set to 20 as a side effect.
    """
    plt.rc('font',size=20)
    fig, ax = plt.subplots(figsize=(20,10))
    ax.plot(pred,ls='-.',label='prediction_Indices',color='black')
    ax.plot(real_indices,label='real_Indices',color='red')
    ax.legend()
    return plt.show()

In [None]:
# Final ensemble vs. actual close: KOSPI200.
final_plot_result(real_KOSPI200,KOSPI200_ensemble)

In [None]:
# Final ensemble vs. actual close: S&P500.
final_plot_result(real_SNP500,SNP500_ensemble)

In [None]:
# Final ensemble vs. actual close: KOSDAQ.
final_plot_result(real_KOSDAQ,KOSDAQ_ensemble)

In [None]:
# Final ensemble vs. actual close: NASDAQ.
final_plot_result(real_NASDAQ,NASDAQ_ensemble)

In [None]:
# Final ensemble vs. actual close: US30.
final_plot_result(real_US30,US30_ensemble)