In [25]:
import sys
import warnings

if not sys.warnoptions:
    warnings.simplefilter('ignore')

In [26]:
import tensorflow as tf
from tensorflow.python.framework import ops
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from datetime import timedelta
from tqdm import tqdm
sns.set()
tf.compat.v1.random.set_random_seed(1234)

In [27]:
# Load and Prepare Data
df = pd.read_csv("main_data.csv")
df.fillna(0, inplace=True)
df.head()

Unnamed: 0,date,vcb_close,vnindex_close,exchange_rate,interest_rate,gdp,inflation,sp500_close,gdp_growth
0,2019-12-30,69.881,965.03,23167.0,1.3,2010887.0,1.4,3221.29,0.0
1,2019-12-31,69.267,960.99,23155.0,1.43,2010887.0,1.4,3230.78,0.0
2,2020-01-02,69.728,966.67,23150.0,2.44,1188207.0,1.23,3257.85,-0.409113
3,2020-01-03,69.037,965.14,23157.0,1.99,1188207.0,1.23,3234.85,0.0
4,2020-01-06,67.194,955.79,23167.0,1.58,1188207.0,1.23,3246.28,0.0


In [28]:
minmax = MinMaxScaler().fit(df[['vnindex_close', 'sp500_close', 'gdp_growth', 'exchange_rate','interest_rate','inflation']].astype('float32')) # Close index
df_log = minmax.transform(df[['vnindex_close', 'sp500_close', 'gdp_growth', 'exchange_rate','interest_rate','inflation']].astype('float32')) # Close index
df_log = pd.DataFrame(df_log)
df_log.head()

Unnamed: 0,0,1,2,3,4,5
0,0.351776,0.384912,0.552606,0.148048,0.143885,0.960784
1,0.347129,0.388625,0.552606,0.131897,0.159472,0.960784
2,0.353662,0.399215,0.0,0.125168,0.280576,0.905229
3,0.351903,0.390217,0.552606,0.13459,0.226619,0.905229
4,0.341147,0.394689,0.552606,0.148048,0.177458,0.905229


In [29]:
test_size = 600
simulation_size = 10

df_train = df_log.iloc[:-test_size]
df_test = df_log.iloc[-test_size:]
df.shape, df_train.shape, df_test.shape

class Model(tf.keras.Model):
    def __init__(
        self,
        size,
        size_layer,
        output_size,
        num_layers,
        forget_bias=0.1,
        learning_rate=0.001,
    ):
        super(Model, self).__init__()
        self.num_layers = num_layers
        self.lstm_layers = [tf.keras.layers.LSTM(size_layer, return_sequences=True) for _ in range(num_layers)]
        self.rnn = tf.keras.Sequential(self.lstm_layers)
        self.out = tf.keras.layers.Dense(output_size)
        self.optimizer = tf.keras.optimizers.Adam(learning_rate)
        self.forget_bias = forget_bias

    def call(self, inputs, training=False):
        x = self.rnn(inputs)
        return self.out(x)

def calculate_accuracy(real, predict):
    return 1 - np.sqrt(np.mean(np.square((real - predict) / real))) * 100

def anchor(signal, weight):
    buffer = []
    last = signal[0]
    for i in signal:
        smoothed_val = last * weight + (1 - weight) * i
        buffer.append(smoothed_val)
        last = smoothed_val
    return buffer

((874, 9), (274, 6), (600, 6))

In [42]:
class Model(tf.keras.Model):
    def __init__(
        self,
        size,
        size_layer,
        output_size,
        num_layers,
        forget_bias=0.1,
        learning_rate=0.001,
    ):
        super(Model, self).__init__()
        self.num_layers = num_layers
        self.lstm_layers = [tf.keras.layers.LSTM(size_layer, return_sequences=True) for _ in range(num_layers)]
        self.rnn = tf.keras.Sequential(self.lstm_layers)
        self.out = tf.keras.layers.Dense(output_size)
        self.optimizer = tf.keras.optimizers.Adam(learning_rate)
        self.forget_bias = forget_bias

    def call(self, inputs, training=False):
        x = self.rnn(inputs)
        return self.out(x)

def calculate_accuracy(real, predict):
    return 1 - np.sqrt(np.mean(np.square((real - predict) / real))) * 100

def anchor(signal, weight):
    buffer = []
    last = signal[0]
    for i in signal:
        smoothed_val = last * weight + (1 - weight) * i
        buffer.append(smoothed_val)
        last = smoothed_val
    return buffer
 
num_layers = 1
size_layer = 128
timestamp = 6
epoch = 300
dropout_rate = 0.8
future_day = test_size
learning_rate = 0.01

def forecast():
    model = Model(learning_rate, num_layers, df_log.shape[1], size_layer, df_log.shape[1], dropout_rate)
    minmax = MinMaxScaler().fit(df_log)
    df_train = minmax.transform(df_log)
    
    date_ori = pd.to_datetime(df.iloc[:, 0]).tolist()

    pbar = tqdm(range(epoch), desc='train loop')
    for i in pbar:
        init_value = np.zeros((1, num_layers * size_layer))
        total_loss, total_acc = [], []
        for k in range(0, df_train.shape[0] - 1, timestamp):
            index = min(k + timestamp, df_train.shape[0] - 1)
            batch_x = np.expand_dims(df_train[k:index, :], axis=0)
            batch_y = df_train[k + 1 : index + 1, :]
            with tf.GradientTape() as tape:
                logits = model(batch_x)
                loss = tf.reduce_mean(tf.square(batch_y - logits))
            gradients = tape.gradient(loss, model.trainable_variables)
            model.optimizer.apply_gradients(zip(gradients, model.trainable_variables))
            total_loss.append(loss.numpy())
            total_acc.append(calculate_accuracy(batch_y, logits))
        pbar.set_postfix(cost=np.mean(total_loss), acc=np.mean(total_acc))
    
    future_day = test_size

    output_predict = np.zeros((df_train.shape[0] + future_day, df_train.shape[1]))
    output_predict[0] = df_train.iloc[0]
    upper_b = (df_train.shape[0] // timestamp) * timestamp
    init_value = np.zeros((1, num_layers * 2 * size_layer))

    for k in range(0, (df_train.shape[0] // timestamp) * timestamp, timestamp):
        out_logits, last_state = sess.run(
            [modelnn.logits, modelnn.last_state],
            feed_dict = {
                modelnn.X: np.expand_dims(
                    df_train.iloc[k : k + timestamp], axis = 0
                ),
                modelnn.hidden_layer: init_value,
            },
        )
        init_value = last_state
        output_predict[k + 1 : k + timestamp + 1] = out_logits

    if upper_b != df_train.shape[0]:
        out_logits, last_state = sess.run(
            [modelnn.logits, modelnn.last_state],
            feed_dict = {
                modelnn.X: np.expand_dims(df_train.iloc[upper_b:], axis = 0),
                modelnn.hidden_layer: init_value,
            },
        )
        output_predict[upper_b + 1 : df_train.shape[0] + 1] = out_logits
        future_day -= 1
        date_ori.append(date_ori[-1] + timedelta(days = 1))

    init_value = last_state
    
    for i in range(future_day):
        o = output_predict[-future_day - timestamp + i:-future_day + i]
        out_logits, last_state = sess.run(
            [model.rnn, model.last_state],
            feed_dict = {
                model.X: np.expand_dims(o, axis = 0),
                model.hidden_layer: init_value,
            },
        )
        init_value = last_state
        output_predict[-future_day + i] = out_logits[-1]
        date_ori.append(date_ori[-1] + timedelta(days = 1))
    
    output_predict = minmax.inverse_transform(output_predict)
    deep_future = anchor(output_predict, 0.3)
    
    return deep_future[-test_size:]

results = []
for i in range(simulation_size):
    print('simulation %d'%(i + 1))
    results.append(forecast())

simulation 1


train loop: 100%|██████████| 300/300 [15:19:14<00:00, 183.85s/it, acc=-inf, cost=0.0344]  


AttributeError: 'numpy.ndarray' object has no attribute 'iloc'

In [None]:
accuracies = [calculate_accuracy(df['Close'].iloc[-test_size:].values, r) for r in results]

plt.figure(figsize = (15, 5))
for no, r in enumerate(results):
    plt.plot(r, label = 'forecast %d'%(no + 1))
plt.plot(df['Close'].iloc[-test_size:].values, label = 'true trend', c = 'black')
plt.legend()
plt.title('average accuracy: %.4f'%(np.mean(accuracies)))
plt.show()