# Download a dataset

In [1]:
import csv
from datetime import datetime
import numpy as np

with open('TSLA.csv', 'r') as file_data:
    csv_reader = csv.reader(file_data)
    list_data = []
    for row in csv_reader:
        try:
            list_data.append([datetime.strptime(row[0], '%Y-%m-%d'),
                              float(row[1]),
                              float(row[2]),
                              float(row[3]),
                              float(row[6]),
                              float(row[4]),
                             ])
        except:
            print(row)

print(len(list_data))
print(list_data[0])

np.save('tesla.npy', np.array(list_data))

['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
1259
[datetime.datetime(2015, 10, 8, 0, 0), 46.015999, 46.144001, 44.262001, 30666000.0, 45.344002]


# LSTM

In [2]:
import tensorflow as tf
import sklearn.model_selection as sklms

from datetime import datetime

import numpy as np
import matplotlib.pyplot as plt

In [3]:
# sequence 데이터는 에폭 개념이 명확치 않다

num_steps = 28
ratio_train = 0.7
rate_learning = 1e-3

num_iters = 10000
num_displays = 100
size_batch = 64
str_file = 'tesla.npy'

In [4]:
data = np.load(str_file, allow_pickle=True)
print(data.shape)

num_data = data.shape[0]
dim_data = data.shape[1] - 1 #첫 칼럼 datetime 뺐음
mean_closed_price = np.mean(data[:, -1])
std_closed_price = np.std(data[:, -1])

for ind in range(1, dim_data + 1):
    data[:, ind] = (data[:, ind] - np.mean(data[:, ind])) / np.std(data[:, ind])
    
data_train = data[:int(num_data * ratio_train)]
data_test = data[int(num_data * ratio_train):]

print(data.shape)
print(data_train.shape)
print(data_test.shape)

(1259, 6)
(1259, 6)
(881, 6)
(378, 6)


In [13]:
def sample_data(data, size_batch, num_steps):
    num_data = data.shape[0]
    ind_start = np.random.choice(num_data - num_steps - size_batch + 1, 1)[0]
    
    list_dates = []
    list_samples = []
    list_targets = []
    
    for ind_batch in range(0, size_batch):
        cur_index = ind_start + ind_batch
        
        list_dates.append(data[cur_index + 1: cur_index + num_steps + 1, 0]) # 날짜 정보
        list_samples.append(data[cur_index:cur_index + num_steps, 1:]) #
        list_targets.append(data[cur_index + 1:cur_index + num_steps + 1, -1]) # 1차원 종가
    
    samples = np.array(list_samples).astype(np.float32)
    targets = np.array(list_targets).astype(np.float32)

    return list_dates, samples, targets

def recover_prices(prices, mean, std):
    return prices * std + mean

def plot_prices(dates, by, outputs):
    fig = plt.figure(figsize=(18, 6))
    ax = fig.gca()
    ax.plot_date(dates, by, '-')
    ax.plot_date(dates, outputs, '-')
    plt.show()

In [8]:
class RNN(tf.keras.Model):
    def __init__(self):
        super(RNN, self).__init__()
        
        self.layer_fc_pre = tf.keras.layers.Dense(128)
        self.layer_lstm = tf.keras.layers.LSTM(256, return_sequences = True)
        self.layer_fc_post = tf.keras.layers.Dense(1) #아웃풋 예측
        
    def call(self, inputs):
        outputs = inputs
        #(batch size, num_Steps, 5)
        
        outputs = self.layer_fc_pre(outputs)
        #(batch size, num_Steps, 128)
        outputs = self.layer_lstm(outputs)
        #(batch size, num_Steps, 256) # 유닛 수, if return_sequences == False: outputs(many to one) = (batchsize, 256)
        outputs = self.layer_fc_post(outputs)
        #(batch size, num_Steps, 1)
        
        return outputs

In [9]:
model = RNN()
loss = tf.keras.losses.MeanSquaredError()
optimizer = tf.keras.optimizers.Adam(learning_rate=rate_learning)
metrics = tf.keras.metrics.MeanAbsoluteError

model.build((None, num_steps, dim_data)) #구체화 하기 힘든 값 None -> batch size
model.summary()

Model: "rnn_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              multiple                  768       
_________________________________________________________________
lstm_1 (LSTM)                multiple                  394240    
_________________________________________________________________
dense_2 (Dense)              multiple                  257       
Total params: 395,265
Trainable params: 395,265
Non-trainable params: 0
_________________________________________________________________


In [19]:
@tf.function
def step_train(X_batch, by_batch):
    with tf.GradientTape() as tape:
        preds_ = model(X_batch)
        loss_ = loss(by_batch, preds_)
        
    grads_ = tape.gradient(loss_, model.trainable_weights)
    optimizer.apply_gradients(zip(grads_, model.trainable_weights))
    
    return loss_

@tf.function
def step_test(X_batch, by_batch):
    preds_ = model(X_batch)
    loss_ = loss(by_batch, preds_)
    
    return loss_, preds_

In [20]:
for ind_iter in range(0, num_iters):
    _, X_batch, by_batch = sample_data(data_train, size_batch, num_steps)
    loss_batch = step_train(X_batch, by_batch)
    
    if ind_iter == 0 or ind_iter % num_displays == (num_displays - 1):
        print('{} iteration: Mean squared error {:.6f}'.format(ind_iter + 1, loss_batch))

1 iteration: Mean squared error 0.061205
100 iteration: Mean squared error 0.001459
200 iteration: Mean squared error 0.000395
300 iteration: Mean squared error 0.000721
400 iteration: Mean squared error 0.000176
500 iteration: Mean squared error 0.000215
600 iteration: Mean squared error 0.001532
700 iteration: Mean squared error 0.000382
800 iteration: Mean squared error 0.000428
900 iteration: Mean squared error 0.000586
1000 iteration: Mean squared error 0.000409
1100 iteration: Mean squared error 0.000350
1200 iteration: Mean squared error 0.000274
1300 iteration: Mean squared error 0.001878
1400 iteration: Mean squared error 0.001137
1500 iteration: Mean squared error 0.000548
1600 iteration: Mean squared error 0.001039
1700 iteration: Mean squared error 0.000333
1800 iteration: Mean squared error 0.001084
1900 iteration: Mean squared error 0.000234
2000 iteration: Mean squared error 0.000213
2100 iteration: Mean squared error 0.000250
2200 iteration: Mean squared error 0.000585


In [21]:
dates_, X_, by_ = sample_data(data_test, 1, num_steps)
loss_, preds_ = step_test(X_, by_)
metrics.update_state(by_, preds_)
print('Mean squared error {:.4f} Mean absolute error {:.4f}'.format(loss_.numpy(), metrics.result().numpy()))
metrics.reset_states()

by_ = recover_prices(by_, mean_closed_price, std_closed_price)
preds_ = recover_prices(preds_, mean_closed_price, std_closed_price)

plot_prices(dates_[0], by_[0], preds_[0])

TypeError: update_state() missing 1 required positional argument: 'y_pred'