In [2]:
import math
from termcolor import colored
import pandas_datareader as web
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, scale
import joblib
from keras.models import Sequential
from keras.layers import Dense, LSTM, Masking
from keras.layers import Dropout
from keras.models import load_model
import matplotlib.pyplot as plt
from prepare_data import get_data
from sklearn.metrics import classification_report, confusion_matrix, mean_squared_error

In [3]:
def get_dataset(train_data):
    """ Fills len(train_data) long arrays with 60 records 
        in each position
        example = [[1...60], [1...60], ...]
    """
    x_train = []
    for i in range(60, len(train_data)):
        x_train.append(train_data[i-60:i, :-1])
    x_train = np.array(x_train)
    return np.array(x_train)

In [32]:
def create_model(shape_examples, shape_features):
    print(colored('Generating LSTM model.', 'yellow'))
    print(colored(f'Shape {shape_examples}x{shape_features}', 'cyan'))

    model = Sequential()
    model.add(LSTM(
        units=50,
        return_sequences=True,
        input_shape=(shape_examples, shape_features)
    ))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50, return_sequences=True))
    model.add(Dropout(0.25))
    model.add(LSTM(units=50, return_sequences=True))
    model.add(Dropout(0.25))
    model.add(LSTM(units=50))
    model.add(Dropout(0.25))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=["accuracy"])
    return model


In [33]:
try:
    X = np.load('resources/X_3.npy')
    Y = np.load('resources/Y_3.npy')
    scaler = joblib.load("resources/scaler_3.save")
    print(colored('Loaded storaged data.', 'green'))
except FileNotFoundError:
    try:
        df = pd.read_csv("resources/test.csv")
    except FileNotFoundError:
        print(colored('Creating missing file.', 'red'))
        get_data()
        df = pd.read_csv("resources/test.csv")

    df["trend"] = df["AO_1_500"].shift(-1)
    df = df.iloc[:, 2:]
    print(colored('Setting up parameters.', 'yellow'))
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(df.values)  # shape (70718, 8)
    joblib.dump(scaler, "resources/scaler_3.save")
    X = get_dataset(scaled_data)
    Y = np.array(scaled_data[60:len(scaled_data), -1])
    np.save("resources/X_3.npy", X)
    np.save("resources/Y_3.npy", Y)


[32mLoaded storaged data.[0m


In [39]:
df

Unnamed: 0,High,Open,Close,Low,AO_1_500,SMA_500,RSI_14,trend
0,2879.20,2857.20,2863.70,2836.40,190.64730,2667.79160,35.895928,207.03220
1,2898.70,2863.40,2890.30,2851.80,207.03220,2668.88040,42.009444,236.54520
2,2923.70,2890.10,2906.20,2888.10,236.54520,2670.07100,45.363631,237.81690
3,2926.90,2906.10,2906.40,2889.90,237.81690,2671.35600,45.406406,237.53820
4,2911.70,2910.30,2907.30,2907.10,237.53820,2672.64940,45.612755,251.17420
...,...,...,...,...,...,...,...,...
70206,11801.41,11799.07,11784.47,11778.38,41.79979,11748.49470,50.233712,32.85821
70207,11787.33,11784.44,11776.22,11773.37,32.85821,11747.88614,48.030822,28.94011
70208,11787.55,11775.93,11786.29,11764.16,28.94011,11747.35282,50.863282,40.45993
70209,11799.50,11786.17,11779.67,11774.19,40.45993,11746.79568,48.973581,42.15309


In [34]:
print(colored(f'X shape: {X.shape}', 'cyan'))
print(colored(f'Y shape: {Y.shape}', 'cyan'))

training_data_len = math.ceil(len(Y) * .8)
x_train = X[:training_data_len]
y_train = Y[:training_data_len]
x_train = np.array(x_train)
y_train = np.array(y_train)

print(colored(f'X_train shape: {x_train.shape}', 'yellow'))
print(colored(f'Y_train shape: {y_train.shape}', 'yellow'))

[36mX shape: (70151, 60, 7)[0m
[36mY shape: (70151,)[0m
[33mX_train shape: (56121, 60, 7)[0m
[33mY_train shape: (56121,)[0m


In [35]:
try:
    model = load_model('resources/my_model_3.h5')
    print(colored('Model loaded successfully', 'green'))
except IOError:
    print(colored('Training...', 'yellow'))
    model = create_model(X.shape[1], X.shape[-1])
    history = model.fit(x_train, y_train, batch_size=64, epochs=10)
    model.save('resources/my_model_3.h5')
    hist_df = pd.DataFrame(history.history)
    with open("resources/history.csv", mode='w') as f:
        hist_df.to_csv(f)


[33mTraining...[0m
[33mGenerating LSTM model.[0m
[36mShape 60x7[0m
Epoch 1/10


2022-07-21 15:39:10.369065: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 94283280 exceeds 10% of free system memory.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [36]:
train_acc = model.evaluate(x_train, y_train, verbose=0)
train_acc

2022-07-21 16:57:24.849168: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 94283280 exceeds 10% of free system memory.


[0.00014471950998995453, 0.0]

In [40]:
sample = np.array([x_train[0]])
print(sample)
print(y_train[0])
prediction = model.predict(sample)
print(prediction)
# print("train: ", mean_squared_error(y_train, prediction))


[[[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
   6.93441449e-01 0.00000000e+00 3.53214540e-01]
  [1.40408582e-03 4.46417000e-04 1.91623737e-03 1.10887496e-03
   7.00029497e-01 7.95122799e-05 4.22081137e-01]
  [3.20419584e-03 2.36889021e-03 3.06165746e-03 3.72265165e-03
   7.11896098e-01 1.66458753e-04 4.59864882e-01]
  [3.43460992e-03 3.52093408e-03 3.07606526e-03 3.85226041e-03
   7.12407424e-01 2.60299018e-04 4.60346727e-01]
  [2.34014303e-03 3.82334559e-03 3.14090036e-03 5.09074413e-03
   7.12295364e-01 3.54752712e-04 4.62671173e-01]
  [5.14111422e-03 3.60013709e-03 4.42319454e-03 4.44990081e-03
   7.17778133e-01 4.54975575e-04 5.08317959e-01]
  [4.75229046e-03 4.87458562e-03 4.43760234e-03 5.41476604e-03
   7.18812163e-01 5.58995865e-04 5.08828684e-01]
  [5.30672435e-03 4.91058700e-03 5.58302243e-03 4.66591542e-03
   7.17691243e-01 6.64958684e-04 5.49387412e-01]
  [6.61720444e-03 6.06263086e-03 7.43442470e-03 7.56051109e-03
   7.28814679e-01 7.81846409e-04 6.076693