In [None]:
# Intallation packages before use Jupyter

# !pip install pandas
# !pip install -U scikit-learn
# !pip install keras
# !pip install matplotlib
# !pip install kaleido
# !pip install plotly

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from pandas import DataFrame
from pandas import Series
from pandas import concat
from pandas import read_csv
# from pandas import datetime
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.models import load_model
from math import sqrt
import matplotlib
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# be able to save images on server
matplotlib.use('Agg')

# Not show warnings
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# !pip install -U kaleido # w Google Colab wymagany Runtime restart po instalacji (Runtime -> Restart Runtime)
# import kaleido #required
# kaleido.__version__ #0.2.1

print(tf.__version__)
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


In [None]:
# Zip
# importing the "tarfile" module
import tarfile # type: ignore

# open file
file = tarfile.open('../data/2_150x9.tar.gz')

# extracting a specific file
file.extractall(path='../data/2_150x9/')

file.close()

In [82]:
# Data generator
def data(time):
    # Timestep 
    # timestepsPerSample = 20
    timestepsPerSample = time

    # Timesteps in input data
    timestepsPerSampleWholeData = 150

    # Replace 'your_file.csv' with the actual file path
    file_features = '../data/2_150x9/2_150x9f.csv'
    file_labels = '../data/2_150x9/2_150x9l.csv'

    # Read the .csv file and create an array
    data_strings = np.genfromtxt(file_features, delimiter=';')
    labels_strings = np.genfromtxt(file_labels,delimiter=';')

    # Wycinanie wybranych kolumn
    #  0    1         2             3        4       5        6    7        8
    # RSI, VWAP, HeikenResult, closeHeiken, CMF, Stochastic, OBV, QQE, TrendFilter
    # data_s = data_strings[:,[0,2,3,4,5,6,7,8]]
    data_s = data_strings[:,[0,4,5,8]]
    num_features = data_s.shape[1]
    # print(data_strings[:3])
    # print(data_s[:3])
    
    # Convert from strings to float and int
    X = data_s.astype(float).reshape((-1,timestepsPerSampleWholeData,num_features))
    Y = labels_strings.astype(float).reshape((-1,6))
    # print(X.shape)
    # print(Y.shape)

    # Modification from imported to new size X[samples,timesteps]
    X_mod = X[1000:1066,timestepsPerSampleWholeData - timestepsPerSample:]
    Y_mod = Y[1000:1066]
    # X_mod = X[1000:1020,timestepsPerSampleWholeData - timestepsPerSample:]
    # Y_mod = Y[1000:1020]
    timestepsPerSampleWholeData = X_mod.shape[1]
    # print(X_mod.shape)
    # print(Y_mod.shape)
    # print(X_mod[:1])

    # Splitting
    x_train, x_test, Y_train, Y_test = train_test_split(X_mod,Y_mod, test_size=0.15, shuffle=False)
    y_train = Y_train[:, 0:2]
    y_test = Y_test[:, 0:2]
    # print(y_train[:2])
    # print(y_test[:2])

    # Summarize
    num_samples = x_train.shape[0]
    test_samples = x_test.shape[0]
    print(f"Train shape: {x_train.shape}")
    print(f"Timesteps: {timestepsPerSampleWholeData}")
    print(f"Num Samples: {num_samples}")
    print(f"Test Samples: {test_samples}")
    print(f"Num features: {num_features}")
    return x_train, x_test, y_train, y_test, Y_test

In [83]:
# fit lstm model
from tabnanny import verbose


def fit_lstmModel(x_train, y_train, x_test, y_test, batch_size, nb_epoch, neurons, denseType, dropout, model):
    if model == 0:
        LstmLayer = LSTM(
        units=neurons,
        activation="tanh",
        recurrent_activation="sigmoid",
        use_bias=True, # true if cuDNN
        kernel_initializer="glorot_uniform",
        recurrent_initializer="orthogonal",
        bias_initializer="zeros",
        unit_forget_bias=True,
        kernel_regularizer=None,
        recurrent_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        recurrent_constraint=None,
        bias_constraint=None,
        dropout=dropout, # !important parameter for optimization => 0 if cuDNN
        recurrent_dropout=0.0,
        seed=None,
        return_sequences=False,
        return_state=False,
        go_backwards=False,
        stateful=False,
        unroll=False, # false if cuDNN
        input_shape=(x_train.shape[1],x_train.shape[2]),
        # input_dim=(x_train.shape[1])
        )

        if denseType == 0:
            model = Sequential() # initializing model
            # input layer and LSTM layer with 50 neurons
            model.add(LstmLayer)
            # model.add(Dense(100, activation='relu'))
            # model.add(Dense(100, activation='relu'))
            # model.add(Dense(20, activation='relu'))
            # outpute layer with sigmoid activation
            model.add(Dense(y_train.shape[1], activation='sigmoid'))
        elif denseType == 1:
            model = Sequential() # initializing model
            # input layer and LSTM layer with 50 neurons
            model.add(LstmLayer)
            model.add(Dense(100, activation='relu'))
            # model.add(Dense(100, activation='relu'))
            # model.add(Dense(20, activation='relu'))
            # outpute layer with sigmoid activation
            model.add(Dense(y_train.shape[1], activation='sigmoid'))
        elif denseType == 2:
            model = Sequential() # initializing model
            # input layer and LSTM layer with 50 neurons
            model.add(LstmLayer)
            model.add(Dense(100, activation='relu'))
            model.add(Dense(100, activation='relu'))
            # model.add(Dense(20, activation='relu'))
            # outpute layer with sigmoid activation
            model.add(Dense(y_train.shape[1], activation='sigmoid'))
        elif denseType == 3:
            model = Sequential() # initializing model
            # input layer and LSTM layer with 50 neurons
            model.add(LstmLayer)
            model.add(Dense(100, activation='relu'))
            model.add(Dense(100, activation='relu'))
            model.add(Dense(20, activation='relu'))
            # outpute layer with sigmoid activation
            model.add(Dense(y_train.shape[1], activation='sigmoid'))
    elif model == 1:
        model = Sequential()

        # Assuming `data` is your input matrix with shape (samples, time_steps, features)
        model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1],x_train.shape[2])))
        model.add(Dropout(0.2))

        model.add(LSTM(units=50, return_sequences=False))
        model.add(Dropout(0.2))

        model.add(Dense(units=25))
        model.add(Dropout(0.2))

        # Output layer for price prediction
        model.add(Dense(y_train.shape[1], activation='sigmoid'))

    
    num_samples = x_train.shape[0]
    STEPS_PER_EPOCH = num_samples/batch_size

    # lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    0.001,
    decay_steps=STEPS_PER_EPOCH*100,
    decay_rate=0.9,
    staircase=False)

    # Callbackks 
    # EarlyStopping
    checkpoint = ModelCheckpoint(
            filepath='../saved_models/last_saved_model.keras',
            save_best_only=True,
            monitor='val_loss',
            verbose=1
            )
    # checkpoint = ModelCheckpoint('model.h5', save_best_only=True, save_format='h5', verbose=1)
    earlyStoppingCallback = EarlyStopping(monitor='val_loss',
                                             start_from_epoch=10,
                                             restore_best_weights=True,
                                             verbose=0,
                                             patience=5)

    def get_optimizer():
         # return tf.keras.optimizers.Adam(learning_rate=0.001)
        return tf.keras.optimizers.Adam(learning_rate= lr_schedule) # type: ignore

    optimizer = get_optimizer()

    # defining loss function, optimizer, metrics and then compiling model
    model.compile(loss='binary_crossentropy',optimizer=optimizer,metrics=['accuracy'])
    model.summary()

    # fit model
    history = model.fit(x_train, y_train, epochs=nb_epoch, batch_size=batch_size, 
                        shuffle=False, validation_data=(x_test, y_test), callbacks=[checkpoint, earlyStoppingCallback], verbose=2) # type: ignore
    return model, history

In [84]:
x_train, x_test, y_train, y_test, Y_test = data(150)

Train shape: (56, 150, 4)
Timesteps: 150
Num Samples: 56
Test Samples: 10
Num features: 4


In [91]:
loaded_model = load_model(filepath="../saved_models/last_saved_model.keras") 
# loaded_model.summary()
predict = loaded_model.predict(x_test)
print(predict)
predict = np.array([[1,0],[0,1],[0,1],[0,1],[0,1],[0,1],[1,0],[1,0],[1,0],[1,0]])
predict_classes = np.where(predict > 0.5, 1,0)
print(predict_classes)
concat = np.hstack((predict_classes, Y_test[:,2:]))
print(concat)
print(Y_test)
dfToProfit = pd.DataFrame(concat, columns=['Sell', 'Buy', 'Close', 'Open', 'High', 'Low'])
print(dfToProfit)
print(funcProfit(dfToProfit))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 450ms/step
[[0.11021952 0.8799596 ]
 [0.11442844 0.8766697 ]
 [0.11815286 0.87392056]
 [0.12862694 0.8651782 ]
 [0.14222674 0.85345703]
 [0.15988606 0.83797586]
 [0.17235628 0.8274266 ]
 [0.18302582 0.8183488 ]
 [0.1878275  0.8144575 ]
 [0.1906147  0.81189615]]
[[1 0]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [1 0]
 [1 0]
 [1 0]
 [1 0]]
[[ 1.    0.   83.37 83.25 83.37 83.24]
 [ 0.    1.   83.41 83.38 83.42 83.33]
 [ 0.    1.   83.35 83.42 83.43 83.32]
 [ 0.    1.   83.38 83.34 83.4  83.31]
 [ 0.    1.   83.39 83.38 83.43 83.38]
 [ 0.    1.   83.49 83.38 83.51 83.37]
 [ 1.    0.   83.5  83.49 83.52 83.46]
 [ 1.    0.   83.6  83.51 83.6  83.48]
 [ 1.    0.   83.67 83.61 83.69 83.59]
 [ 1.    0.   83.66 83.68 83.7  83.63]]
[[ 0.    1.   83.37 83.25 83.37 83.24]
 [ 0.    1.   83.41 83.38 83.42 83.33]
 [ 0.    1.   83.35 83.42 83.43 83.32]
 [ 0.    1.   83.38 83.34 83.4  83.31]
 [ 0.    1.   83.39 83.38 83.43 83.38]
 [ 0.    1.   83.49

In [92]:
print(dfToProfit)

   Sell  Buy  Close   Open   High    Low
0   1.0  0.0  83.37  83.25  83.37  83.24
1   0.0  1.0  83.41  83.38  83.42  83.33
2   0.0  1.0  83.35  83.42  83.43  83.32
3   0.0  1.0  83.38  83.34  83.40  83.31
4   0.0  1.0  83.39  83.38  83.43  83.38
5   0.0  1.0  83.49  83.38  83.51  83.37
6   1.0  0.0  83.50  83.49  83.52  83.46
7   1.0  0.0  83.60  83.51  83.60  83.48
8   1.0  0.0  83.67  83.61  83.69  83.59
9   1.0  0.0  83.66  83.68  83.70  83.63


In [89]:
# Profit
import unittest
def funcProfit(df):
    # predict_classes = np.where(predict > 0.5, 1,0)
    # concat = np.hstack((predict_classes, Y_test[:,2:]))
    # df = pd.DataFrame(concat, columns=['Sell', 'Buy', 'Price'])

    # Absolute difference prices
    spread = 0.03
    tp = 0.20
    sum = 0
    sell = 0
    buy = 0
    for i in range(0, len(df)):
        if (i-1) >= 0:
            if df.at[i,'Sell'] > 0.8 and df.at[i-1,'Sell'] < 0.2:                  # S 0 1 
                if buy > 0:
                    sell = df.at[i,'Open'] - spread
                    sum += df.at[i,'Open'] - buy
                    buy = 0
                elif sell == 0:
                    sell = df.at[i,'Open'] - spread
            elif df.at[i,'Sell'] > 0.8 and df.at[i-1,'Sell'] > 0.8 and sell > 0:   # S 1 1 sell >0
                if df.at[i-1,'High'] >= (sell + tp):
                    sum -= tp
                    sell = 0
                if df.at[i-1,'Low'] <= (sell - tp):
                    sum += tp
                    sell = 0
# --------------------------------------------------------------------------------------------------------

            elif df.at[i,'Buy'] > 0.8 and df.at[i-1,'Buy'] < 0.2:               # B 0 1 
                if sell > 0:
                    buy = df.at[i,'Open'] + spread
                    sum += sell - df.at[i,'Open']
                    sell = 0
                elif buy == 0:
                    buy = df.at[i,'Open'] + spread
            elif df.at[i,'Buy'] > 0.8 and df.at[i-1,'Buy'] > 0.8 and buy > 0:   # B 1 1 buy >0
                if df.at[i-1,'Low'] <= (buy - tp):
                    sum -= tp
                    buy = 0
                if df.at[i-1,'High'] >= (buy + tp):
                    sum += tp
                    buy = 0

    return round(sum, 2)

print(funcProfit(dfToProfit))

def test_profit():
    test = unittest.TestCase()

    test.assertEqual(funcProfit(dfToProfit), -0.02)

test_profit()

0.2


AssertionError: 0.2 != -0.02

In [80]:
# Experiment
def experiment(repeats, epochs, neurons, time, denseType, dropout, model):
    # Data gen
    x_train, x_test, y_train, y_test, Y_test = data(time)

    # Evaluate declaration
    accuracy = list()
    profit = list()
    metrics = list()

    # Repeats
    for r in range(repeats):
        
        # Print running
        print(f"Repeat {r} running...")

        # Batch size
        batch_size = 64
        # model
        model, history = fit_lstmModel(x_train, y_train, x_test, y_test, batch_size, epochs, neurons, denseType, dropout, model)

        # forecast the entire training dataset to build up state for forecasting
        predict = model.predict(x_test, batch_size=batch_size)
        test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0) # type: ignore

        metric = pd.DataFrame(history.history)
        metric['epoch'] = history.epoch

        metrics.append(metric)
        accuracy.append(test_acc*100)
        profit.append(funcProfit(predict, Y_test))
    
    return accuracy, profit, metrics

In [79]:
# Output
def plotsOut(d, metrics):
    fig = make_subplots(rows=1, cols=2, subplot_titles=('Accuracy', 'Loss'))

    # Layout - set size
    fig.update_layout(
        autosize=True,
        width=1000
    )

    for idx, m in enumerate(metrics):
        fig.add_trace(go.Scatter(x=m['epoch'], y=m['accuracy'], name=f'accuracy{str(idx)}', line_color='#0000ff', showlegend=False), row=1, col=1)
        fig.add_trace(go.Scatter(x=m['epoch'], y=m['loss'], name=f'loss{str(idx)}', line_color='#0000ff', showlegend=False), row=1, col=2)
        fig.add_trace(go.Scatter(x=m['epoch'], y=m['val_accuracy'], name=f'val_accuracy{str(idx)}', line_color='#EF8260', showlegend=False), row=1, col=1)
        fig.add_trace(go.Scatter(x=m['epoch'], y=m['val_loss'], name=f'val_loss{str(idx)}', line_color='#EF8260', showlegend=False), row=1, col=2)

    fig.update_xaxes(title_text='epochs')
    fig.update_yaxes(title_text='')
    # fig.update_layout(width=1000, title='Accuracy and Loss')
    fig.write_image(file=f"compare_models_{str(d)}.jpg", engine="kaleido")

In [81]:
# Run
from numpy import dtype

repeats = 1
results_acc = DataFrame(dtype='float')
results_profit = DataFrame(dtype='float')
metrics = list()

# vary training epochs
model = [0]
dropout = 0.2
denseType = 0
neuronsLstm = 150
timeStep = 150
epochs = 5
for mod in model:
 print(f"{mod} Model test start...")
 results_acc[str(mod)], results_profit[str(mod)], metrics = experiment(repeats, epochs, neuronsLstm, timeStep, denseType, dropout, mod)
#  plotsOut(mod, metrics)

# summarize results
print(results_acc.describe())
print(results_profit.describe())

0 Model test start...
Train shape: (850, 150, 4)
Timesteps: 150
Num Samples: 850
Test Samples: 150
Num features: 4
Repeat 0 running...


  super().__init__(**kwargs)


Epoch 1/5

Epoch 1: val_loss improved from inf to 0.66093, saving model to ../saved_models/last_saved_model.keras
14/14 - 11s - 806ms/step - accuracy: 0.5353 - loss: 0.7034 - val_accuracy: 0.7133 - val_loss: 0.6609
Epoch 2/5

Epoch 2: val_loss improved from 0.66093 to 0.63639, saving model to ../saved_models/last_saved_model.keras
14/14 - 3s - 248ms/step - accuracy: 0.6071 - loss: 0.6635 - val_accuracy: 0.7133 - val_loss: 0.6364
Epoch 3/5

Epoch 3: val_loss improved from 0.63639 to 0.60948, saving model to ../saved_models/last_saved_model.keras
14/14 - 5s - 363ms/step - accuracy: 0.6024 - loss: 0.6479 - val_accuracy: 0.7067 - val_loss: 0.6095
Epoch 4/5

Epoch 4: val_loss improved from 0.60948 to 0.57446, saving model to ../saved_models/last_saved_model.keras
14/14 - 3s - 243ms/step - accuracy: 0.6365 - loss: 0.6323 - val_accuracy: 0.7667 - val_loss: 0.5745
Epoch 5/5

Epoch 5: val_loss improved from 0.57446 to 0.53508, saving model to ../saved_models/last_saved_model.keras
14/14 - 6s - 

TypeError: funcProfit() takes 1 positional argument but 2 were given

In [None]:
# summarize results
print(results_acc.describe())
print(results_profit.describe())

In [None]:
%matplotlib inline
print(results_acc.describe())

# 2 subplots in one row
fig, ax = plt.subplots(1, 2, figsize=(10, 6))

# Generate boxplots
results_acc.boxplot(ax=ax[0])
results_profit.boxplot(ax=ax[1])

# Set labels and titles
ax[0].set_title('Accuracy')
ax[0].set_xlabel('Dropout')
ax[0].set_ylabel('Accuracy')
ax[1].set_title('Profit')
ax[1].set_xlabel('Dropout')
ax[1].set_ylabel('Profit')

# Save to .png and show plot
plt.savefig(f'boxplot_model.png')
plt.show()