In [88]:
import pandas as pd
import numpy as np
import math
import time

from binance.client import Client
from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score
from sklearn.preprocessing import MinMaxScaler

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

from itertools import cycle
import plotly.express as px

In [89]:
class Binance:
    """Small wrapper around the python-binance ``Client`` for downloading kline history."""

    # Column names assigned, in order, to each kline row returned by the client.
    KLINE_COLUMNS = [
        'timestamp', 'open', 'high', 'low', 'close', 'volume', 'close_time',
        'quote_asset_volume', 'number_of_trades', 'taker_buy_base_asset_volume',
        'taker_buy_quote_asset_volume', 'ignore'
    ]

    def __init__(self, key=None, secret=None):
        """Create the API client.

        Args:
            key: value forwarded to ``Client`` as ``api_key``.
            secret: value forwarded to ``Client`` as ``api_secret``.
        """
        self.client = Client(api_key=key, api_secret=secret)
        # Columns removed from the result when ``ignore_unused`` is true.
        self.unused = [
            'volume', 'close_time', 'quote_asset_volume',
            'number_of_trades', 'taker_buy_base_asset_volume',
            'taker_buy_quote_asset_volume', 'ignore'
        ]

    def get_history(self, symbol, start_time=1335227894, end_time=None, ignore_unused=True, interval=None):
        """Download klines for ``symbol`` and return them as a DataFrame.

        Args:
            symbol: trading pair forwarded to the client, e.g. ``'BTCUSDT'``.
            start_time: start of the range; converted with ``str()`` before
                being handed to the client.
            end_time: end of the range; converted with ``str()`` before being
                handed to the client. ``None`` means "now" (current UTC time
                in epoch milliseconds). Previously this argument was ignored.
            ignore_unused: when true, drop the columns listed in ``self.unused``.
            interval: kline interval forwarded to the client. ``None`` keeps
                the previous behaviour (``Client.KLINE_INTERVAL_1DAY``).

        Returns:
            pandas.DataFrame with one row per kline. ``timestamp`` is parsed
            from epoch milliseconds to datetime; every other column is left
            exactly as the client returned it.
        """
        if end_time is None:
            end_time = int(pd.Timestamp.now('UTC').timestamp() * 1000)
        if interval is None:
            interval = Client.KLINE_INTERVAL_1DAY

        klines = self.client.get_historical_klines(symbol, interval, str(start_time), str(end_time))

        df = pd.DataFrame(klines, columns=self.KLINE_COLUMNS)
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

        if ignore_unused:
            # Non-mutating drop: returns a new frame instead of editing in place.
            df = df.drop(columns=self.unused)

        return df


In [90]:
# Download the full BTCUSDT history with the wrapper's default arguments.
binance = Binance()
maindf = binance.get_history('BTCUSDT')

# Convert the four price columns to floats so they can be plotted and scaled.
for price_column in ('open', 'high', 'low', 'close'):
    maindf[price_column] = maindf[price_column].astype(float)

In [91]:
# Keep only the columns needed for the close-price chart and the model.
closedf = maindf[['timestamp', 'close']]

# The history is always fetched up to "now", so take the year range shown in
# the title from the data instead of hard-coding it.
first_year = closedf['timestamp'].dt.year.min()
last_year = closedf['timestamp'].dt.year.max()

fig = px.line(
    closedf,
    x=closedf.timestamp,
    y=closedf.close,
    labels={
        'timestamp': 'პერიოდი',
        'close': 'ფასიები'
    }
)

fig.update_traces(marker_line_width=2, opacity=0.8, marker_line_color='orange')
fig.update_layout(
    title_text=f'ფასის ცვალებადობა დროის მიხედვით ({first_year} - {last_year})',
    plot_bgcolor='white',
    font_size=15,
    font_color='black'
)

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

In [92]:
# !pip install numpy==1.19.5

# Rebuild both objects from `maindf` so this cell can be re-run safely.
# The previous version rebound `closedf` from a DataFrame to an ndarray, so a
# second run overwrote `close_stock` with an array and then failed on `del`.
close_stock = maindf[['timestamp', 'close']].copy()

# NOTE(review): the scaler is fit on the whole series, before the train/test
# split, so the test range influences the scaling. Consider fitting on the
# training slice only; left unchanged here to keep the reported metrics comparable.
scaler = MinMaxScaler(feature_range=(0, 1))

# `closedf` becomes an (n_rows, 1) array of close prices scaled to [0, 1].
closedf = scaler.fit_transform(close_stock[['close']].to_numpy())

print(closedf.shape)

(2076, 1)


In [93]:
# Chronological 60/40 split: the first part trains, the remainder tests.
n_rows = len(closedf)
training_size = int(n_rows * 0.60)
test_size = n_rows - training_size

train_data = closedf[:training_size]
test_data = closedf[training_size:, :1]

for split_label, split_values in (('train_data', train_data), ('test_data', test_data)):
    print(f'{split_label}: {split_values.shape}')

train_data: (1245, 1)
test_data: (831, 1)


In [94]:
def create_dataset(dataset, time_step=1):
    """Turn a series into supervised (window, next value) pairs.

    Args:
        dataset: 2-D array; only column 0 is read.
        time_step: number of consecutive values in each input window.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays. ``x[i]`` is ``dataset[i:i + time_step, 0]``
        and ``y[i]`` is ``dataset[i + time_step, 0]``. ``x`` always has shape
        ``(n_samples, time_step)``, including when ``n_samples`` is 0, so callers
        can reshape it without special-casing short inputs.

    Note:
        ``n_samples`` is ``len(dataset) - time_step - 1``, i.e. the last possible
        window is not emitted. The plotting offsets later in this notebook are
        derived from this count, so it is intentionally kept unchanged.
    """
    n_samples = max(len(dataset) - time_step - 1, 0)

    if n_samples == 0:
        # The original returned a 1-D empty array here, which broke the
        # downstream `X.shape[1]` reshape; keep the window axis instead.
        return np.empty((0, time_step)), np.empty((0,))

    x = [dataset[start:start + time_step, 0] for start in range(n_samples)]
    y = [dataset[start + time_step, 0] for start in range(n_samples)]

    return np.array(x), np.array(y)

In [95]:
# Window length: how many past values feed each prediction.
time_step = 15

X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

for array_label, array_value in (
    ('X_train', X_train),
    ('y_train', y_train),
    ('X_test', X_test),
    ('y_test', y_test),
):
    print(f'{array_label}: {array_value.shape}')

X_train: (1229, 15)
y_train: (1229,)
X_test: (815, 15)
y_test: (815,)


In [117]:
# Append a trailing feature axis of size 1: (samples, steps) -> (samples, steps, 1).
X_train = X_train.reshape(*X_train.shape[:2], 1)
X_test = X_test.reshape(*X_test.shape[:2], 1)

for array_label, array_value in (('X_train', X_train), ('X_test', X_test)):
    print(f'{array_label}: {array_value.shape}')

X_train: (1229, 15, 1)
X_test: (815, 15, 1)


In [154]:
# Single LSTM layer followed by one linear output unit.
model = Sequential()
# The window length comes from `time_step` instead of a hard-coded 15, so the
# model input always matches the windows built by `create_dataset`.
model.add(LSTM(10, input_shape=(time_step, 1), activation='relu'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')

In [155]:
# Train the model; the test windows are passed as validation data, so the
# per-epoch validation loss is measured on the same data used for the final metrics.
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=200,
    batch_size=32,
    verbose=1,
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [134]:
# File name is today's date (day_month_year), so a second save on the same day
# targets the same path.
model_name = time.strftime('%d_%m_%Y.h5')
model_path = f'models/{model_name}'
model.save(model_path)

In [156]:
# Predict on the training windows first, then on the test windows.
train_predict, test_predict = (
    model.predict(features) for features in (X_train, X_test)
)

train_predict.shape, test_predict.shape

((1229, 1), (815, 1))

In [157]:
# Map scaled values back to prices. The predictions are overwritten in place,
# so running this cell twice would apply the inverse transform twice.
to_price = scaler.inverse_transform

train_predict = to_price(train_predict)
test_predict = to_price(test_predict)

# Targets are 1-D; the scaler expects a column vector.
original_ytrain = to_price(y_train.reshape(-1, 1))
original_ytest = to_price(y_test.reshape(-1, 1))

In [158]:
# Compute each MSE once; RMSE is its square root.
train_mse = mean_squared_error(original_ytrain, train_predict)
test_mse = mean_squared_error(original_ytest, test_predict)

print("Train data RMSE: ", math.sqrt(train_mse))
print("Train data MSE: ", train_mse)
print("Train data MAE: ", mean_absolute_error(original_ytrain, train_predict))
print("-------------------------------------------------------------------------------------")
print("Test data RMSE: ", math.sqrt(test_mse))
print("Test data MSE: ", test_mse)
print("Test data MAE: ", mean_absolute_error(original_ytest, test_predict))

Train data RMSE:  455.3372416337963
Train data MSE:  207332.00361867418
Train data MAE:  295.95725922052986
-------------------------------------------------------------------------------------
Test data RMSE:  2293.9108510680417
Test data MSE:  5262026.9926477065
Test data MAE:  1586.6167342791414


In [159]:
# Explained variance of the predictions, in price units, for both splits.
train_explained_variance = explained_variance_score(original_ytrain, train_predict)
test_explained_variance = explained_variance_score(original_ytest, test_predict)

print(f'Train data explained variance regression score: {train_explained_variance}')
print(f'Test data explained variance regression score: {test_explained_variance}')

Train data explained variance regression score: 0.990768581251903
Test data explained variance regression score: 0.9760030292201267


In [160]:
# R2 of the predictions, in price units, for both splits.
train_r2 = r2_score(original_ytrain, train_predict)
test_r2 = r2_score(original_ytest, test_predict)

print(f'Train data R2 score: {train_r2}')
print(f'Test data R2 score: {test_r2}')

Train data R2 score: 0.9898671492615477
Test data R2 score: 0.9731134378712074


In [161]:
look_back = time_step

# Train predictions: NaN everywhere except the rows they correspond to.
# The first prediction targets row `look_back`.
trainPredictPlot = np.full_like(closedf, np.nan)
train_stop = look_back + len(train_predict)
trainPredictPlot[look_back:train_stop, :] = train_predict

print(f'Train predicted data: {trainPredictPlot.shape}')

# Test predictions: same idea. The offset skips the train predictions, both
# look-back windows and the one sample `create_dataset` leaves out per split.
testPredictPlot = np.full_like(closedf, np.nan)
test_start = len(train_predict) + (look_back * 2) + 1
test_stop = len(closedf) - 1
testPredictPlot[test_start:test_stop, :] = test_predict

print(f'Test predicted data: {testPredictPlot.shape}')

# Legend labels, handed out to the traces in the order they were added.
names = cycle(['ორიგინალი დახურვა', 'დატრენინგებული დახურვა', 'პროგნოზირებული დახურვა'])

plotdf = pd.DataFrame({
    'date': close_stock['timestamp'],
    'original_close': close_stock['close'],
    'train_predicted_close': trainPredictPlot.ravel().tolist(),
    'test_predicted_close': testPredictPlot.ravel().tolist(),
})

plotted_series = [
    plotdf['original_close'],
    plotdf['train_predicted_close'],
    plotdf['test_predicted_close'],
]

fig = px.line(
    plotdf,
    x=plotdf['date'],
    y=plotted_series,
    labels={'value': 'ფასი', 'date': 'პერიოდი'},
)

fig.update_layout(
    title_text='ორგინალის და პროგნოზირებულის შედარება',
    plot_bgcolor='white',
    font_size=15,
    font_color='black',
    legend_title_text='',
)

fig.for_each_trace(lambda t: t.update(name=next(names)))
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

Train predicted data: (2076, 1)
Test predicted data: (2076, 1)


In [162]:
# Seed the rolling window with the last `time_step` scaled test values.
temp_input = test_data[len(test_data) - time_step:].reshape(-1).tolist()

lst_output = []
n_steps = time_step
pred_days = 30

# Recursive forecast: predict one step, append it to the window, drop the
# oldest value, and repeat. Each prediction is fed back in as input.
i = 0
while i < pred_days:
    x_input = np.array(temp_input[-n_steps:]).reshape((1, n_steps, 1))
    yhat = model.predict(x_input, verbose=0)

    temp_input = temp_input[-n_steps:] + yhat[0].tolist()
    lst_output.extend(yhat.tolist())
    i += 1

print(f'Output of predicted next days: {len(lst_output)}')

Output of predicted next days: 30


In [163]:
# 1-based day numbers: the observed window first, then the forecast horizon.
last_days = np.arange(time_step) + 1
day_pred = np.arange(pred_days) + time_step + 1

for day_numbers in (last_days, day_pred):
    print(day_numbers)

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
[16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
 40 41 42 43 44 45]


In [164]:
# One slot per observed day followed by one slot per forecast day.
total_days = time_step + pred_days

# Two independent NaN-filled series. The previous version bound both names to
# the same list, so both columns held identical data and the chart drew the
# same line twice under two different legend labels.
last_original_days_value = np.full(total_days, np.nan)
next_predicted_days_value = np.full(total_days, np.nan)

# First `time_step` slots: the last observed closes, converted back to prices.
last_original_days_value[:time_step] = scaler.inverse_transform(
    closedf[len(closedf) - time_step:]
).ravel()

# Remaining `pred_days` slots: the forecast, converted back to prices.
next_predicted_days_value[time_step:] = scaler.inverse_transform(
    np.array(lst_output).reshape(-1, 1)
).ravel()

new_pred_plot = pd.DataFrame({
    'last_original_days_value': last_original_days_value,
    'next_predicted_days_value': next_predicted_days_value
})

# Legend labels, handed out to the traces in the order they were added.
names = cycle([f'გასული {time_step} დღის ფასები', f'პროგნოზირებული {pred_days} დღის ფასები'])

fig = px.line(
    new_pred_plot,
    x=new_pred_plot.index,
    y=[
        new_pred_plot['last_original_days_value'],
        new_pred_plot['next_predicted_days_value']
    ],
    labels={
        'value': 'ფასი',
        'index': 'პერიოდი'
    }
)

fig.update_layout(
    # Day counts come from the variables so the title stays correct if they change.
    title_text=f'წინა {time_step} დღის და მომდევნო {pred_days} დღის შედეგები',
    plot_bgcolor='white',
    font_size=15,
    font_color='black',
    legend_title_text='Close Price'
)

fig.for_each_trace(lambda data: data.update(name=next(names)))
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()