In [None]:
%%capture
#%conda install -c plotly plotly=5.9.0
#%conda install pip
#%conda install twisted

%pip install plotly==5.9.0
%pip install twisted
%pip install pandas
%pip install ta    
%pip install scikit-learn
%pip install tensorflow
%pip install keras
%pip install matplotlib
%pip install scikeras
%pip install keras-tuner
%pip install plotly
%pip install nbformat
#%pip install huobi-sdk==2.3.3

import pandas as pd
import numpy as np    
# Dataframe display options: show all columns, do not wrap wide frames, do not truncate cell text
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', None)

import os
# Downloading latest pytrade2 data (the download call is currently commented out)
#os.system("cd ./../deploy/yandex_cloud; ./download_data.sh")
# Directory the notebook reads its input data from, relative to the working directory
data_dir=f"../data/dev/common"
#print(f"Download completed. Local data dir: {data_dir}")

In [None]:
# Trading pair name. NOTE(review): not referenced by any other cell visible in this notebook - confirm it is still needed.
symbol="BTC-USDT"

## Read candles history

In [None]:
from pathlib import Path

def read_candles(days = 1, period = '1min'):
    """ Read the most recent candle files of the given period into one dataframe.

    Files are looked up in `<data_dir>/candles`, filtered by the `candles_<period>.csv` name suffix,
    sorted by name and the last `days` of them are concatenated (presumably one file per day - verify
    against the data layout).

    :param days: how many of the latest files to read, must be >= 1
    :param period: candle period; used both as the file name suffix and as the resampling frequency,
                   so it must be a valid pandas offset alias (e.g. '1min', '5min')
    :return: dataframe indexed by 'close_time' with exactly one row per period (the last candle seen
             inside each period); periods without data are filled with NaN by the resample
    :raises ValueError: if days < 1 or no matching candle files exist
    """
    # files[-0:] would silently return every file, so reject non-positive values explicitly
    if days < 1:
        raise ValueError(f"days must be >= 1, got {days}")

    candles_dir = Path(data_dir, 'candles')
    suffix = f"candles_{period}.csv"
    files = sorted(f for f in os.listdir(candles_dir) if f.endswith(suffix))
    if not files:
        raise ValueError(f"No '*{suffix}' files found in {candles_dir}")

    # Read last days' files to one dataframe
    frames = [pd.read_csv(Path(candles_dir, fname), parse_dates=['open_time', 'close_time'])
              for fname in files[-days:]]
    df = pd.concat(frames).set_index('close_time', drop=False)
    print(f'Read {len(df)} candles from {df.index.min()} to {df.index.max()}')

    # Resample because raw data contains multiple candles inside a period.
    # The frequency follows `period` so that non 1-minute candles are not blown up into 1-minute rows.
    return df.resample(period).agg('last')

# Load the last 10 candle files (default 1 minute period) and preview the newest rows
candles = read_candles(days=10)
candles.tail()

## Calculate targets

In [None]:
import matplotlib.pyplot as plt

def plot_value_counts(ax, df, col, grouped):
    """ Draw a pie chart of signal counts from column `col` on the given axes.

    :param ax: axes object to draw on (its `pie` and `set_title` methods are used)
    :param df: dataframe holding the signal column
    :param col: name of the column with values -1 (sell), 0 (oom), 1 (buy)
    :param grouped: if False count every row; if True count only rows where a non-zero signal
                    differs from the previous row, i.e. each run of equal signals is counted once
    """
    series = df[col]
    if grouped:
        changed = series.diff() != 0
        counts = series[changed & (series != 0)].value_counts()
    else:
        counts = series.value_counts()

    names = {0:'oom', 1:'buy', -1: 'sell'}
    palette = {'oom':'C0', 'buy': 'C1', 'sell': 'C2'}
    labels = [names[value] for value in counts.index.tolist()]
    colors = [palette[label] for label in labels]

    def absolute_count(pct):
        # Matplotlib passes the wedge share in percent; convert it back to a row count
        return '{:.0f}'.format(pct*counts.sum()/100)

    ax.pie(counts, labels = labels, autopct = absolute_count, colors = colors)
    suffix = 'groups' if grouped else ''
    ax.set_title(f"{col} {suffix}")
    
def plot_signal_counts(df):
    """ Show two pies side by side: per-row counts of df['signal'] and counts of signal groups. """
    fig, axes = plt.subplots(1, 2)
    # Left pie counts every row, right pie counts each run of equal non-zero signals once
    for ax, grouped in zip(axes, (False, True)):
        plot_value_counts(ax, df, 'signal', grouped = grouped)
    fig.suptitle(f'Signal counts from {df.index.min()} to {df.index.max()}')
    plt.show()

In [None]:

def calc_signal(candles, predict_window, open_ratio, min_profit_ratio, max_loss_ratio, comission_pct=0.012):
    """ Label every candle with a signal: 1 (buy), -1 (sell) or 0, plus the future profit and loss.

    :param candles: dataframe with 'high', 'low', 'close' columns; the index must support a
                    time-based rolling window
    :param predict_window: look-ahead window passed to rolling(), e.g. '15min'
    :param open_ratio: a signal is raised only if future profit / future loss exceeds this value
    :param min_profit_ratio: minimal profit as a fraction of the close price
    :param max_loss_ratio: maximal tolerated loss as a fraction of the close price
    :param comission_pct: commission of a single order in percent
    :return: dataframe indexed like candles with 'signal', 'profit' and 'loss' columns;
             'profit' and 'loss' are NaN where the signal is 0
    """
    # Look ahead: rolling() only looks backwards, so roll over the reversed frame and reverse back.
    # The window starts at the current candle, so it includes the current high/low as well.
    reversed_candles = candles[['high', 'low']][::-1]
    future = reversed_candles.rolling(predict_window).agg({'high' : 'max', 'low' : 'min'})[::-1]
    future_high, future_low = future['high'], future['low']

    close, high, low = candles['close'], candles['high'], candles['low']

    # BTC-USDT 40 000 * 1% = 400
    # BTC-USDT 40 000 * 0.012% = 40 * 0.012 = 4,8
    # Percent -> fraction, doubled for order open + order close
    comission = comission_pct*0.01*2
    # NOTE(review): comission already covers open + close and is doubled once more below - confirm this is intended
    fee = close*2*comission
    min_profit = close*(max(comission*2, min_profit_ratio))
    max_loss = close*max_loss_ratio

    def should_open(profit, loss):
        # Profit / loss > open ratio considering comission and minimal profit
        return (profit > 0) & ((profit / loss) > open_ratio) & (profit > min_profit) & (loss < max_loss)

    # Buy: enter at the current high, best exit is the future high, worst case is the future low
    profit_buy = (future_high - high) - fee
    loss_buy = (high - future_low) + fee
    signal_buy = should_open(profit_buy, loss_buy)

    # Sell: enter at the current low, best exit is the future low, worst case is the future high
    profit_sell = (low - future_low) - fee
    loss_sell = (future_high - low) + fee
    signal_sell = should_open(profit_sell, loss_sell)

    # Signal is 0 by default and also when buy and sell conditions hold at the same time
    result = pd.DataFrame(index=candles.index)
    result['signal'] = 0
    result.loc[signal_buy & ~signal_sell, 'signal'] = 1
    result.loc[~signal_buy & signal_sell, 'signal'] = -1

    # Future profit and loss, filled only for rows with a non-zero signal
    for side, profit, loss in ((1, profit_buy, loss_buy), (-1, profit_sell, loss_sell)):
        rows = result['signal']==side
        result.loc[rows, 'profit'] = profit
        result.loc[rows, 'loss'] = loss
    return result

# Label every candle using a 15 minute look-ahead window
signal = (calc_signal(candles,
                      predict_window = '15min',
                      open_ratio = 2,
                      # 0.01*0.012*2 - open+close comission 0.012%
                      # Comission is 4.8 for 40K price, ~10 for one open+close trade
                      min_profit_ratio = (0.01*0.012*2) * 5,
                      max_loss_ratio = (0.01*0.012*2) * 5)
         )
# Only the last expression of a notebook cell is rendered automatically,
# so the table of recent non-zero signals has to be displayed explicitly
display(signal[signal['signal']!=0].tail(100))

# Pie plot, signals
plot_signal_counts(signal)


# Line plot, profit/loss
import plotly.express as px
import warnings
# Suppress pandas+plotly warning
warnings.simplefilter(action='ignore', category=FutureWarning)
px.line(signal, title='profit/loss').update_traces(mode='lines+markers').show()



## Feature engineering


### Calculate indicators

In [None]:

from ta import trend, momentum, volume, others, volatility



def with_time_features(df: pd.DataFrame):
    """ Add 'time_hour' and 'time_minute' columns taken from the datetime index.

    The columns are written into the passed dataframe, which is also returned.
    """
    stamps = df.index
    df["time_hour"], df["time_minute"] = stamps.hour, stamps.minute
    return df

def with_ichimoku(candles: pd.DataFrame):
    """ Add the four Ichimoku columns computed from 'high' and 'low' with the indicator's default settings.

    The columns are written into the passed dataframe, which is also returned.
    """
    indicator = trend.IchimokuIndicator(candles['high'], candles['low'])
    # Column name -> indicator method producing it; insertion order defines the column order
    producers = {
        'ichimoku_base_line': indicator.ichimoku_base_line,
        'ichimoku_conversion_line': indicator.ichimoku_conversion_line,
        'ichimoku_a': indicator.ichimoku_a,
        'ichimoku_b': indicator.ichimoku_b,
    }
    for column, produce in producers.items():
        candles[column] = produce()
    return candles

# Apply features and indicators
# The helpers and the assignments below add columns to the `candles` frame itself;
# every indicator is called with the library's default settings.
candles = with_time_features(candles)
candles = with_ichimoku(candles)
candles['cci'] = trend.cci(candles['high'], candles['low'], candles['close'])
candles['adx'] = trend.adx(candles['high'], candles['low'], candles['close'])
candles['rsi'] = momentum.rsi(candles['close'])
candles['stoch'] = momentum.stoch(candles['high'], candles['low'], candles['close'])
candles['macd'] = trend.macd(candles['close'])


# Preview the newest rows together with the freshly added columns
candles.tail()

In [None]:
import plotly as py
from plotly import graph_objects as go
from plotly.subplots import make_subplots

def plot_indicators(df, extra_lines: list, extra_subplots: list):
    """ Plot candlesticks with overlay lines plus one stacked subplot per extra column.

    :param df: dataframe with 'open', 'high', 'low', 'close' columns and every column named in
               extra_lines / extra_subplots; its index is used as the x axis. If a 'ticker'
               column is present, its first non-null value becomes the figure title.
    :param extra_lines: column names drawn as lines on top of the candlestick chart
    :param extra_subplots: column names, each drawn in its own subplot below the candles
    """
    extra_lines = list(extra_lines)
    extra_subplots = list(extra_subplots)

    fig = make_subplots(rows=len(extra_subplots)+1, cols=1,
                        shared_xaxes=True,
                        vertical_spacing=0.02,
                        subplot_titles=['candles']+extra_subplots)

    # Candles
    fig.add_trace(go.Candlestick(name='candles',
                                 x=df.index,
                                 open=df['open'],
                                 high=df['high'],
                                 low=df['low'],
                                 close=df['close']),
                  row=1, col=1)

    # Other charts on candles chart
    for name in extra_lines:
        fig.add_trace(go.Scatter(name=name, x=df.index, y=df[name], mode='lines'), row=1, col=1)

    # Other subplots below candles
    for i, col in enumerate(extra_subplots):
        fig.add_trace(go.Scatter(name=col, x=df.index, y=df[col], mode='lines'), row=2+i, col=1)

    # Take the title from the plotted frame, not from a notebook global, and pick the value
    # by position explicitly: the frame is indexed by time, so [0] is not a valid label.
    tickers = df['ticker'].dropna() if 'ticker' in df.columns else []
    title = str(tickers.iloc[0]) if len(tickers) else ''

    fig.update_layout(title=title,
                      xaxis_rangeslider_visible=False,
                      # max() keeps the figure visible when there are no extra subplots
                      height=300*max(1, len(extra_subplots)))
    fig.show()

# Column groups reused later as model features.
# Ichimoku lines are drawn over the candles, the other indicators get their own subplots.
# Note: the 'macd' column calculated earlier is not part of either list.
ichimoku_cols = ['ichimoku_base_line', 'ichimoku_conversion_line', 'ichimoku_a', 'ichimoku_b']
indicators_cols = ['cci', 'adx', 'rsi', 'stoch']
plot_indicators(candles, 
             extra_lines=ichimoku_cols,
             extra_subplots=indicators_cols)

### Create features

In [None]:
def balanced(features, targets, random_state=None):
    """ Balance by signal: undersample so that every present signal class has the same number of rows.

    :param features: feature dataframe sharing its index with targets
    :param targets: dataframe with a 'signal' column holding -1, 0 or 1
    :param random_state: optional seed forwarded to DataFrame.sample() for reproducible sampling
    :return: (balanced_features, balanced_targets), both sorted by index
    """
    counts = targets['signal'].value_counts()
    # Only sample classes that actually occur: sampling a missing class would raise
    labels = [label for label in (-1, 0, 1) if label in counts.index]
    if not labels:
        # Nothing to balance, return empty frames with the original columns
        return features.iloc[0:0], targets.iloc[0:0]

    # Size of the smallest present class
    cnt = int(counts.loc[labels].min())
    samples = [targets[targets['signal'] == label].sample(cnt, random_state=random_state) for label in labels]
    balanced_targets = pd.concat(samples).sort_index()
    balanced_features = features[features.index.isin(balanced_targets.index)].sort_index()
    return balanced_features, balanced_targets
    
    
def get_features_targets(candles, signal, feature_cols=None):
    """ Select model features from candles and the matching 'signal' targets.

    :param candles: dataframe holding the feature columns
    :param signal: dataframe with a 'signal' column, indexed like candles
    :param feature_cols: feature column names; by default the time features plus the notebook's
                         `ichimoku_cols` and `indicators_cols`
    :return: (features, targets) with identical indexes; targets has the single column 'signal'
    """
    if feature_cols is None:
        feature_cols = ['time_hour', 'time_minute'] + ichimoku_cols + indicators_cols

    # Indicators are undefined during their warm-up period and resampling leaves empty periods;
    # such rows are dropped because NaN inputs would turn the training loss into NaN
    features = candles[feature_cols].dropna()
    targets = signal.loc[features.index,['signal']]

    return features, targets

# Build the dataset, then undersample it so every signal class is equally represented.
# NOTE: balancing samples rows at random, so the result differs between runs.
features, targets = get_features_targets(candles, signal)
features, targets = balanced(features, targets)


In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import RobustScaler, MinMaxScaler, OneHotEncoder
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import ColumnTransformer


class DiffTransformer(BaseEstimator, TransformerMixin):
    """ Transformer that replaces the given columns with their row-to-row difference. """

    def __init__(self, columns):
        # Names of the columns to be differenced by transform()
        self.columns = columns

    def fit(self, x, y=None):
        """ Nothing is learned from the data; returns the transformer itself. """
        return self

    def transform(self, x):
        """ Return a copy of x where every configured column holds its diff(); other columns are untouched. """
        differenced = x.copy()
        for name in self.columns:
            differenced.loc[:,name] = differenced[name].diff()
        return differenced

def create_pipe(X, y) -> "tuple[Pipeline, Pipeline]":
    """ Create feature and target pipelines to use for transform and inverse transform.

    :param X: feature dataframe the feature pipeline is fitted on
    :param y: target dataframe with values -1, 0, 1 the target pipeline is fitted on
    :return: (x_pipe, y_pipe), both already fitted
    """

    time_cols = [col for col in X.columns if col.startswith("time") or col.endswith("time")]
    # Preserve the column order of X: a set difference would order the columns arbitrarily,
    # making the layout of the transformed matrix differ from run to run
    float_cols = [col for col in X.columns if col not in time_cols]

    # Scale x: robust scaling for the non-time columns (time columns pass through), then squeeze everything to [0, 1]
    x_pipe = Pipeline(
        [
        #("diff_transform", DiffTransformer(columns=float_cols)),
        ("xscaler", ColumnTransformer([("xrs", RobustScaler(), float_cols)], remainder="passthrough")),
        ("xmms", MinMaxScaler())])
    x_pipe.fit(X)

    # One hot encode y with a fixed category order -1, 0, 1
    y_pipe = Pipeline([('adjust_labels', OneHotEncoder(categories=[[-1, 0, 1]], sparse_output=False, drop=None))])
    y_pipe.fit(y)
    return x_pipe, y_pipe


from sklearn.model_selection import train_test_split
# shuffle=False keeps the row order, so the test rows are the trailing part of the data
x_train, x_test, y_train, y_test = train_test_split(features, targets, shuffle=False)

# Pipelines are fitted on the train part only and then applied to both parts
x_pipe, y_pipe = create_pipe(x_train, y_train)
x_train = x_pipe.transform(x_train)
x_test = x_pipe.transform(x_test)
y_train = y_pipe.transform(y_train)
y_test = y_pipe.transform(y_test)



In [None]:
from keras import Sequential, Input
from keras.layers import Dense, Dropout


import plotly.express as px
import collections

# def create_model_prod(X_train, y_train, window_size):
#     model = Sequential()
#     model.add(LSTM(128,  return_sequences=True, input_shape=(window_size, X_train.shape[1])))
#     model.add(Dropout(0.2))
#     model.add(LSTM(32))         
#     model.add(Dropout(0.2))
#     model.add(Dense(20, activation='relu'))
#     model.add(Dense(y_train.shape[1], activation='linear'))
#     #model.add(Dense(y_train.shape[1], activation='softmax'))
#     model.compile(optimizer='adam', loss='mae', metrics=['mse'])
#     return model  
# Size passed to plt.figure(figsize=...) for every training history chart
plot_figsize=(10,5)
#def create_model(X_train, y_train, window_size, lstm1_units, lstm2_units, dense1_units, dense2_units):
def create_models(*specs):
    """ Lazily create one model per layer specification.

    Each spec is a sequence of hidden layer sizes, the same format evaluate_models() passes to
    create_model(). Models are built from the notebook level x_train / y_train when the
    generator is iterated.

    :param specs: layer specifications, one per model
    :return: generator yielding compiled models in the order of specs
    """
    for unit_spec in specs:
        # create_model() takes no window size any more, the whole spec describes the layers
        yield create_model(x_train, y_train, unit_spec)

def create_model(x_train,  y_train, specs):
    """ Build and compile a dense classifier.

    :param x_train: 2D feature array, its second dimension defines the input size
    :param y_train: 2D target array, its second dimension defines the number of output units
    :param specs: iterable of hidden layer sizes; every hidden layer is Dense(relu) followed by Dropout(0.2)
    :return: compiled keras model
    """

    input_shape=(x_train.shape[1],)
    print(f"Creating model({specs}), input shape={input_shape}")

    model = Sequential()
    model.add(Input(shape=input_shape))

    # Hidden part: a dense layer followed by dropout for every requested size
    hidden = [layer
              for units in specs
              for layer in (Dense(units, activation='relu'), Dropout(0.2))]
    for layer in hidden:
        model.add(layer)

    # Output layer: softmax over the target columns for classification
    model.add(Dense(y_train.shape[1], activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

    return model

def plot_history_plotly(df, title):
    """ Show the columns of df as plotly lines with markers under the given title. """
    fig = px.line(df,title=title)
    fig.update_traces(mode='lines+markers')
    fig.show()
    
def plot_history_plt(df, title):
    """ Show the columns of df as matplotlib lines on the current figure, one point per epoch. """
    plt.plot(df)
    # Captions, then show the plot
    plt.xlabel('epoch')
    plt.title(title)
    plt.legend(df.columns)
    plt.show()

def plot_history(model_name, history, plot_func, metric=None):
    """ Plot history loss and metrics, one chart per train metric together with its validation twin.

    :param model_name: name used as the chart title prefix
    :param history: object with a `history` dict mapping metric name to per-epoch values
    :param plot_func: callable(df, title) that draws the chart
    :param metric: single metric to plot; by default every recorded metric is plotted
    :raises KeyError: if the requested metric was not recorded
    """
    recorded = history.history
    metric_names = [metric] if metric else list(recorded)
    # Plot all metrics
    for metric_name in metric_names:
        # Validation metrics are plotted together with the related train metric
        if metric_name.startswith("val_"): continue
        if metric_name not in recorded:
            raise KeyError(f"Metric '{metric_name}' is not in history, available: {list(recorded)}")

        # The val_... series is absent when the model was fitted without validation data
        names = [name for name in (metric_name, f"val_{metric_name}") if name in recorded]

        plt.figure(figsize=plot_figsize)
        df = pd.DataFrame({name: recorded[name] for name in names})
        plot_func(df, f"{model_name} {metric_name}")

def fit_model(model, train_gen, test_gen, epochs, steps_per_epoch=5):
    """ Fit the model on train_gen, validating on test_gen.

    :param model: object with a keras-like fit() method
    :param train_gen: training data passed to fit()
    :param test_gen: validation data passed to fit() as validation_data
    :param epochs: number of epochs
    :param steps_per_epoch: number of steps per epoch, 5 by default
    :return: whatever model.fit() returns (the training history)
    """
    return model.fit(train_gen, validation_data=test_gen, epochs=epochs, steps_per_epoch=steps_per_epoch)



def evaluate_models(epochs=10, *specs):
    """ Train one model per layer specification on the notebook level train / test arrays.

    :param epochs: number of epochs for each model
    :param specs: layer specifications, each passed to create_model()
    :return: dict mapping "Model(<spec>)" to the history returned by fit()
    """
    histories = {}
    for layer_units in specs:
        net = create_model(x_train, y_train, layer_units)
        # 15 steps per epoch, validated against the test arrays after every epoch
        histories[f"Model({layer_units})"] = net.fit(
            x_train, y_train,
            validation_data=(x_test, y_test),
            epochs=epochs,
            steps_per_epoch=15)
    return histories

def plot_res(results, plot_func):
    """ Plot every recorded metric of every model in results (model name -> history) with plot_func. """
    for model_name, history in results.items():
        plot_history(model_name, history, plot_func, None)


        
# Good: Current LSTM2: 10, 320, 0.2, 160, 0.2, 40, 0.2, 16, 0.1

# Train a single model with hidden layers of 32, 128, 64 and 16 units for 20 epochs
eval_res = evaluate_models(20, 
                                  [32, 128, 64, 16]
                                )

# Draw the train / validation curves of every recorded metric with matplotlib
plot_res(eval_res, plot_history_plt)