### Importing Modules

In [1]:
import numpy as np
import pandas as pd
import optuna
from optuna.samplers import TPESampler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import roc_auc_score
import lightgbm as lgb
from sklearn.decomposition import PCA
import time
from api_keys import api_public, api_secret
from datetime import datetime
from pybit.unified_trading import HTTP
import tti.indicators as ti
import inspect
import warnings

<hr>

### Data preparation

In [2]:
btc_data = pd.read_csv('clean_5min.csv', header=0, index_col=0)

In [3]:
btc_data_copy_v1 = btc_data.copy()

In [4]:
btc_data_copy_v1['date'] = pd.to_datetime(btc_data_copy_v1['date'])

In [5]:
btc_data_copy_v1 = btc_data_copy_v1.set_index('date')
btc_data_copy_v1['target_label'] = btc_data_copy_v1['close'].diff(-1).apply(lambda x: 1 if x <= 0 else -1)
btc_data_copy_v1 = btc_data_copy_v1[:-1]

In [6]:
btc_data_copy_v1.head()

Unnamed: 0_level_0,open,high,low,close,volume,middle_band,upper_band,lower_band,co,cmo,...,vhf,vch,vosc,wc,ws,wad,wr,price_change,close_target,target_label
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-10-08 19:35:00,244.0,244.0,244.0,244.0,0.0,244.491,245.4237,243.5583,-1.1617,-100.0,...,0.4,28.2743,0.0,244.0,244.1036,0.0,-100.0,0.0,244.0,1
2015-10-08 19:40:00,244.0,244.0,244.0,244.0,0.0,244.4415,245.3681,243.5149,-1.1617,-100.0,...,0.5,-79.6453,0.0,244.0,244.0829,0.0,-100.0,0.0,244.0,1
2015-10-08 19:45:00,244.0,244.0,244.0,244.0,0.0,244.392,245.3017,243.4823,-1.1617,-100.0,...,0.6,-86.5569,0.0,244.0,244.0663,0.0,-100.0,0.0,244.0,1
2015-10-08 19:50:00,244.0,244.0,244.0,244.0,0.0,244.3425,245.224,243.461,-1.1617,-100.0,...,0.7,-86.5569,0.0,244.0,244.053,0.0,-100.0,0.0,244.0,1
2015-10-08 19:55:00,244.0,244.0,244.0,244.0,0.0,244.293,245.1337,243.4523,-1.1617,-100.0,...,0.8,-86.5569,0.0,244.0,244.0424,0.0,-100.0,0.0,244.0,1


In [7]:
btc_data_copy_targets = btc_data_copy_v1.target_label
btc_data_copy_features = btc_data_copy_v1.drop('target_label', axis=1)

In [8]:
df_centralized = btc_data_copy_features - btc_data_copy_features.mean()
df_centralized = df_centralized.drop(['ko', 'close_target', 'price_change'], axis=1)

### PCA

In [9]:
pca = PCA(n_components=30)
principal_components = pca.fit_transform(df_centralized)
pca_df = pd.DataFrame(data=principal_components, index=btc_data_copy_features.index)
print(pca_df.shape, btc_data_copy_targets.shape, sep='\n')

(775354, 30)
(775354,)


Will take into account class imbalance building models

In [10]:
btc_data_copy_targets.value_counts()

target_label
 1    454603
-1    320751
Name: count, dtype: int64

### Bayesian Optimization for LightBoostClassifier hyperparameters tuning

In [26]:
def objective(trial):

    param = {
        "objective": "binary",
        "metric": "auc",
        "verbosity": -1,
        "n_jobs": -1,
        "random_state": 42,
        "is_unbalance": True,
        "subsample": 1.0,
        "boosting_type": 'gbdt',
        'n_estimators': trial.suggest_int("n_estimators", 100, 2000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1, log=True),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-1, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-1, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 10, 20),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.1, 0.9),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.2, 0.8),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 3),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "max_depth": trial.suggest_int('max_depth', 1, 5),
        'max_bin': trial.suggest_int('max_bin', 100, 150),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 20, 150),
    }
    
    dtrain = lgb.Dataset(X_train, label=y_train)

    gbm = lgb.train(param, dtrain)
    y_pred = gbm.predict(X_test)
    auc = roc_auc_score(y_test, y_pred)
    return auc

In [27]:
tscv = TimeSeriesSplit(n_splits=5)

for train_index, test_index in tscv.split(pca_df):
    X_train, X_test = pca_df.iloc[train_index], pca_df.iloc[test_index]
    y_train, y_test = btc_data_copy_targets.iloc[train_index], btc_data_copy_targets.iloc[test_index]
    sampler_complex = TPESampler(n_startup_trials=10, seed=42)
    study_complex = optuna.create_study(direction='maximize', sampler=sampler_complex)
    study_complex.optimize(objective, n_trials=20)

[I 2024-11-09 13:28:59,523] A new study created in memory with name: no-name-d21e7f91-1337-4c89-a59f-5cfaf045205d
[I 2024-11-09 13:29:05,759] Trial 0 finished with value: 0.507369017417107 and parameters: {'n_estimators': 812, 'learning_rate': 0.08927180304353628, 'lambda_l1': 2.9106359131330697, 'lambda_l2': 1.5751320499779735, 'num_leaves': 11, 'feature_fraction': 0.22479561626896213, 'bagging_fraction': 0.23485016730091968, 'bagging_freq': 3, 'min_child_samples': 62, 'max_depth': 4, 'max_bin': 101, 'min_data_in_leaf': 147}. Best is trial 0 with value: 0.507369017417107.
[I 2024-11-09 13:29:13,996] Trial 1 finished with value: 0.5854924684146051 and parameters: {'n_estimators': 1682, 'learning_rate': 0.016305687346221478, 'lambda_l1': 0.23102018878452935, 'lambda_l2': 0.2327067708383781, 'num_leaves': 13, 'feature_fraction': 0.5198051453057903, 'bagging_fraction': 0.4591670111852695, 'bagging_freq': 1, 'min_child_samples': 63, 'max_depth': 1, 'max_bin': 114, 'min_data_in_leaf': 67}. 

In [28]:
params = study_complex.best_params | {
                                        "objective": "binary",
                                        "metric": "auc",
                                        "verbosity": -1,
                                        "n_jobs": -1,
                                        "random_state": 42,
                                        "is_unbalance": True,
                                        "subsample": 1.0,
                                        }
params

{'n_estimators': 1385,
 'learning_rate': 0.04254050587571971,
 'lambda_l1': 1.1513140503215087,
 'lambda_l2': 0.6779331678445241,
 'num_leaves': 15,
 'feature_fraction': 0.39228596334878807,
 'bagging_fraction': 0.6623368331204738,
 'bagging_freq': 2,
 'min_child_samples': 33,
 'max_depth': 1,
 'max_bin': 124,
 'min_data_in_leaf': 44,
 'objective': 'binary',
 'metric': 'auc',
 'verbosity': -1,
 'n_jobs': -1,
 'random_state': 42,
 'is_unbalance': True,
 'subsample': 1.0}

In [11]:
params = {
        'n_estimators': 1385,
        'learning_rate': 0.04254050587571971,
        'lambda_l1': 1.1513140503215087,
        'lambda_l2': 0.6779331678445241,
        'num_leaves': 15,
        'feature_fraction': 0.39228596334878807,
        'bagging_fraction': 0.6623368331204738,
        'bagging_freq': 2,
        'min_child_samples': 33,
        'max_depth': 1,
        'max_bin': 124,
        'min_data_in_leaf': 44,
        'objective': 'binary',
        'metric': 'auc',
        'verbosity': -1,
        'n_jobs': -1,
        'random_state': 42,
        'is_unbalance': True,
        'subsample': 1.0
        }

### Train model on the whole dataset

In [12]:
dtrain = lgb.Dataset(pca_df, label=btc_data_copy_targets)
tuned_model = lgb.train(params, dtrain)



In [15]:
y_pred = tuned_model.predict(pca_df)
btc_data_copy_v2 = btc_data.copy().iloc[:-1]
btc_data_copy_v2['y_pred'] = y_pred

In [18]:
df = btc_data_copy_v2[['close', 'y_pred']]

In [19]:
df = df.sort_index(ascending=False)

In [None]:
df

In [None]:
btc_data_copy_v1

---

## Bybit API

### Bybit Mainnet Demo Trading connection

In [21]:
api_key_ = api_public
api_secret_ = api_secret
session = HTTP(
    demo=True,
    api_key=api_key_,
    api_secret=api_secret_
)

### Access to Bybit OHLCV data, model prediction, order placement

In [23]:
warnings.filterwarnings('ignore')

def generate_indicators(df):
    
    ''' makes new features based on OHLCV data'''
    
    #from warnings import simplefilter 
    #simplefilter(action='ignore', category=FutureWarning)
    combined_df = df.copy()
    
    indicator_classes = [cls for _, cls in inspect.getmembers(ti, inspect.isclass)]
    
    for indicator_class in indicator_classes:
        try:
            indicator = indicator_class(input_data=df)
            indicator_data = indicator.getTiData()
            combined_df = combined_df.join(indicator_data, how='left')
        except Exception as e:
            pass
            #print(f"Error computing {indicator_class.__name__}: {e}")
    combined_df['target_label'] = combined_df['close'].diff(-1).apply(lambda x: 1 if x <= 0 else -1)
    
    return combined_df

def get_clean_ohlcv_data(symbol="BTCUSDT", interval=5):
    
    ''' extracts OHLCV data from Bybit for the last 5 minutes'''
    
    end_time = int(datetime.now().timestamp())  # Current time in seconds
    start_time = end_time - (5 * 60)  # time 5 min ago in seconds
    try:
        response = session.get_kline(
            category="spot",
            symbol=symbol,
            interval=str(interval),
            start=start_time * 1000,  # start_time to milliseconds
            end=end_time * 1000       # end_time to milliseconds
        )

        kline = response['result']['list'][0]
        ohlcv_data = {
                    "date": datetime.fromtimestamp(int(kline[0]) / 1000),
                    "open": float(kline[1]),
                    "high": float(kline[2]),
                    "low": float(kline[3]),
                    "close": float(kline[4]),
                    "volume": float(kline[5])
                    }
        
        df = pd.DataFrame(ohlcv_data.items()).T
        df.columns = df.iloc[0]
        df = df.iloc[1:].set_index('date')
        df = df.astype({
            'open':'Float64',
            'high':'Float64',
            'low':'Float64',
            'close':'Float64',
            'volume':'Float64'
        })
        return df
    except Exception as e:
        print("Error extracting data:", e)

def input_data_preprocessing():
    
    ''' combines previous OHLCV data with the newest one and processes it'''
    
    new_ohlcv_row = get_clean_ohlcv_data()
    last_55_rows_df = btc_data_copy_v1.iloc[-300:, :5]
    ohlcv_data = pd.concat([last_55_rows_df, new_ohlcv_row], axis=0)
    featured_data = generate_indicators(ohlcv_data)
    nan = featured_data[featured_data.columns[featured_data.isna().sum() > 0]].columns
    featured_data[nan] = featured_data[nan].fillna(featured_data[nan].rolling(window=200, min_periods=1).mean())
    return featured_data.iloc[[-1]]

def get_bitcoin_signal():
    
    '''ohlcv_data processing, features generation, 
    PCA application and model prediction'''
    
    preprocessed_data = input_data_preprocessing()
    preprocessed_data = preprocessed_data.drop(['adl', 'cmf', 'emv_ma', 'emv', 'mfi', 'vrc', 'target_label'], axis=1)
    centralized_data = preprocessed_data - preprocessed_data.mean()
    latest_data = pca.transform(centralized_data)
    y_pred = tuned_model.predict(latest_data)
    last_btc_price = preprocessed_data['close'].iloc[-1]
    # FIXME продумать логику добавления новых данных в исходные датасеты (зазписать в .csv)
    return (y_pred, last_btc_price)


def place_order(order_side: str, amount):
    
    '''makes a "buy" or "sell" order'''
    
    try:
        order = session.place_order(
                                    category="spot",
                                    symbol="BTCUSDT",
                                    side=order_side, # Buy or Sell
                                    orderType="Market",
                                    qty=str(amount), # BTC value for Sell and USDT value for Buy
                                    marketunit="quoteCoin",
                                    timeInForce="IOC",
                                    #orderLinkId="spot-test-mainnet-algo",
                                    isLeverage=0,
                                    orderFilter="Order"
                                    )
        print(f"{"Buy order" if order_side=="Buy" else "Sell order"} executed:", order)
    except Exception as e:
        print("Error making an order:", e)

def trading_bot(threshold=0.7, amount=1000):
    while True:
        probability, last_btc_price = get_bitcoin_signal()
        btc_balance_in_usdt = float(session.get_wallet_balance(
                                accountType="UNIFIED",
                                coin="BTC"
                                )['result']['list'][0]['coin'][0]['usdValue'])
        usdt_balance = float(session.get_wallet_balance(
                                accountType="UNIFIED",
                                coin="USDT"
                                )['result']['list'][0]['coin'][0]['usdValue'])
        if probability >= threshold and btc_balance_in_usdt > 100:
            print('Hold your tokens!')
        elif probability >= threshold and btc_balance_in_usdt <= 100:
            print(f"Buy signal! Price increase probability: {probability[0]}")
            place_order("Buy", amount)
        elif probability <= threshold and btc_balance_in_usdt > 100:
            print(f"Sell signal! Price increase probability: {probability[0]}")
            place_order("Sell", btc_balance_in_usdt)
        else:
            print(f"Wait for a better opportunity. Price increase probability: {probability[0]}")
        
        print(f'Current BTC price: {last_btc_price}',
              f'BTC balance in USD: {btc_balance_in_usdt}',
              f'USDT balance: {usdt_balance}', sep='\n')    
        time.sleep(300)  # Checkout every 5 min
        
# Bot launching
trading_bot()

Wait for a better opportunity. Price increase probability: 0.12438708641290597
Current BTC price: 76383.25
BTC balance in USD: 0.05406853
USDT balance: 125849.81130371
Wait for a better opportunity. Price increase probability: 0.12438708641290597
Current BTC price: 76391.15
BTC balance in USD: 0.05406853
USDT balance: 125849.81130371
Wait for a better opportunity. Price increase probability: 0.12438708641290597
Current BTC price: 76448.79
BTC balance in USD: 0.05406853
USDT balance: 125849.81130371
Wait for a better opportunity. Price increase probability: 0.12438708641290597
Current BTC price: 76461.87
BTC balance in USD: 0.05406853
USDT balance: 125849.81130371
Wait for a better opportunity. Price increase probability: 0.12438708641290597
Current BTC price: 76468.09
BTC balance in USD: 0.05406853
USDT balance: 125849.81130371
Wait for a better opportunity. Price increase probability: 0.12438708641290597
Current BTC price: 76522.73
BTC balance in USD: 0.05406853
USDT balance: 125849.8

KeyboardInterrupt: 