### Importing Modules

In [None]:
import numpy as np
import pandas as pd
import optuna
from optuna.samplers import TPESampler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import roc_auc_score
import lightgbm as lgb
from sklearn.decomposition import PCA
import time
from api_keys import api_public, api_secret
from datetime import datetime
from pybit.unified_trading import HTTP
import tti.indicators as ti
import inspect

<hr>

### Data preparation

In [2]:
btc_data = pd.read_csv('clean_5min.csv', header=0, index_col=0)

In [3]:
btc_data_copy_v1 = btc_data.copy()

In [4]:
btc_data_copy_v1['date'] = pd.to_datetime(btc_data_copy_v1['date'])

In [5]:
btc_data_copy_v1 = btc_data_copy_v1.set_index('date')
btc_data_copy_v1['target_label'] = btc_data_copy_v1['close'].diff(-1).apply(lambda x: 1 if x <= 0 else -1)
btc_data_copy_v1 = btc_data_copy_v1[:-1]

In [6]:
btc_data_copy_v1.head()

Unnamed: 0_level_0,open,high,low,close,volume,middle_band,upper_band,lower_band,co,cmo,...,vhf,vch,vosc,wc,ws,wad,wr,price_change,close_target,target_label
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-10-08 19:35:00,244.0,244.0,244.0,244.0,0.0,244.491,245.4237,243.5583,-1.1617,-100.0,...,0.4,28.2743,0.0,244.0,244.1036,0.0,-100.0,0.0,244.0,1
2015-10-08 19:40:00,244.0,244.0,244.0,244.0,0.0,244.4415,245.3681,243.5149,-1.1617,-100.0,...,0.5,-79.6453,0.0,244.0,244.0829,0.0,-100.0,0.0,244.0,1
2015-10-08 19:45:00,244.0,244.0,244.0,244.0,0.0,244.392,245.3017,243.4823,-1.1617,-100.0,...,0.6,-86.5569,0.0,244.0,244.0663,0.0,-100.0,0.0,244.0,1
2015-10-08 19:50:00,244.0,244.0,244.0,244.0,0.0,244.3425,245.224,243.461,-1.1617,-100.0,...,0.7,-86.5569,0.0,244.0,244.053,0.0,-100.0,0.0,244.0,1
2015-10-08 19:55:00,244.0,244.0,244.0,244.0,0.0,244.293,245.1337,243.4523,-1.1617,-100.0,...,0.8,-86.5569,0.0,244.0,244.0424,0.0,-100.0,0.0,244.0,1


In [8]:
btc_data_copy_targets = btc_data_copy_v1.target_label
btc_data_copy_features = btc_data_copy_v1.drop('target_label', axis=1)

In [9]:
btc_data_copy_features.shape

(775354, 73)

In [10]:
df_centralized = btc_data_copy_features - btc_data_copy_features.mean()

### PCA

In [None]:
pca = PCA(n_components=7)
principal_components = pca.fit_transform(df_centralized)
pca_df = pd.DataFrame(data=principal_components, index=btc_data_copy_features.index)
print(pca_df.shape, btc_data_copy_targets.shape, sep='\n')

Will take into account class imbalance building models

In [11]:
btc_data.target_label.value_counts()

target_label
 1    2543652
-1    1223109
Name: count, dtype: int64

### Bayesian Optimization for LightBoostClassifier hyperparameters tuning

In [51]:
def objective(trial):

    param = {
        "objective": "binary",
        "metric": "auc",
        "verbosity": -1,
        "n_jobs": -1,
        "random_state": 42,
        "is_unbalance": True,
        "subsample": 1.0,
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1, log=True),
        "boosting_type": 'gbdt',
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-1, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-1, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 10, 20),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.1, 0.9),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.2, 0.8),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 3),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "max_depth": trial.suggest_int('max_depth', 1, 5),
        'max_bin': trial.suggest_int('max_bin', 100, 150),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 20, 150),
    }
    
    dtrain = lgb.Dataset(X_train, label=y_train)

    gbm = lgb.train(param, dtrain)
    y_pred = gbm.predict(X_test)
    auc = roc_auc_score(y_test, y_pred)
    return auc

In [None]:
tscv = TimeSeriesSplit(n_splits=10)

for train_index, test_index in tscv.split(pca_df):
    X_train, X_test = pca_df.iloc[train_index], pca_df.iloc[test_index]
    y_train, y_test = btc_data_copy_targets.iloc[train_index], btc_data_copy_targets.iloc[test_index]
    sampler_complex = TPESampler(n_startup_trials=10, seed=42)
    study_complex = optuna.create_study(direction='maximize', sampler=sampler_complex)
    study_complex.optimize(objective, n_trials=20)

In [None]:
params = study_complex.best_params | {
                                        "objective": "binary",
                                        "metric": "f1",
                                        "verbosity": -1,
                                        "n_jobs": -1,
                                        "random_state": 42,
                                        "is_unbalance": True,
                                        "subsample": 1.0,
                                        "boosting_type": 'gbdt'
                                        }
params

### Train model on the whole dataset

In [57]:
dtrain = lgb.Dataset(pca_df, label=btc_data_copy_targets)
tuned_model = lgb.train(params, dtrain)

---

## Bybit API

### Bybit Mainnet Demo Trading connection

In [20]:
api_key_ = api_public
api_secret_ = api_secret
session = HTTP(
    demo=True,
    api_key=api_key_,
    api_secret=api_secret_
)

### Access to Bybit OHLCV data, model prediction, order placement

In [None]:
def generate_indicators(df):
    from warnings import simplefilter 
    simplefilter(action='ignore', category=FutureWarning)
    
    combined_df = df.copy()
    
    indicator_classes = [cls for _, cls in inspect.getmembers(ti, inspect.isclass)]
    
    for indicator_class in indicator_classes:
        try:
            indicator = indicator_class(input_data=df)
            indicator_data = indicator.getTiData()
            combined_df = combined_df.join(indicator_data, how='left')
        except Exception as e:
            pass
            print(f"Ошибка при вычислении {indicator_class.__name__}: {e}")
    combined_df['target_label'] = combined_df['close'].diff(-1).apply(lambda x: 1 if x <= 0 else -1)
    combined_df = combined_df[:-1]
    
    return combined_df

def get_clean_ohlcv_data(symbol="BTCUSDT", interval=5):
    
    ''' Extract OHLCV data from Bybit for the last 5 minutes'''
    
    end_time = int(datetime.now().timestamp())  # Current time in seconds
    start_time = end_time - (5 * 60)  # time 5 min ago in seconds
    try:
        response = session.get_kline(
            category="spot",
            symbol=symbol,
            interval=str(interval),
            start=start_time * 1000,  # start_time to milliseconds
            end=end_time * 1000       # end_time to milliseconds
        )

        kline = response['result']['list'][0]
        ohlcv_data = {
                    "date": datetime.fromtimestamp(int(kline[0]) / 1000),
                    "open": float(kline[1]),
                    "high": float(kline[2]),
                    "low": float(kline[3]),
                    "close": float(kline[4]),
                    "volume": float(kline[5])
                    }
        
        df = pd.DataFrame(ohlcv_data.items()).T
        df.columns = df.iloc[0]
        df = df.iloc[1:].set_index('date')
        df = df.astype({
            'open':'Float64',
            'high':'Float64',
            'low':'Float64',
            'close':'Float64',
            'volume':'Float64'
        })
        return df
    except Exception as e:
        print("Ошибка при получении данных:", e)

def input_data_preprocessing():
    new_ohlcv_row = get_clean_ohlcv_data()
    last_25_rows_df = btc_data_copy_v1.iloc[-25:, :5]
    ohlcv_data = pd.concat([last_25_rows_df, new_ohlcv_row], axis=0)
    #Check the next line - could have made a mistake!
    nan = ohlcv_data[ohlcv_data.columns[ohlcv_data.isna().sum() > 0]].columns
    ohlcv_data[nan] = ohlcv_data[nan].interpolate(method="linear")
    cleaned_ohlcv_row = generate_indicators(ohlcv_data).iloc[-1]
    return cleaned_ohlcv_row

def get_bitcoin_signal():
    
    '''ohlcv_data processing, features generation, 
    PCA application and model prediction'''
    
    preprocessed_data = input_data_preprocessing()
    #Дропаем индикаторы, которые удалены в clean_5min.csv
    preprocessed_data = preprocessed_data.drop(['adl', 'cmf', 'emv_ma', 'emv', 'mfi', 'vrc'], axis=1)
    latest_data = pca.transform(preprocessed_data)
    y_pred = tuned_model.predict(latest_data)
    # FIXME продумать логику добавления новых данных в исходные датасеты (зазписать в .csv)
    return y_pred


def place_order(order_side: str, amount):
    
    '''make a "buy" or "sell" order'''
    try:
        order = session.place_order(
                                    category="spot",
                                    symbol="BTCUSDT",
                                    side=order_side, # Buy or Sell
                                    orderType="Market",
                                    qty=str(amount), # BTC value for Sell and USDT value for Buy
                                    marketunit="quoteCoin",
                                    timeInForce="IOC",
                                    #orderLinkId="spot-test-mainnet-algo",
                                    isLeverage=0,
                                    orderFilter="Order"
                                    )
        print(f"{"Покупка" if order_side=="Buy" else "Продажа"} совершена:", order)
    except Exception as e:
        print("Ошибка при размещении ордера:", e)

def trading_bot(threshold=0.7, amount=1000):
    while True:
        probability = get_bitcoin_signal()
        if probability >= threshold:
            # Проверка, куплен ли уже биток или битка нет; #FIXME
            # если есть, то continue и напиши холд
            # если нет, то следующее:
            print(f"Сигнал на покупку! Вероятность роста: {probability:.3f}")
            place_order("Buy", amount) # amount должен быть переменной
            # в зависимости от баланса, т.е. фиксированный процентом, например
        else:
            print(f"Сигнал на продажу! Вероятность роста: {probability:.3f}")
            place_order("Sell", amount) # здесь amount - весь купленный биток
            # который должен вытягиваться из bybit из баланса кошелька
            
        time.sleep(300)  # Checkout every 5 min
        
        # Далее можно считать PnL: #FIXME
        session.get_wallet_balance(
                                accountType="UNIFIED",
                                coin="BTC, USDT"
                                )
# Bot launching
#trading_bot()
#data = get_clean_ohlcv_data()