### Importing Modules

In [53]:
import numpy as np
import pandas as pd
import optuna
from optuna.samplers import TPESampler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.metrics import roc_auc_score, f1_score
import lightgbm as lgb
from sklearn.decomposition import PCA
import ccxt
import time
from api_keys import api_public, api_secret
from datetime import datetime, timedelta

<hr>

### Data preparation

##### Note: unzip btc_data.csv.zip in advance

In [None]:
btc_data = pd.read_csv('btc_data_1min.csv', header=0, index_col=0)

In [3]:
btc_data_copy = btc_data.copy()

In [63]:
btc_data_copy.columns

Index(['open', 'high', 'low', 'close', 'volume', 'adl', 'middle_band',
       'upper_band', 'lower_band', 'cmf', 'co', 'cmo', 'cci', 'dpo', '+di',
       '-di', 'dx', 'adx', 'adxr', 'dema', 'emv', 'emv_ma', 'rl_0.0',
       'rl_23.6', 'rl_38.2', 'rl_50.0', 'rl_61.8', 'rl_100.0', 'fosc',
       'tenkan_sen', 'kijun_sen', 'senkou_a', 'senkou_b', 'imi', 'ko', 'lri',
       'lrs', 'mfi', 'mi', 'mp', 'mom', 'ma-simple', 'macd', 'signal_line',
       'nvi', 'obv', 'sar', 'prf', 'target_LONG', 'pvi', 'pvt', 'highest_high',
       'lowest_low', 'posc', 'prc', 'qstick', 'ri', 'rmi', 'rsi', 'rvi', 'sd',
       'smi', '%K', '%D', 'swi', 'tsf', 'tema', 'tp', 'uosc', 'vhf', 'vch',
       'vosc', 'vrc', 'wc', 'ws', 'wad', 'wr', 'price_change', 'target_label'],
      dtype='object')

In [4]:
nan_counts = btc_data.isna().sum().sort_values(ascending=False)
print(nan_counts.head(15).index)

Index(['emv_ma', 'cmf', 'adl', 'emv', 'mfi', 'swi', 'vrc', 'cmo', 'vhf', 'cci',
       'wr', 'uosc', '%D', 'imi', '%K'],
      dtype='object')


Delete NaN before applying PCA

In [5]:
btc_data_copy = btc_data_copy.dropna()

In [6]:
btc_data_copy_targets = btc_data_copy.target_label
btc_data_copy_features = btc_data_copy.drop('target_label', axis=1)

In [7]:
scaler = StandardScaler()
df_scaled = scaler.fit_transform(btc_data_copy_features)

### PCA

In [38]:
pca = PCA(n_components=7)

principal_components = pca.fit_transform(df_scaled)

pca_df = pd.DataFrame(data=principal_components, index=btc_data_copy_features.index)

In [39]:
pca_df.shape

(1015072, 7)

In [19]:
btc_data_copy_targets.shape

(1015072,)

Will take into account class imbalance building models

In [11]:
btc_data.target_label.value_counts()

target_label
 1    2543652
-1    1223109
Name: count, dtype: int64

Simple split

In [40]:
split_index = int(len(pca_df) * 0.8)
X_train = pca_df.iloc[:split_index]
X_test = pca_df.iloc[split_index:]
y_train = btc_data_copy_targets.iloc[:split_index]
y_test = btc_data_copy_targets.iloc[split_index:]

Complex split

In [13]:
tscv = TimeSeriesSplit(n_splits=10)

# Models 

### lgb classifier

### Bayesian Optimization for hyperparameters tuning

In [51]:
def objective(trial):

    param = {
        "objective": "binary",
        "metric": "auc",
        "verbosity": -1,
        "n_jobs": -1,
        "random_state": 42,
        "is_unbalance": True,
        "subsample": 1.0,
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1, log=True),
        "boosting_type": 'gbdt',
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-1, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-1, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 10, 20),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.1, 0.9),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.2, 0.8),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 3),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "max_depth": trial.suggest_int('max_depth', 1, 5),
        'max_bin': trial.suggest_int('max_bin', 100, 150),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 20, 150),
    }
    
    dtrain = lgb.Dataset(X_train, label=y_train)

    gbm = lgb.train(param, dtrain)
    y_pred = gbm.predict(X_test)
    #y_pred = np.where(y_pred < 0.5, -1, 1)   
    #f1 = f1_score(y_test, y_pred) # target metric
    auc = roc_auc_score(y_test, y_pred)
    return auc

Optimization for simple split

In [None]:
# sampler = TPESampler(n_startup_trials=10, seed=42)
# study = optuna.create_study(direction='maximize', sampler=sampler)
# study.optimize(objective, n_trials=20)

Optimization for complex split

In [52]:
for train_index, test_index in tscv.split(pca_df):
    X_train, X_test = pca_df.iloc[train_index], pca_df.iloc[test_index]
    y_train, y_test = btc_data_copy_targets.iloc[train_index], btc_data_copy_targets.iloc[test_index]
    sampler_complex = TPESampler(n_startup_trials=10, seed=42)
    study_complex = optuna.create_study(direction='maximize', sampler=sampler_complex)
    study_complex.optimize(objective, n_trials=20)

[I 2024-11-03 12:28:41,884] A new study created in memory with name: no-name-9562e513-74b8-469f-9ed2-bf2e5886a5ee
[I 2024-11-03 12:28:42,518] Trial 0 finished with value: 0.9174143815344098 and parameters: {'learning_rate': 0.023688639503640783, 'lambda_l1': 7.969454818643936, 'lambda_l2': 2.9106359131330697, 'num_leaves': 16, 'feature_fraction': 0.22481491235394924, 'bagging_fraction': 0.2935967122017216, 'bagging_freq': 1, 'min_child_samples': 88, 'max_depth': 4, 'max_bin': 136, 'min_data_in_leaf': 22}. Best is trial 0 with value: 0.9174143815344098.
[I 2024-11-03 12:28:42,912] Trial 1 finished with value: 0.9355933875093618 and parameters: {'learning_rate': 0.09330606024425668, 'lambda_l1': 4.622589001020832, 'lambda_l2': 0.26587543983272705, 'num_leaves': 12, 'feature_fraction': 0.24672360788274705, 'bagging_fraction': 0.38254534577572263, 'bagging_freq': 2, 'min_child_samples': 46, 'max_depth': 2, 'max_bin': 131, 'min_data_in_leaf': 38}. Best is trial 1 with value: 0.9355933875093

In [None]:
# params = study_complex.best_params | {
#                                         "objective": "binary",
#                                         "metric": "f1",
#                                         "verbosity": -1,
#                                         "n_jobs": -1,
#                                         "random_state": 42,
#                                         "is_unbalance": True,
#                                         "subsample": 1.0,
#                                         "boosting_type": 'gbdt'
#                                         }
# params

In [56]:
params = {'learning_rate': 0.06563441943830989,
        'lambda_l1': 0.8108473139204599,
        'lambda_l2': 0.654352336537576,
        'num_leaves': 20,
        'feature_fraction': 0.7912592426983397,
        'bagging_fraction': 0.20735355067611028,
        'bagging_freq': 2,
        'min_child_samples': 56,
        'max_depth': 5,
        'max_bin': 150,
        'min_data_in_leaf': 114,
        'objective': 'binary',
        'metric': 'f1',
        'verbosity': -1,
        'n_jobs': -1,
        'random_state': 42,
        'is_unbalance': True,
        'subsample': 1.0,
        'boosting_type': 'gbdt'}

In [57]:
dtrain = lgb.Dataset(pca_df, label=btc_data_copy_targets)

tuned_model = lgb.train(params, dtrain)

---

## Neural Networks (CHANGE THE DATA SPLITAGE)

In [91]:
# X = combined_df.drop(['close', 'close_target'], axis=1)
# y = combined_df["close_target"]
# tscv = TimeSeriesSplit()

In [92]:
# num = X.select_dtypes(include=['float64', 'int64']).columns

# numeric = make_pipeline(SimpleImputer(strategy="median"),
#                         StandardScaler())

# preproccessing_pipeline = ColumnTransformer([
#     ('num', numeric, num)
#     ], remainder='passthrough')

In [104]:
# from sklearn.pipeline import make_pipeline, Pipeline

# preproccessing = ColumnTransformer([
#     ('num', Pipeline([
#         ('imputer', SimpleImputer(strategy='mean')),  # Handle missing values
#         ('scaler', StandardScaler())  # Scale numerical data
#     ]), num)
# ], remainder='passthrough')

# X_train_processed = preproccessing.fit_transform(X_train)
# X_test_processed = preproccessing.transform(X_test)

# tf.random.set_seed(42)
# nn_model = tf.keras.Sequential([
#     tf.keras.layers.Flatten(),
#     tf.keras.layers.Dense(300, activation="relu"),
#     tf.keras.layers.Dense(1, activation="linear")  # Linear activation for regression
# ])

# adam_optimizer = AdamW(learning_rate=0.1)

# # Use mean squared error for regression
# nn_model.compile(loss="mean_squared_error",
#                  optimizer=adam_optimizer,
#                  metrics=[tf.keras.metrics.RootMeanSquaredError()])

# history = nn_model.fit(X_train_processed, y_train, epochs=30, batch_size=8,
#                        validation_data=(X_test_processed, y_test))

<hr>

In [None]:
# # TimeSeriesSplit для кросс-валидации
# tscv = TimeSeriesSplit(n_splits=5)

# # Валидация на временных рядах
# for train_index, test_index in tscv.split(X):
#     X_train, X_test = X.iloc[train_index], X.iloc[test_index]
#     y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
#     # Предобработка
#     X_train_processed = preproccessing_pipeline.fit_transform(X_train)
#     X_test_processed = preproccessing_pipeline.transform(X_test)
    
#     # Построение и компиляция модели
#     nn_model = tf.keras.Sequential([
#         tf.keras.layers.Flatten(),
#         tf.keras.layers.Dense(400, activation="relu"),
#         tf.keras.layers.Dense(1, activation="linear")
#     ])
    
#     adam_optimizer = AdamW(learning_rate=0.01)
    
#     nn_model.compile(loss="mean_squared_error",
#                      optimizer = adam_optimizer,
#                      metrics=[tf.keras.metrics.RootMeanSquaredError()])
    
#     # Обучение модели
#     history = nn_model.fit(X_train_processed, y_train, epochs=30, batch_size=8,
#                            validation_data=(X_test_processed, y_test))


## Bybit API

In [62]:
api_key_ = api_public
api_secret_ = api_secret

### Bybit Testnet connection

In [68]:
from pybit.unified_trading import HTTP
session = HTTP(
    testnet=False,
    api_key=api_key_,
    api_secret=api_secret_,
)
print(session.place_order(
    category="spot",
    symbol="BTCUSDT",
    side="Buy",
    orderType="Market",
    qty="1000",
    marketunit="quoteCoin",
    timeInForce="IOC",
    orderLinkId="spot-test-mainnet-algo",
    isLeverage=0,
    orderFilter="Order"))

InvalidRequestError: You are not authorized to execute this request. (ErrCode: 10003) (ErrTime: 10:54:35).
Request → POST https://api.bybit.com/v5/order/create: {"category": "spot", "symbol": "BTCUSDT", "side": "Buy", "orderType": "Market", "qty": "1000", "marketunit": "quoteCoin", "timeInForce": "IOC", "orderLinkId": "spot-test-mainnet-algo", "isLeverage": 0, "orderFilter": "Order"}.

In [65]:
exchange_spot = ccxt.bybit({
    'apiKey': api_key_,
    'secret': api_secret_,
    'enableRateLimit': True,
    'options': {
        'defaultType': 'spot',
    },
    'urls': {
        'api': {
            'public': 'https://api-demo.bybit.com',
            'private': 'https://api-demo.bybit.com',
        }
    }
})

def get_bitcoin_signal():
    # Здесь вызывается ваша модель, которая предсказывает вероятность роста
    # Например:
    # prediction = model.predict_proba(input_data)
    # Вероятность роста цены (пример)
    probability_of_increase = 0.8  # заменить на реальную вероятность от модели
    return probability_of_increase

def place_order(amount):
    try:
        order = exchange_spot.create_market_buy_order('BTC/USDT', amount)
        print("Покупка совершена:", order)
    except Exception as e:
        print("Ошибка при размещении ордера:", e)

def trading_bot(threshold=0.7, amount=0.001):
    while True:
        probability = get_bitcoin_signal()
        if probability >= threshold:
            print(f"Сигнал на покупку! Вероятность роста: {probability}")
            place_order(amount)
        else:
            print(f"Нет сигнала на покупку. Текущая вероятность: {probability}")
        
        time.sleep(60)  # Проверка каждые 60 секунд (или любой другой интервал)

# Запуск бота
trading_bot()

Сигнал на покупку! Вероятность роста: 0.8
Ошибка при размещении ордера: bybit {"retCode":10032,"retMsg":"Demo trading are not supported.","result":{},"retExtInfo":{},"time":1730629842045}


KeyboardInterrupt: 