In [1]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# 特徴量データをロード
df = pd.read_csv('bybit_btcusdt_20210101-20230330_features.csv', index_col=0)

In [3]:

# ターゲット変数の作成
price_change = (df['Close'].shift(-1) / df['Close'] - 1) * 100
conditions = [
    (price_change >= 0.1),
    (price_change <= -0.1)
]
choices = ['up', 'down']
df['target'] = np.select(conditions, choices, default='neutral')

df['target'], _ = pd.factorize(df['target'])

# 不要な列を削除
df = df.drop(['Timestamp', 'Open', 'High', 'Low', 'Close', 'Volume'], axis=1).dropna()


In [10]:
# データセットをトレーニングセットとテストセットに分割
X = df.drop('target', axis=1)
y = df['target']

# データセットの長さ
n = len(df)

# トレーニングセットとテストセットの分割点
split_point = int(n * 0.8)

# トレーニングデータとテストデータに分割
X_train = X[:split_point]
X_test = X[split_point:]
y_train = y[:split_point]
y_test = y[split_point:]


In [11]:
# LightGBMモデルのトレーニング
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test)

params = {
    'objective': 'multiclass',
    'num_class': 3,
    'metric': 'multi_logloss',
    'boosting_type': 'gbdt',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9
}

model = lgb.train(params, train_data, valid_sets=[test_data], num_boost_round=1000, early_stopping_rounds=100)

# 予測と評価
y_pred = model.predict(X_test)
y_pred_class = [np.argmax(pred) for pred in y_pred]
class_map = {'up': 0, 'neutral': 1, 'down': 2}
y_test_mapped = y_test.map(class_map).values





You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3060
[LightGBM] [Info] Number of data points in the train set: 943536, number of used features: 12
[LightGBM] [Info] Start training from score -0.234873
[LightGBM] [Info] Start training from score -2.265279
[LightGBM] [Info] Start training from score -2.248784
[1]	valid_0's multi_logloss: 0.39618
Training until validation scores don't improve for 100 rounds
[2]	valid_0's multi_logloss: 0.38684
[3]	valid_0's multi_logloss: 0.37911
[4]	valid_0's multi_logloss: 0.371291
[5]	valid_0's multi_logloss: 0.364143
[6]	valid_0's multi_logloss: 0.358771
[7]	valid_0's multi_logloss: 0.352584
[8]	valid_0's multi_logloss: 0.346864
[9]	valid_0's multi_logloss: 0.341622
[10]	valid_0's multi_logloss: 0.336707
[11]	valid_0's multi_logloss: 0.332261
[12]	valid_0's multi_logloss: 0.328042
[13]	valid_0's multi_logloss: 0.324213
[14]	valid_0's multi_logloss: 0

In [12]:
accuracy = accuracy_score(y_test, y_pred_class)
print(f"Accuracy: {accuracy}")

print("Classification report:")
print(classification_report(y_test, y_pred_class))

Accuracy: 0.9158060741720506
Classification report:
              precision    recall  f1-score   support

           0       0.92      1.00      0.96    215857
           1       0.41      0.02      0.03     10143
           2       0.38      0.05      0.08      9884

    accuracy                           0.92    235884
   macro avg       0.57      0.35      0.36    235884
weighted avg       0.87      0.92      0.88    235884



In [13]:
import pickle
filename = 'btcusdr_lightgbm_model.pkl'
pickle.dump(model,open(filename,'wb'))