In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import pandas as pd
import numpy as np
import time
import gc

# from sklearn.linear_model import RidgeCV, Ridge
# from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
# from sklearn.model_selection import train_test_split

from lightgbm import LGBMClassifier
# from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit
from scipy.stats.stats import pearsonr
from tqdm import tqdm
import warnings
warnings.simplefilter("ignore")

# データフレームcolumの全表示
pd.set_option("display.max_columns", None)

In [2]:
from label import label

In [3]:
df_train = label('../binance-BTCUSDT-1m.csv', t=1)

In [4]:
df_train = df_train.rename(columns={'Time_UTC_Start':'datetime', 'Number_of_trades':'count'})

In [5]:
df_train['Close'].ewm(alpha=2/3).mean()

Timestamp
1531910040000     7385.000000
1531910100000     7380.635000
1531910160000     7374.982308
1531910220000     7374.285500
1531910280000     7371.416694
                     ...     
1651909740000    35914.984726
1651909800000    35919.968242
1651909860000    35933.329414
1651909920000    35948.783138
1651909980000    35950.721046
Name: Close, Length: 2000000, dtype: float64

In [6]:
df_train['Close']

Timestamp
1531910040000     7385.00
1531910100000     7379.18
1531910160000     7372.47
1531910220000     7373.95
1531910280000     7370.00
                   ...   
1651909740000    35913.04
1651909800000    35922.46
1651909860000    35940.01
1651909920000    35956.51
1651909980000    35951.69
Name: Close, Length: 2000000, dtype: float64

In [7]:
df_train['Close'].rolling(2).mean()

Timestamp
1531910040000          NaN
1531910100000     7382.090
1531910160000     7375.825
1531910220000     7373.210
1531910280000     7371.975
                   ...    
1651909740000    35916.520
1651909800000    35917.750
1651909860000    35931.235
1651909920000    35948.260
1651909980000    35954.100
Name: Close, Length: 2000000, dtype: float64

In [8]:
# Two new features from the competition tutorial
def upper_shadow(df):
    return df["High"] - np.maximum(df["Close"], df["Open"])

def lower_shadow(df):
    return np.minimum(df["Close"], df["Open"]) - df["Low"]

## notebook: crypto-prediction-technical-analysis-features
def SM_A_M(df, colname, n):
    mean = df[colname].rolling(window=n).mean()
    median = df[colname].rolling(window=n).median()
    
    return mean, median

def EMA1(df, colname, n):
    """
    https://qiita.com/MuAuan/items/b08616a841be25d29817
    """
    a= 2/(n+1)
    return df[colname].ewm(alpha=a).mean()

def MACD(df, colname, span1=12, span2=26, span3=9):
    """
    Compute MACD
    # https://www.learnpythonwithrune.org/pandas-calculate-the-moving-average-convergence-divergence-macd-for-a-stock/
    """
    
    exp1 = EMA1(df, colname, span1)
    exp2 = EMA1(df, colname, span2)
    macd = 100 * (exp1 - exp2) / exp2
    signal = macd.ewm(alpha=2./(span3+1)).mean() ##EMA1(df, colname, macd, span3)

    return macd, signal

def BollingerBand(df, colname, window, no_of_std):
    mean = df[colname].rolling(window=window).mean()
    std = df[colname].rolling(window=window).std() 
    bb_high = mean + no_of_std * std
    bb_low = mean - no_of_std * std
    
    return bb_high, bb_low, std

def rsiFunc(df, colname, n=14):
    prices = df[colname].values
    
    deltas = np.diff(prices)
    seed = deltas[:n+1]
    up = seed[seed>=0].sum()/n
    down = -seed[seed<0].sum()/n
    rs = up/down
    rsi = np.zeros_like(prices)
    rsi[:n] = 100. - 100./(1.+rs)

    for i in range(n, len(prices)):
        delta = deltas[i-1] # cause the diff is 1 shorter

        if delta>0:
            upval = delta
            downval = 0.
        else:
            upval = 0.
            downval = -delta

        up = (up*(n-1) + upval)/n
        down = (down*(n-1) + downval)/n

        rs = up/down
        rsi[i] = 100. - 100./(1.+rs)

    return rsi

## https://stackoverflow.com/questions/42138357/pandas-rolling-slope-calculation
def calc_slope(x):
    slope = np.polyfit(range(len(x)), x, 1)[0]
    return slope

In [9]:
def get_feat(df):
    df["High-Low"] = df["High"] - df["Low"]
    df["Close-Open"] = df["Close"] - df["Open"]

    df = df.drop(["datetime", "Open", "High", "Low", "Timestamp End", "Quote_asset_volume", "Taker_buy_base_asset_volume", "Taker_buy_quote_asset_volume"], axis=1)
    gc.collect()
    # df_train = reduce_mem_usage(df_train)
    # df_train.describe()

    macd, signal = MACD(df=df, colname="Close")
    df['MACD_' + "Close" + '_macd'] = macd
    df['MACD_' + "Close" + '_signal'] = signal
    print(macd)
    print(signal)

    df['MACD_' + "Close" + '_macd'] = df['MACD_' + "Close" + '_macd'].astype(np.float32)
    df['MACD_' + "Close" + '_signal'] = df['MACD_' + "Close" + '_signal'].astype(np.float32)

    arr_n = [5, 10, 20, 40, 100]
    # arr_cols = ["Close", "VWAP", "Volume"]
    arr_cols = ["Close", "Volumn"]
    no_of_std = 2.5

    for colname in arr_cols: 
        print("colname = " + colname)
        for n in arr_n:
            print("n = "+ str(n))
            df['label_lag_' + str(n)] = df['label'].shift(n)
            df['label_lag_' + str(n)].fillna(value=df["label"], inplace=True)

            print("SMA")
            mean, median = SM_A_M(df=df, colname=colname, n=n)
            df['SM_' + colname + '_mean_' + str(n)] = mean
            df['SM_' + colname + '_median_' + str(n)] = median

            df['SM_' + colname + '_mean_' + str(n)] = df['SM_' + colname + '_mean_' + str(n)].astype(np.float32)
            df['SM_' + colname + '_median_' + str(n)] = df['SM_' + colname + '_median_' + str(n)].astype(np.float32)

            df['SM_' + colname + '_mean_' + str(n)].fillna(value=df[colname], inplace=True)
            df['SM_' + colname + '_median_' + str(n)].fillna(value=df[colname], inplace=True)
            

            print("EMA1")
            ewmean = EMA1(df=df_train, colname=colname, n=n)
            df['EWM_' + colname + '_mean_' + str(n)] = ewmean
            df['EWM_' + colname + '_mean_' + str(n)] = df['EWM_' + colname + '_mean_' + str(n)].astype(np.float32)
            
            if colname in ['VWAP', 'Volume']:
                continue

            if n==5:
                continue
                
            print("RSI")
            rsi = rsiFunc(df=df,  colname=colname, n=n)
            df['RSI_' + str(n)] = rsi
            df['RSI_' + str(n)] = df['RSI_' + str(n)].astype(np.float32)
            

            print("BollingerBand")
            bb_high, bb_low, std = BollingerBand(df=df, colname=colname, window=n, no_of_std=no_of_std)
            df['SM_' + colname + '_std_' + str(n)] = std
            df['SM_' + colname + '_BB_High_' + str(n)] = bb_high
            df['SM_' + colname + '_BB_Low_' + str(n)] = bb_low

            df['SM_' + colname + '_std_' + str(n)] = df['SM_' + colname + '_std_' + str(n)].astype(np.float32)
            df['SM_' + colname + '_BB_High_' + str(n)] = df['SM_' + colname + '_BB_High_' + str(n)].astype(np.float32)
            df['SM_' + colname + '_BB_Low_' + str(n)] = df['SM_' + colname + '_BB_Low_' + str(n)].astype(np.float32)
            
            gc.collect()
            
    df = df.dropna()

    df.head()

    return df

In [10]:
df_train = get_feat(df=df_train)
df_train.memory_usage(deep=True)

Timestamp
1531910040000    0.000000
1531910100000   -0.001769
1531910160000   -0.005075
1531910220000   -0.005731
1531910280000   -0.008098
                   ...   
1651909740000    0.049452
1651909800000    0.048766
1651909860000    0.051570
1651909920000    0.056843
1651909980000    0.059254
Name: Close, Length: 2000000, dtype: float64
Timestamp
1531910040000    0.000000
1531910100000   -0.000983
1531910160000   -0.002660
1531910220000   -0.003700
1531910280000   -0.005008
                   ...   
1651909740000    0.049707
1651909800000    0.049519
1651909860000    0.049929
1651909920000    0.051312
1651909980000    0.052901
Name: Close, Length: 2000000, dtype: float64
colname = Close
n = 5
SMA
EMA1
n = 10
SMA
EMA1
RSI
BollingerBand
n = 20
SMA
EMA1
RSI
BollingerBand
n = 40
SMA
EMA1
RSI
BollingerBand
n = 100
SMA
EMA1
RSI
BollingerBand
colname = Volumn
n = 5
SMA
EMA1
n = 10
SMA
EMA1
RSI
BollingerBand
n = 20
SMA
EMA1
RSI
BollingerBand
n = 40
SMA
EMA1
RSI
BollingerBand
n = 100
SMA
EMA1

Index                    15999208
Close                    15999208
Volumn                   15999208
count                    15999208
label                    15999208
                           ...   
SM_Volumn_median_100      7999604
EWM_Volumn_mean_100       7999604
SM_Volumn_std_100         7999604
SM_Volumn_BB_High_100     7999604
SM_Volumn_BB_Low_100      7999604
Length: 72, dtype: int64

In [11]:
df_train

Unnamed: 0_level_0,Close,Volumn,count,label,High-Low,Close-Open,MACD_Close_macd,MACD_Close_signal,label_lag_5,SM_Close_mean_5,SM_Close_median_5,EWM_Close_mean_5,label_lag_10,SM_Close_mean_10,SM_Close_median_10,EWM_Close_mean_10,RSI_10,SM_Close_std_10,SM_Close_BB_High_10,SM_Close_BB_Low_10,label_lag_20,SM_Close_mean_20,SM_Close_median_20,EWM_Close_mean_20,RSI_20,SM_Close_std_20,SM_Close_BB_High_20,SM_Close_BB_Low_20,label_lag_40,SM_Close_mean_40,SM_Close_median_40,EWM_Close_mean_40,RSI_40,SM_Close_std_40,SM_Close_BB_High_40,SM_Close_BB_Low_40,label_lag_100,SM_Close_mean_100,SM_Close_median_100,EWM_Close_mean_100,RSI_100,SM_Close_std_100,SM_Close_BB_High_100,SM_Close_BB_Low_100,SM_Volumn_mean_5,SM_Volumn_median_5,EWM_Volumn_mean_5,SM_Volumn_mean_10,SM_Volumn_median_10,EWM_Volumn_mean_10,SM_Volumn_std_10,SM_Volumn_BB_High_10,SM_Volumn_BB_Low_10,SM_Volumn_mean_20,SM_Volumn_median_20,EWM_Volumn_mean_20,SM_Volumn_std_20,SM_Volumn_BB_High_20,SM_Volumn_BB_Low_20,SM_Volumn_mean_40,SM_Volumn_median_40,EWM_Volumn_mean_40,SM_Volumn_std_40,SM_Volumn_BB_High_40,SM_Volumn_BB_Low_40,SM_Volumn_mean_100,SM_Volumn_median_100,EWM_Volumn_mean_100,SM_Volumn_std_100,SM_Volumn_BB_High_100,SM_Volumn_BB_Low_100
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1
1531915980000,7439.27,82.913824,200,1.0,6.44,3.79,0.059198,0.039432,0.0,7434.259766,7435.970215,7434.494629,1.0,7429.705078,7428.410156,7430.798340,65.292259,6.791718,7446.684082,7412.725586,1.0,7424.872070,7423.415039,7426.918457,58.755001,7.103924,7442.631836,7407.112305,1.0,7423.572266,7421.995117,7422.477051,54.768734,6.733529,7440.406250,7406.738281,1.0,7408.281250,7407.904785,7415.750000,49.589909,16.262754,7448.938477,7367.624512,42.038719,32.346714,48.320835,31.033089,26.666887,38.643131,21.999096,86.030830,-23.964653,30.340084,25.406355,33.373032,17.909540,75.113937,-14.433765,30.455532,26.537527,30.277847,16.742907,72.312798,-11.401732,26.558323,23.831718,27.842983,13.729911,60.883102,-7.766454
1531916040000,7434.94,36.244578,217,0.0,8.72,-4.32,0.060352,0.043616,1.0,7436.332031,7435.970215,7434.643066,1.0,7430.306152,7431.415039,7431.551270,50.623158,6.978852,7447.752930,7412.858887,0.0,7425.864258,7424.294922,7427.682617,50.868076,7.051783,7443.494141,7408.234863,0.0,7423.996094,7422.930176,7423.088867,50.499691,6.904397,7441.256836,7406.734863,0.0,7408.780762,7408.500000,7416.187988,47.874619,16.307325,7449.549316,7368.012695,47.026127,36.244579,44.295418,32.089462,29.950508,38.207031,21.967136,87.007301,-22.828377,30.932261,26.666887,33.646523,17.898630,75.678833,-13.814313,30.162920,26.537527,30.570784,16.530264,71.488579,-11.162738,26.645376,23.831718,28.034796,13.763749,61.054749,-7.763998
1531916100000,7438.95,16.747995,131,0.0,6.55,-2.06,0.064871,0.047867,1.0,7436.928223,7436.009766,7436.078613,1.0,7431.418945,7435.205078,7432.896484,45.842426,7.412379,7449.949707,7412.888184,1.0,7426.879883,7424.785156,7428.755859,48.032738,7.409941,7445.404785,7408.354980,1.0,7424.495117,7423.524902,7423.867188,48.867607,7.246148,7442.610352,7406.379395,0.0,7409.378418,7409.290039,7416.706055,47.185886,16.306784,7450.145508,7368.611328,40.100235,32.346714,35.112942,31.767323,29.950508,34.305389,22.187140,87.235176,-23.700527,31.065210,26.666887,32.037079,17.776392,75.506195,-13.375770,30.124846,26.537527,29.892368,16.560083,71.525055,-11.275362,26.643911,23.831718,27.777891,13.764805,61.055923,-7.768100
1531916160000,7437.24,21.542094,160,1.0,5.35,0.54,0.065835,0.051461,1.0,7437.173828,7437.240234,7436.465820,0.0,7432.354004,7435.720215,7433.686035,47.204643,7.506873,7451.121094,7413.586914,1.0,7427.691895,7426.404785,7429.563477,48.771820,7.618556,7446.738281,7408.645508,1.0,7424.950195,7424.169922,7424.523438,49.281006,7.462794,7443.607422,7406.293457,0.0,7410.026367,7409.790039,7417.172363,47.373932,16.111130,7450.304199,7369.748535,37.939312,32.248066,30.589327,33.191029,29.950508,31.984791,20.860132,85.341354,-18.959303,31.037937,26.666887,31.037523,17.791304,75.516197,-13.440320,30.259647,26.537527,29.482666,16.465084,71.422356,-10.903062,26.417751,23.677235,27.636375,13.659525,60.566563,-7.731062
1531916220000,7437.24,17.994306,168,1.0,2.25,0.18,0.065839,0.054336,0.0,7437.527832,7437.240234,7436.724121,0.0,7433.795898,7435.990234,7434.332520,46.248131,6.826097,7450.861328,7416.730957,0.0,7428.655273,7427.854980,7430.294922,48.237411,7.542462,7447.511719,7409.799316,1.0,7425.134766,7424.169922,7425.146973,48.980438,7.675484,7444.323242,7405.945801,1.0,7410.659180,7410.234863,7417.626465,47.248180,15.921627,7450.463379,7370.854980,35.088558,21.542093,26.390987,33.037552,29.950508,29.441065,20.977135,85.480385,-19.405283,30.118820,25.406355,29.795275,17.974861,75.055977,-14.818336,29.634155,25.406355,28.919151,16.443357,70.742546,-11.474238,26.136395,23.520903,27.418186,13.538627,59.982960,-7.710172
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1651909740000,35913.04,12.096760,388,0.0,10.47,-6.96,0.049452,0.049707,1.0,35916.605469,35914.511719,35916.007812,0.0,35914.171875,35913.773438,35911.628906,47.161858,10.453337,35940.304688,35888.035156,0.0,35895.457031,35902.289062,35898.386719,48.674694,26.914511,35962.746094,35828.171875,0.0,35869.773438,35866.496094,35878.890625,49.401031,35.865208,35959.437500,35780.109375,1.0,35847.273438,35845.218750,35858.195312,49.796608,35.097454,35935.019531,35759.531250,18.089405,17.753981,15.672263,18.838835,15.430695,18.370110,12.313526,49.622650,-11.944981,21.578747,15.430695,20.174868,15.616542,60.620102,-17.462606,28.472692,12.232645,21.230450,52.949715,160.846970,-103.901588,19.439632,11.527030,20.185188,34.728283,106.260345,-67.381081
1651909800000,35922.46,24.311140,575,1.0,9.44,9.42,0.048766,0.049519,0.0,35918.496094,35920.000000,35918.160156,1.0,35915.601562,35914.851562,35913.597656,51.393059,10.515374,35941.890625,35889.316406,1.0,35899.656250,35903.816406,35900.679688,50.434235,23.941774,35959.511719,35839.804688,0.0,35872.835938,35867.089844,35881.019531,50.248989,34.972687,35960.265625,35785.402344,0.0,35847.714844,35845.218750,35859.468750,50.183376,35.763088,35937.125000,35758.308594,16.859699,17.753981,18.551889,20.769255,21.032560,19.450296,11.382079,49.224453,-7.685943,22.281792,20.016520,20.568798,15.394628,60.768360,-16.204779,20.937586,12.232645,21.380728,21.918381,75.733536,-33.858368,19.612314,11.643090,20.266890,34.708946,106.384682,-67.160057
1651909860000,35940.01,72.302490,782,1.0,27.73,17.55,0.051570,0.049929,1.0,35922.003906,35920.000000,35925.441406,0.0,35920.019531,35917.593750,35918.398438,63.984055,10.568214,35946.441406,35893.597656,1.0,35904.160156,35906.511719,35904.421875,56.589298,22.533199,35960.492188,35847.824219,1.0,35875.988281,35867.730469,35883.894531,53.396305,35.204918,35964.000000,35787.976562,1.0,35848.222656,35845.218750,35861.062500,51.661346,36.703964,35939.980469,35756.460938,27.769402,24.311140,36.468758,26.688763,24.384451,29.059786,19.472288,75.369484,-21.991959,25.590206,23.295099,25.495817,18.532063,71.920364,-20.739950,19.721922,12.232645,23.864716,17.034767,62.308838,-22.864996,20.230093,11.814675,21.297298,35.093224,107.963150,-67.502968
1651909920000,35956.51,44.738440,517,1.0,16.51,16.50,0.056843,0.051312,0.0,35930.402344,35922.460938,35935.796875,1.0,35925.183594,35921.230469,35925.328125,54.907322,14.303555,35960.941406,35889.421875,0.0,35909.671875,35910.570312,35909.382812,52.637436,21.064154,35962.332031,35857.011719,1.0,35879.554688,35868.878906,35887.437500,51.477924,35.966949,35969.472656,35789.636719,1.0,35848.882812,35845.218750,35862.953125,50.787186,38.041328,35943.984375,35753.777344,31.825539,24.311140,39.225319,27.136190,24.384451,31.910450,19.866297,76.801933,-22.529551,27.481825,24.384451,27.328447,18.455212,73.619858,-18.656206,20.814461,12.689160,24.882946,17.206213,63.829994,-22.201073,20.644045,11.968320,21.761478,35.136135,108.484383,-67.196297


In [12]:
feat = list(df_train.columns[6:])

In [13]:
feat.remove('MACD_Close_macd')
feat.remove('MACD_Close_signal')

In [14]:
X, y = df_train[feat], df_train['label']
X_train, X_valid, X_test = X[:int(df_train.shape[0]*0.7)], X[int(df_train.shape[0]*0.7):int(df_train.shape[0]*0.85)], X[int(df_train.shape[0]*0.85):]
y_train, y_valid, y_test = y[:int(df_train.shape[0]*0.7)], y[int(df_train.shape[0]*0.7):int(df_train.shape[0]*0.85)], y[int(df_train.shape[0]*0.85):]

In [15]:
params = {'objective': 'binary',
                      'metric': ['auc', 'binary_error', 'binary_logloss'],
                      'is_unbalance':True,
                      'boosting':'gbdt',
                      'num_boost_round':1000,
                      'early_stopping_rounds':10,
                      'learning_rate': 0.05,
                      'max_depth': 16,
                      'num_leaves': 32,
                      'feature_fraction': 0.8,
                      'verbosity': 1,
                      'subsample' : 0.6058272745943716}

In [16]:
import lightgbm as lgb
train_data = lgb.Dataset(X_train, label=y_train)
valid_data = lgb.Dataset(X_valid, label=y_valid, reference=train_data)

In [17]:
model = lgb.train(params, train_data,                     
                  valid_sets=[train_data, valid_data],
                  valid_names=['train', 'valid'])

[LightGBM] [Info] Number of positive: 695468, number of negative: 704462
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14800
[LightGBM] [Info] Number of data points in the train set: 1399930, number of used features: 63
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.496788 -> initscore=-0.012849
[LightGBM] [Info] Start training from score -0.012849
[1]	train's auc: 0.516043	train's binary_error: 0.496358	train's binary_logloss: 0.693064	valid's auc: 0.512847	valid's binary_error: 0.494765	valid's binary_logloss: 0.693053
Training until validation scores don't improve for 10 rounds
[2]	train's auc: 0.518213	train's binary_error: 0.494497	train's binary_logloss: 0.693003	valid's auc: 0.513516	valid's binary_error: 0.493228	valid's binary_logloss: 0.693
[3]	train's auc: 0.519308	train's binary_error: 0.493641	train's binary_logloss: 0.692964	valid's auc: 0.512618	valid's binary_error: 0.492915	valid's binary_logloss: 0.692982
[4]	train's auc: 0.

In [18]:
importance_df = (
    pd.DataFrame({
        'feature_name': model.feature_name(),
        'importance_gain': model.feature_importance(importance_type='gain'),
        'importance_split': model.feature_importance(importance_type='split'),
    })
    .sort_values('importance_gain', ascending=False)
    .reset_index(drop=True)
)
importance_df.head(50)

Unnamed: 0,feature_name,importance_gain,importance_split
0,RSI_10,2195.131901,27
1,RSI_20,1746.734885,24
2,SM_Volumn_median_5,1744.238592,20
3,SM_Close_std_10,1508.977909,41
4,label_lag_5,1330.129906,24
5,RSI_40,1267.417009,20
6,SM_Volumn_mean_5,953.9052,24
7,label_lag_10,494.804297,17
8,RSI_100,492.499188,18
9,SM_Close_std_20,444.595899,16


In [19]:
from sklearn.metrics import roc_auc_score 

In [21]:
roc_auc_score(y_test, model.predict(X_test))

0.5091654403268818