In [None]:
'''
Group members:
Bohao XU        ETU20211498
Jijun TAN       ETU20211472
Yilin ZHANG     ETU20211520
Junxin HUANG    ETU20211420
'''

In [50]:
import os
import pandas as pd
import numpy as np
import lightgbm as lgb
import time
import datetime
from datetime import datetime
import gc
import csv
import optuna

seed0 = 8586

creator='group4' # for create a certain folder since we 4 group members work in the same server
TRAIN_CSV = '2018_2022_Cleaned_dataset_with_CloseVolumeTarget.csv'
ASSET_DETAILS_CSV = 'asset_details.csv'
ASSET_ID_SELECTED_FOR_TRAIN = [for i in range(14)] # when you train a certain model for asset_id=x, write x in this tuple like (x,)
FEATURES_EXCLUDE = [] # for those features need to be excluded for training
N_TRIALS = 10 # 10 means that the bayes optimizer would iterate for 10 times

pd.set_option('display.max_rows', 6)
pd.set_option('display.max_columns', 350)
pd.set_option('mode.chained_assignment', None)

In [51]:
lags = [60,300,900]

# Range of hyperparameters to be tuned
param_ranges = {
'learning_rate': (0.15, 0.2), 
    'max_depth': (14, 18), 
    'num_leaves': (55, 85), 
    'lambda_l1': (3.2, 3.8), 
    'lambda_l2': (2.2, 2.8), 
    'max_bin': (900, 1100), 
    'min_data_in_leaf': (48, 80)
}

# Parameters that do not require tuning
params = {
    'early_stopping_rounds': 100,
    'objective': 'regression',
    'metric': 'correlation',
    'boosting_type': 'gbdt',
    'verbose': -1,
    'feature_fraction': 0.9,
    'seed':seed0,
    'feature_fraction_seed': seed0,
    'bagging_fraction_seed': seed0,
    'drop_seed': seed0,
    'data_random_seed': seed0,
    'extra_trees': True,
    'extra_seed': seed0,
    'zero_as_missing': True,
    "first_metric_only": True
         }


In [52]:
def reduce_mem_usage(df):
    """ 
    iterate through all the columns of a dataframe and modify the data type to reduce memory usage.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    for col in df.columns:
        col_type = df[col].dtype

        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
        else:
            df[col] = df[col].astype('category')

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))

    return df

In [53]:
def MACD(df, short_term=12 * 30, long_term=26 * 30):
    '''Pass in a df and return the df with the 'MACD_id' columns'''
    tmp_col = ['EMA_short', 'EMA_long', 'DIF', 'DEA']
    for id in range(14):
        cols_to_select = [f'Close_{id}', 'timestamp', f'Target_{id}']
        df_selected = df[cols_to_select].sort_values('timestamp')

        # Calculate short-term (fast) moving averages（EMA12）
        df_selected['EMA_short'] = df_selected[f'Close_{id}'].ewm(span=short_term, adjust=False).mean()

        # Calculating long-term (slow) moving averages（EMA26）
        df_selected['EMA_long'] = df_selected[f'Close_{id}'].ewm(span=long_term, adjust=False).mean()

        # Calculation of deviation（DIF）
        df_selected['DIF'] = df_selected['EMA_short'] - df_selected['EMA_long']

        # Calculating the DEA using a 9-day exponential moving average
        df_selected['DEA'] = df_selected['DIF'].ewm(span=9, adjust=False).mean()

        # Calculating the MACD line
        df_selected[f'MACD_id{id}'] = df_selected['DIF'] - df_selected['DEA']

        # Merge back into the original DataFrame
        df = df.merge(df_selected[['timestamp', f'MACD_id{id}']], on='timestamp', how='left')

    # Check and delete temporary columns
    df.drop(columns=tmp_col, errors='ignore', inplace=True)
    return df

In [54]:
def VolumeRatio(df, mean_term=300):
    '''
    Pass in a df and return the df with the column 'Volume_Ratio_id'
    '''
    tmp_col = ['Volume_Mean']
    
    for id in range(14):
        # Calculate the 'Volume Mean' column, which represents the volume mean for the previous n days
        df[f'Volume_Mean'] = df[f'Volume_{id}'].rolling(window=mean_term).mean()
        
        # Calculate the 'Volume Ratio' column
        df[f'Volume_Ratio_id{id}'] = df[f'Volume_{id}'] / df[f'Volume_Mean']
    
    # Check and delete temporary columns
    df.drop(columns=tmp_col, errors='ignore', inplace=True)
    return df

In [55]:
def RSI(test_df, window_size=300):
    '''
    Pass in a df, return the df with the 'RSI_id' columns
    '''
    tmp_col = ['Price_Change', 'Gain', 'Loss', 'Avg_Gain', 'Avg_Loss', 'RS']
    
    for id in range(14):
        col_name = f'Close_{id}'
    
        # Calculate daily price changes
        test_df['Price_Change'] = test_df[col_name].diff()
    
        # Categorising price changes into upward and downward
        test_df['Gain'] = test_df['Price_Change'].apply(lambda x: x if x > 0 else 0)
        test_df['Loss'] = test_df['Price_Change'].apply(lambda x: -x if x < 0 else 0)
    
        # Calculation of the average of increase and decrease
        test_df['Avg_Gain'] = test_df['Gain'].rolling(window=window_size).mean()
        test_df['Avg_Loss'] = test_df['Loss'].rolling(window=window_size).mean()
    
        # Calculation of the relative strength index（RSI）
        test_df['RS'] = test_df['Avg_Gain'] / test_df['Avg_Loss']
        test_df[f'RSI_id{id}'] = 100 - (100 / (1 + test_df['RS']))
    
    test_df.drop(tmp_col, axis=1, inplace=True)
    return test_df

In [56]:
def get_features(df, train=True):   
    for id in range(14):    # 14
        for lag in lags: # 3
            # feature 1
            # Convolutional (smoothing) processing + fill in the vacant bits with 1 + roll rolls 1 to the top
            # This feature represents, for example, the logarithm of the price of asset 1 on day 60 versus the average price from day 1 - day 60
            df[f'log_close/mean_{lag}_id{id}'] = np.log( np.array(df[f'Close_{id}']) /  np.roll(np.append(np.convolve( np.array(df[f'Close_{id}']), np.ones(lag)/lag, mode="valid"), np.ones(lag-1)), lag-1)  )
            # feature 2
            # Logarithmic returns at lagged lag period for asset with id=id
            # This feature represents, for example, the logarithm of the ratio of the price of asset 1 on day 60 to the price on day 1
            df[f'log_return_{lag}_id{id}']     = np.log( np.array(df[f'Close_{id}']) /  np.roll(np.array(df[f'Close_{id}']), lag)  )
    for lag in lags: #3
        # Feature A
        # This feature represents, for example, the mean of the 60th order lag of feature 1 for all assets. 
        df[f'mean_close/mean_{lag}'] =  np.mean(df.iloc[:,df.columns.str.startswith(f'log_close/mean_{lag}_id')], axis=1)
        # Feature B
        # This feature represents, for example, the mean of the 60th order lag of feature 2 for all assets. 
        df[f'mean_log_returns_{lag}'] = np.mean(df.iloc[:,df.columns.str.startswith(f'log_return_{lag}_id')] ,    axis=1)
        for id in range(14):
            # feature 5
            # This feature represents, for example, the difference between feature 1 and feature A of asset 1
            df[f'log_close/mean_{lag}-mean_close/mean_{lag}_id{id}'] = np.array( df[f'log_close/mean_{lag}_id{id}']) - np.array( df[f'mean_close/mean_{lag}']  )
            # feature NO.6
            # This feature represents, for example, the difference between feature 2 and feature B for asset 1
            df[f'log_return_{lag}-mean_log_returns_{lag}_id{id}']    = np.array( df[f'log_return_{lag}_id{id}'])     - np.array( df[f'mean_log_returns_{lag}'] )

    for id in range(14):
        df = df.drop([f'Close_{id}'], axis=1)
        df = df.drop([f'Volume_{id}'],axis=1)

    return df

In [57]:
def replace_below_first_non_empty_900_nan(column):
    first_non_empty_index = column.first_valid_index()
    
    if first_non_empty_index is not None:
        column.loc[first_non_empty_index:first_non_empty_index+900] = np.nan
    return column

In [58]:
def OverallPerformSave(current_time,formatted_time,params_range_set,adjusted_params,params,asset_id,train_score,valid_socre,feature_importance):
    '''This is a function that outputs training results in real time during the training process. It helps us save the results of each round of training to a csv file so that we can view the results at the end of the training (or during the process).'''
    Overall_Performance_file_path =  '/kaggle/working/' + f'training_result/{creator}/整体表现{ASSET_ID_SELECTED_FOR_TRAIN}.csv'
    with open(Overall_Performance_file_path,'a',newline='') as csvfile:
        csv_writer = csv.writer(csvfile)
        
        # write data
        csv_writer.writerow([current_time,formatted_time, str(params_range_set),str(adjusted_params),str(params), asset_id, train_score, valid_socre, feature_importance])

In [59]:
def CreateFolderForEachTraining(creator):
    '''
    This is the function used to create a folder where the csv files of the training results are stored, it creates a folder named after the current time before the start of each training session, under which all the files of this training session are stored.
    While creating the folder it creates a csv file which is written to the table header. This csv file is used to write the hyperparameters, training scores, feature importance etc. for the current round in real time during the training process.
    '''
    # Specify the name of the folder to be created
    folder_name = formatted_time

    # Create a folder in the current directory
    base_path = os.getcwd() 
    folder_path = os.path.join(base_path, f'training_result/{creator}/', folder_name)
    os.makedirs(folder_path)
    models_trained_path = folder_path + '/models_trained'
    os.makedirs(models_trained_path)
    
    Overall_Performance_file_path = '/kaggle/working/' + f'training_result/{creator}/整体表现{ASSET_ID_SELECTED_FOR_TRAIN}.csv'
    # Check if 'Overall_Performance.csv' exists
    if not os.path.isfile(Overall_Performance_file_path):
        # If the file does not exist, create and write the table header
        with open(Overall_Performance_file_path, 'w', newline='') as csvfile:
            csv_writer = csv.writer(csvfile)
            csv_writer.writerow(['Output_time','Start_time', 'Params_range_set','Adjusted_Params','Params', 'Asset_id', 'Train_score', 'Valid_score','Feature_importance'])
        
    
    # Write hyperparameters
    params_file_path = os.path.join(base_path, f'training_result/{creator}/', folder_name,'params.txt')

    # Open the file and write the parameters
    with open(params_file_path, 'w') as params_file:
        for key, value in params.items():
            value_str = str(value)
            params_file.write(f"{key}: {value_str}\n")
    print(f"Parameters have been written to '{params_file_path}'.")

In [60]:
def totimestamp(x):
    '''This is a date-timestamp conversion tool'''
    return np.int32(time.mktime(datetime.datetime.strptime(x, "%d/%m/%Y").timetuple()))# totimestamp("21/06/2021")

In [61]:
def correlation(y_true, y_pred):
    '''
    This is a function that evaluates whether or not to let the lgbm model perform earlystopping, and it is called at the completion of each weak learner build, returning the correlation coefficient score of the strong learner that the whole model is composed of when iterating over the current weak learner. We want this score to be as large as possible, so the third value of return is True according to lgbm's rules.
    '''
    # Calculate your evaluation metrics here, assuming result is your evaluation result
    correlation, _ = np.corrcoef(y_true, y_pred)

    # Returns a tuple containing the name of the evaluation metric, the value of the metric, and information on whether a larger metric is better or not
    return 'correlation', correlation[1], True

In [62]:
def correlation_scorer(y_true, y_pred):
    '''This is a function used to evaluate the correlation coefficient scores of two columns of data'''
    correlation, _ = np.corrcoef(y_true, y_pred)
    return correlation[1]

In [63]:
def get_train_valid_splits(data,test_size):
    '''
    This function is called inside the my_custom_cv function as we train each asset_id
    It returns the timestamp of the training set and validation set for the asset_id currently being trained
    This timestamp will be passed to the my_custom_cv function for the final division of the training and validation set
    '''
    all_train_timestamps = data['timestamp'].unique()

    whole_length = len(all_train_timestamps)
    test_length = int(test_size * whole_length)

    test_split = all_train_timestamps[-test_length:]
    train_split = all_train_timestamps[:(whole_length-test_length)]

    train_test_zip = zip([train_split], [test_split])
    return train_test_zip

In [64]:
def my_custom_cv(df_proc, asset_id):
    '''
    This is the function used to divide the training set from the validation set
    It returns x_train, y_train, x_val, y_val
    '''
    # Filter out rows with non-missing values in the f'Target_{asset_id}' column of the target variable.
    # folder_name = formatted_time
    df_proc = df_proc.loc[(df_proc[f'Target_{asset_id}'] == df_proc[f'Target_{asset_id}'])]

    train_test_zip = get_train_valid_splits(df_proc,test_size=0.2)
    train_split, test_split = zip(*train_test_zip)
    train_split = [item for sublist in train_split for item in sublist]
    test_split = [item for sublist in test_split for item in sublist]
    gc.collect()

    train_split_index = df_proc['timestamp'].isin(train_split)
    test_split_index = df_proc['timestamp'].isin(test_split)

    # Obtain sample matrices and target variables for training and validation sets
    X_train = df_proc.loc[train_split_index, features]
    y_train = df_proc.loc[train_split_index, f'Target_{asset_id}']
    X_val = df_proc.loc[test_split_index, features]
    y_val = df_proc.loc[test_split_index, f'Target_{asset_id}']

    print(f"number of train data: {X_train.shape[0]}")
    print(f"number of val data:   {X_val.shape[0]}")

    yield X_train, y_train, X_val, y_val

In [65]:
def objective(trial):
    '''
    This is the OBJECTIVE function of the Bayesian optimiser, which is called during each round of model training
    It guides the Bayesian optimiser in the direction we want it to go (i.e. to improve the correlation_score)
    It returns the correlation_score on the validation set
    '''
    # Copy from global parameters
    local_params = params.copy()

    # Setting the parameters to be optimised
    local_params['learning_rate'] = trial.suggest_loguniform('learning_rate', *param_ranges['learning_rate'])
    local_params['max_depth'] = trial.suggest_int('max_depth', *param_ranges['max_depth'])
    local_params['num_leaves'] = trial.suggest_int('num_leaves', *param_ranges['num_leaves'])
    local_params['lambda_l1'] = trial.suggest_uniform('lambda_l1', *param_ranges['lambda_l1'])
    local_params['lambda_l2'] = trial.suggest_uniform('lambda_l2', *param_ranges['lambda_l2'])
    local_params['max_bin'] = trial.suggest_int('max_bin', *param_ranges['max_bin'])
    local_params['min_data_in_leaf'] = trial.suggest_int('min_data_in_leaf', *param_ranges['min_data_in_leaf'])

    # Creating a LightGBM model
    model = lgb.LGBMRegressor(**local_params,n_estimators=2500)

    # Create 4 lists for receiving the true and predicted values for each fold (used to hold the results of a single round of training if k-fold cross-validation is not used)
    all_y_pred = []
    all_y_pred_on_train = []
    all_y_val = []
    all_y_train = []
    print('current_training_id:' + f'{asset_id}')
    print('current_Bayesian_trial:' + f'{trial_num} '+f'out of {N_TRIALS}')
    
    # Train each k-fold (or only 1 time if k-fold cross-validation is not used)
    for X_train, y_train, X_val, y_val in my_custom_cv(test_df,asset_id):
        model.fit(X_train, 
                  y_train,
                  eval_set=[(X_train, y_train), (X_val, y_val)], 
                  eval_names=['tr', 'vl'], 
                  eval_metric=correlation, )
        
        y_pred_on_train = model.predict(X_train)
        y_pred = model.predict(X_val)
        
        all_y_pred_on_train.extend(y_pred_on_train)
        all_y_pred.extend(y_pred)
        all_y_val.extend(y_val)
        all_y_train.extend(y_train)
    
    # Calculate the training set score and validation set score separately
    all_trainset_score = correlation_scorer(all_y_pred_on_train,all_y_train)
    all_validset_score = correlation_scorer(all_y_pred,all_y_val)
    
    # Get the hyperparameters for the current training, which are used to write to a csv file to record the training information
    adjusted_params = {}
    for key, value in trial.params.items():
        adjusted_params[key] = value
    
    # Get feature importance and feature names for writing to csv file to record training information
    feature_importance = model.feature_importances_
    feature_names = model.booster_.feature_name()
    # Merge into a two-dimensional list
    merged_list = list(zip(feature_names, feature_importance))
    # Sort by size of feature_importance
    sorted_list = sorted(merged_list, key=lambda x: x[1], reverse=True)
    # Convert to Dictionary
    result_string = dict(sorted_list)
    
    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    OverallPerformSave(current_time,formatted_time,param_ranges,adjusted_params,params,asset_id,all_trainset_score,all_validset_score,result_string)
    print(f'training_set_score: {all_trainset_score}')
    print(f'validation_set_score: {all_validset_score}')
    return all_validset_score

In [66]:
#Reading a dataset
test_df = pd.read_csv(TRAIN_CSV)

In [67]:
# Cleaning out useless columns
test_df = test_df.drop('Unnamed: 0',axis=1)

In [68]:
test_df

Unnamed: 0,timestamp,Close_0,Target_0,Volume_0,Close_1,Target_1,Volume_1,Close_2,Target_2,Volume_2,Close_3,Target_3,Volume_3,Close_4,Target_4,Volume_4,Close_5,Target_5,Volume_5,Close_6,Target_6,Volume_6,Close_7,Target_7,Volume_7,Close_8,Target_8,Volume_8,Close_9,Target_9,Volume_9,Close_10,Target_10,Volume_10,Close_11,Target_11,Volume_11,Close_12,Target_12,Volume_12,Close_13,Target_13,Volume_13
0,1514764860,78.380000,-0.014399,78.380000,31.550062,-0.014643,31.550062,19.233005,-0.004218,19.233005,,,,,,,6626.713370,-0.013922,6626.713370,335.987856,-0.004809,335.987856,121.087310,-0.008264,121.087310,,,,411.896642,-0.009791,411.896642,,,,6.635710,,6.635710,,,,,,
1,1514764920,71.390000,-0.015875,71.390000,31.046432,-0.015037,31.046432,24.050259,-0.004079,24.050259,,,,,,,3277.475494,-0.014534,3277.475494,232.793141,-0.004441,232.793141,1.468019,-0.029902,1.468019,,,,3640.502706,-0.012991,3640.502706,,,,0.349420,-0.009690,0.349420,,,,,,
2,1514764980,1546.820000,-0.015410,1546.820000,55.061820,-0.010309,55.061820,42.676438,-0.002892,42.676438,,,,,,,5623.557585,-0.012546,5623.557585,174.138031,-0.004206,174.138031,76.163922,-0.030832,76.163922,,,,328.350286,-0.003572,328.350286,,,,1.189553,0.006567,1.189553,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2136957,1642982280,496.467537,,496.467537,49.867700,,49.867700,198.196485,,198.196485,430719.654610,,430719.654610,7.433686e+05,,7.433686e+05,14907.125055,,14907.125055,821.614133,,821.614133,1334.601131,,1334.601131,20511.345658,,20511.345658,790.632440,,790.632440,2.142748,,2.142748,21.597886,,21.597886,7.076212e+05,,7.076212e+05,1.301079e+06,,1.301079e+06
2136958,1642982340,368.684921,,368.684921,43.030556,,43.030556,76.954035,,76.954035,352929.794282,,352929.794282,7.287195e+05,,7.287195e+05,14559.759824,,14559.759824,749.548301,,749.548301,2410.205214,,2410.205214,9064.791667,,9064.791667,331.208442,,331.208442,3.985169,,3.985169,51.115980,,51.115980,1.988516e+05,,1.988516e+05,9.245588e+05,,9.245588e+05
2136959,1642982400,1490.571077,,1490.571077,110.053151,,110.053151,209.645675,,209.645675,249923.229256,,249923.229256,1.009688e+06,,1.009688e+06,202194.015531,,202194.015531,1057.765667,,1057.765667,6709.816310,,6709.816310,6019.269834,,6019.269834,1243.860266,,1243.860266,4.190290,,4.190290,74.397471,,74.397471,2.983796e+06,,2.983796e+06,8.988253e+05,,8.988253e+05


In [69]:
# Call custom feature engineering functions to get features
test_df = reduce_mem_usage(test_df)
test_df=MACD(test_df)
test_df=VolumeRatio(test_df)
test_df = RSI(test_df)
test_df=get_features(test_df)
test_df = reduce_mem_usage(test_df)

Memory usage of dataframe is 701.06 MB
Memory usage after optimization is: 260.86 MB
Decreased by 62.8%


  df[f'log_return_{lag}_id{id}']     = np.log( np.array(df[f'Close_{id}']) /  np.roll(np.array(df[f'Close_{id}']), lag)  )
  df[f'log_return_{lag}_id{id}']     = np.log( np.array(df[f'Close_{id}']) /  np.roll(np.array(df[f'Close_{id}']), lag)  )
  df[f'log_return_{lag}_id{id}']     = np.log( np.array(df[f'Close_{id}']) /  np.roll(np.array(df[f'Close_{id}']), lag)  )
  df[f'log_return_{lag}_id{id}']     = np.log( np.array(df[f'Close_{id}']) /  np.roll(np.array(df[f'Close_{id}']), lag)  )
  df[f'log_return_{lag}_id{id}']     = np.log( np.array(df[f'Close_{id}']) /  np.roll(np.array(df[f'Close_{id}']), lag)  )
  df[f'log_return_{lag}_id{id}']     = np.log( np.array(df[f'Close_{id}']) /  np.roll(np.array(df[f'Close_{id}']), lag)  )
  df[f'log_return_{lag}_id{id}']     = np.log( np.array(df[f'Close_{id}']) /  np.roll(np.array(df[f'Close_{id}']), lag)  )
  df[f'log_return_{lag}_id{id}']     = np.log( np.array(df[f'Close_{id}']) /  np.roll(np.array(df[f'Close_{id}']), lag)  )
  df[f'log_retur

Memory usage of dataframe is 2828.69 MB
Memory usage after optimization is: 2262.14 MB
Decreased by 20.0%


In [70]:
# Remove the first 900 non-null values (eliminating rows affected by the maximum lag order)
test_df.iloc[:, 15:] = test_df.iloc[:, 15:].apply(replace_below_first_non_empty_900_nan, axis=0)

In [71]:
# Delete the first 900 rows (because the targets in the first 900 rows can't be used for training because their corresponding feature values are all NaN)
test_df = test_df.iloc[900:]

In [72]:
# Leaving 132,961 rows of data from the last final 3 months (2021-10-24 - 2022-1-24) as the prediction set for the final inference stage.
# At this point the training data is ready, it is 2003099*231 df, named test_df
test_df = test_df.loc[test_df['timestamp']<1635004800,]
test_df = reduce_mem_usage(test_df)

Memory usage of dataframe is 2135.72 MB
Memory usage after optimization is: 2135.72 MB
Decreased by 0.0%


In [73]:
test_df

Unnamed: 0,timestamp,Target_0,Target_1,Target_2,Target_3,Target_4,Target_5,Target_6,Target_7,Target_8,Target_9,Target_10,Target_11,Target_12,Target_13,MACD_id0,MACD_id1,MACD_id2,MACD_id3,MACD_id4,MACD_id5,MACD_id6,MACD_id7,MACD_id8,MACD_id9,MACD_id10,MACD_id11,MACD_id12,MACD_id13,Volume_Ratio_id0,Volume_Ratio_id1,Volume_Ratio_id2,Volume_Ratio_id3,Volume_Ratio_id4,Volume_Ratio_id5,Volume_Ratio_id6,Volume_Ratio_id7,Volume_Ratio_id8,Volume_Ratio_id9,Volume_Ratio_id10,Volume_Ratio_id11,Volume_Ratio_id12,Volume_Ratio_id13,RSI_id0,RSI_id1,RSI_id2,RSI_id3,RSI_id4,RSI_id5,RSI_id6,RSI_id7,RSI_id8,RSI_id9,RSI_id10,RSI_id11,RSI_id12,RSI_id13,log_close/mean_60_id0,log_return_60_id0,log_close/mean_300_id0,log_return_300_id0,log_close/mean_900_id0,log_return_900_id0,log_close/mean_60_id1,log_return_60_id1,log_close/mean_300_id1,log_return_300_id1,log_close/mean_900_id1,log_return_900_id1,log_close/mean_60_id2,log_return_60_id2,log_close/mean_300_id2,log_return_300_id2,log_close/mean_900_id2,log_return_900_id2,log_close/mean_60_id3,log_return_60_id3,log_close/mean_300_id3,log_return_300_id3,log_close/mean_900_id3,log_return_900_id3,log_close/mean_60_id4,log_return_60_id4,log_close/mean_300_id4,log_return_300_id4,log_close/mean_900_id4,log_return_900_id4,log_close/mean_60_id5,log_return_60_id5,log_close/mean_300_id5,log_return_300_id5,log_close/mean_900_id5,log_return_900_id5,log_close/mean_60_id6,log_return_60_id6,log_close/mean_300_id6,log_return_300_id6,log_close/mean_900_id6,log_return_900_id6,log_close/mean_60_id7,log_return_60_id7,log_close/mean_300_id7,log_return_300_id7,log_close/mean_900_id7,log_return_900_id7,log_close/mean_60_id8,log_return_60_id8,log_close/mean_300_id8,log_return_300_id8,log_close/mean_900_id8,log_return_900_id8,log_close/mean_60_id9,log_return_60_id9,log_close/mean_300_id9,log_return_300_id9,log_close/mean_900_id9,log_return_900_id9,log_close/mean_60_id10,log_return_60_id10,log_close/mean_300_id10,log_return_300_id10,log_close/mean_900_id10,log_return_900_id10,log_close/mean_60_id11,log_return_60_id11,log_close/mean_300_id11,log_return_300_id11,log_close/mean_900_id11,log_return_900_id11,log_close/mean_60_id12,log_return_60_id12,log_close/mean_300_id12,log_return_300_id12,log_close/mean_900_id12,log_return_900_id12,log_close/mean_60_id13,log_return_60_id13,log_close/mean_300_id13,log_return_300_id13,log_close/mean_900_id13,log_return_900_id13,mean_close/mean_60,mean_log_returns_60,log_close/mean_60-mean_close/mean_60_id0,log_return_60-mean_log_returns_60_id0,log_close/mean_60-mean_close/mean_60_id1,log_return_60-mean_log_returns_60_id1,log_close/mean_60-mean_close/mean_60_id2,log_return_60-mean_log_returns_60_id2,log_close/mean_60-mean_close/mean_60_id3,log_return_60-mean_log_returns_60_id3,log_close/mean_60-mean_close/mean_60_id4,log_return_60-mean_log_returns_60_id4,log_close/mean_60-mean_close/mean_60_id5,log_return_60-mean_log_returns_60_id5,log_close/mean_60-mean_close/mean_60_id6,log_return_60-mean_log_returns_60_id6,log_close/mean_60-mean_close/mean_60_id7,log_return_60-mean_log_returns_60_id7,log_close/mean_60-mean_close/mean_60_id8,log_return_60-mean_log_returns_60_id8,log_close/mean_60-mean_close/mean_60_id9,log_return_60-mean_log_returns_60_id9,log_close/mean_60-mean_close/mean_60_id10,log_return_60-mean_log_returns_60_id10,log_close/mean_60-mean_close/mean_60_id11,log_return_60-mean_log_returns_60_id11,log_close/mean_60-mean_close/mean_60_id12,log_return_60-mean_log_returns_60_id12,log_close/mean_60-mean_close/mean_60_id13,log_return_60-mean_log_returns_60_id13,mean_close/mean_300,mean_log_returns_300,log_close/mean_300-mean_close/mean_300_id0,log_return_300-mean_log_returns_300_id0,log_close/mean_300-mean_close/mean_300_id1,log_return_300-mean_log_returns_300_id1,log_close/mean_300-mean_close/mean_300_id2,log_return_300-mean_log_returns_300_id2,log_close/mean_300-mean_close/mean_300_id3,log_return_300-mean_log_returns_300_id3,log_close/mean_300-mean_close/mean_300_id4,log_return_300-mean_log_returns_300_id4,log_close/mean_300-mean_close/mean_300_id5,log_return_300-mean_log_returns_300_id5,log_close/mean_300-mean_close/mean_300_id6,log_return_300-mean_log_returns_300_id6,log_close/mean_300-mean_close/mean_300_id7,log_return_300-mean_log_returns_300_id7,log_close/mean_300-mean_close/mean_300_id8,log_return_300-mean_log_returns_300_id8,log_close/mean_300-mean_close/mean_300_id9,log_return_300-mean_log_returns_300_id9,log_close/mean_300-mean_close/mean_300_id10,log_return_300-mean_log_returns_300_id10,log_close/mean_300-mean_close/mean_300_id11,log_return_300-mean_log_returns_300_id11,log_close/mean_300-mean_close/mean_300_id12,log_return_300-mean_log_returns_300_id12,log_close/mean_300-mean_close/mean_300_id13,log_return_300-mean_log_returns_300_id13,mean_close/mean_900,mean_log_returns_900,log_close/mean_900-mean_close/mean_900_id0,log_return_900-mean_log_returns_900_id0,log_close/mean_900-mean_close/mean_900_id1,log_return_900-mean_log_returns_900_id1,log_close/mean_900-mean_close/mean_900_id2,log_return_900-mean_log_returns_900_id2,log_close/mean_900-mean_close/mean_900_id3,log_return_900-mean_log_returns_900_id3,log_close/mean_900-mean_close/mean_900_id4,log_return_900-mean_log_returns_900_id4,log_close/mean_900-mean_close/mean_900_id5,log_return_900-mean_log_returns_900_id5,log_close/mean_900-mean_close/mean_900_id6,log_return_900-mean_log_returns_900_id6,log_close/mean_900-mean_close/mean_900_id7,log_return_900-mean_log_returns_900_id7,log_close/mean_900-mean_close/mean_900_id8,log_return_900-mean_log_returns_900_id8,log_close/mean_900-mean_close/mean_900_id9,log_return_900-mean_log_returns_900_id9,log_close/mean_900-mean_close/mean_900_id10,log_return_900-mean_log_returns_900_id10,log_close/mean_900-mean_close/mean_900_id11,log_return_900-mean_log_returns_900_id11,log_close/mean_900-mean_close/mean_900_id12,log_return_900-mean_log_returns_900_id12,log_close/mean_900-mean_close/mean_900_id13,log_return_900-mean_log_returns_900_id13
900,1514818860,0.003670,-0.000046,-0.009682,,,0.026840,0.005878,0.027252,,-0.004345,,-0.000653,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
901,1514818920,0.000658,-0.001395,0.005344,,,0.032043,0.005554,0.019608,,0.000405,,0.011620,,,0.488525,0.099976,0.127075,,,11.289062,0.421143,0.331189,,1.437500,,0.048615,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.857422,0.415527,1.074219,3.695312,1.310547,3.330078,0.169434,0.998047,0.595703,1.240234,0.770508,1.057617,-0.846680,0.495605,-0.427979,1.048828,-0.279785,-0.594727,,,,,,,,,,,,,-0.150513,3.751953,0.409912,0.690918,0.621094,-0.249268,-0.369141,0.486084,-0.142944,-0.592773,-0.133667,-0.331055,-0.740234,1.807617,-0.625000,1.960938,-0.985352,5.675781,,,,,,,-0.486572,-0.113342,-0.091858,1.287109,-0.411377,-2.744141,,,,,,,-1.315619,2.146484,-0.837071,-0.014755,-0.754139,3.257812,,,,,,,,,,,,,-0.360293,1.248572,1.217481,-0.833078,0.529750,-0.250525,-0.486544,-0.752966,,,,,0.209732,2.503654,-0.008902,-0.762539,-0.379919,0.559472,,,-0.126272,-1.361931,,,-0.955327,0.897913,,,,,-0.005624,1.164455,1.080069,2.530615,0.601179,0.075779,-0.422472,-0.115627,,,,,0.415640,-0.473736,-0.137376,-1.757041,-0.619332,0.796323,,,-0.086261,0.122897,,,-0.831447,-1.179210,,,,,0.017286,1.175482,1.293650,2.155421,0.753307,-0.117865,-0.297075,-1.770209,,,,,0.603700,-1.424733,-0.151005,-1.506588,-1.002505,4.500334,,,-0.428646,-3.918690,,,-0.771426,2.082330,,,,
902,1514818980,0.002512,0.000497,0.001775,,,0.033173,0.002216,0.026505,,-0.002016,,0.001867,,,-1.286133,0.014160,0.068909,,,20.031250,0.100159,-1.023582,,1.041992,,0.162842,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-2.324219,-3.894531,-2.181641,-0.871094,-1.942383,-3.001953,-0.929199,-0.570312,-0.508301,0.215332,-0.332520,-0.619629,-1.195312,0.536621,-0.774902,0.797852,-0.623535,-1.514648,,,,,,,,,,,,,0.711914,2.287109,1.293945,1.583984,1.513672,0.104248,-1.117188,-0.688965,-0.896973,0.710938,-0.886230,-0.793945,-1.547852,-1.286133,-1.441406,-0.969238,-1.802734,0.909668,,,,,,,-0.562988,-0.014412,-0.170898,1.787109,-0.487793,-0.414062,,,,,,,0.775507,3.205078,1.277415,5.554688,1.368025,4.160156,,,,,,,,,,,,,-0.773667,-0.053340,-1.550377,-3.841115,-0.155529,-0.516972,-0.421319,0.589962,,,,,1.485487,2.339701,-0.343298,-0.635759,-0.774664,-1.233167,,,0.210526,0.038932,,,1.549174,3.258419,,,,,-0.425388,1.101220,-1.756459,-1.972388,-0.083140,-0.885888,-0.349605,-0.303369,,,,,1.719062,0.482864,-0.471470,-0.390406,-1.015638,-2.070456,,,0.254448,0.686177,,,1.702803,4.453467,,,,,-0.399232,-0.146278,-1.543607,-2.855433,0.066813,-0.473351,-0.224501,-1.368371,,,,,1.913083,0.250501,-0.487098,-0.647692,-1.403457,1.055759,,,-0.088491,-0.267847,,,1.767257,4.306434,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2003996,1635004620,0.004490,-0.002438,0.000604,-0.001764,-0.003277,0.002123,0.003429,0.002497,0.003248,-0.000154,0.002647,0.003078,-0.000059,0.002581,1.094727,0.097961,-0.300293,591.418579,3287.896240,-1.572266,5.027344,1.399485,37.312500,-3.355469,0.017715,0.059967,-346.152832,5289.912109,1.033203,0.941406,0.355469,0.974609,1.374023,0.694336,1.184570,0.766113,1.719727,0.454590,1.046875,0.607910,0.882812,1.423828,50.03125,49.40625,49.90625,49.875,50.03125,50.06250,50.125,50.09375,49.8750,50.03125,49.96875,49.84375,49.87500,49.71875,0.131592,0.309326,0.032837,0.130005,0.228027,-0.152344,0.056641,-0.519531,-0.060272,-1.182617,0.408936,0.496826,-0.542480,-0.487305,-1.034180,-0.738770,-0.660156,-0.027725,-0.019012,0.247559,-0.025635,-0.417969,0.239746,0.281250,0.422119,1.418945,0.317627,0.153320,0.384277,0.581543,-0.476562,-0.318359,-0.364502,0.305176,0.070068,-0.506348,-0.435303,1.681641,0.169067,0.397705,0.500488,-1.287109,-0.559570,0.260498,-0.266357,0.839355,0.024139,0.461182,0.734863,1.751953,0.541992,-0.331055,0.661133,-0.750000,-0.506836,-0.243164,-0.788574,0.574707,-0.330322,-1.027344,0.007469,1.059570,0.045966,-0.152100,0.397991,2.060547,-0.312255,0.031708,-0.497678,-0.950684,-0.257082,0.651855,-0.316650,0.684570,-0.124878,-0.517578,0.148438,-0.463867,0.615723,1.342773,0.353271,-0.359131,0.632324,1.111328,-0.085742,0.515693,0.217382,-0.206380,0.142377,-1.035224,-0.456515,-1.002998,0.066731,-0.268155,0.507831,0.903592,-0.390857,-0.834015,-0.349589,1.165816,-0.474044,-0.255259,0.820498,1.235934,-0.421058,-0.758873,0.093211,0.543877,-0.226513,-0.483985,-0.230942,0.168904,0.701487,0.826765,-0.121514,-0.160677,0.154359,0.290700,0.061234,-1.021940,-0.912795,-0.578093,0.095884,-0.257392,0.439124,0.314017,-0.243098,0.465861,0.290528,0.558469,-0.144752,1.000013,0.663690,-0.170391,-0.666893,0.735357,0.167480,0.008577,-0.376164,-0.790007,-0.003368,-0.356717,0.474771,-0.198456,0.174812,0.102076,0.053212,-0.254406,0.234227,0.394750,-0.835210,-0.129802,0.064915,0.179292,0.209413,0.479444,-0.104716,-0.608651,0.325464,-1.388903,-0.150668,0.359018,0.486160,-0.852314,-0.505124,-1.129628,0.223145,1.958470,-0.431894,0.549779,-0.026414,-0.565852,0.457456,1.008801
2003997,1635004680,0.004589,-0.002665,0.000402,-0.001911,-0.002472,-0.000771,0.002779,0.003443,0.002815,-0.000671,0.002897,0.003733,-0.000678,0.002525,0.418945,0.093445,-0.378906,438.684570,2384.702148,-30.765625,3.492188,-0.430889,8.265625,-4.867188,0.011330,-0.071472,-558.652893,2578.300049,0.421387,0.909668,0.251709,0.766602,0.672852,0.365479,0.777344,0.517578,0.175415,0.144409,0.583984,0.133179,0.400146,0.625488,49.71875,49.75000,49.90625,49.875,49.34375,49.96875,49.750,49.28125,49.9375,49.90625,49.50000,49.68750,49.68750,49.12500,-0.768555,-0.045715,-0.864258,-1.257812,-0.672363,-0.149414,0.008423,0.986328,-0.094910,-0.690430,0.370850,0.331787,-0.870117,-1.325195,-1.379883,-1.001953,-1.005859,-1.126953,-0.259521,-0.059814,-0.265381,-0.478027,-0.002037,0.385010,-0.297363,-0.223633,-0.395996,-1.621094,-0.336914,-0.999512,-1.044922,-2.652344,-1.006836,-0.239380,-0.572266,-0.106079,-0.863281,1.086914,-0.251709,-0.672363,0.076904,0.130371,-0.961914,-0.056030,-0.658203,-1.958008,-0.378418,0.254150,-1.540039,-1.265625,-1.740234,-0.909180,-1.622070,-1.111328,-1.636719,-1.927734,-1.935547,-1.432617,-1.477539,-1.295898,-0.570566,-0.970703,-0.537760,-1.787109,-0.196243,3.185547,-1.835282,-0.396973,-2.015630,-2.732422,-1.781444,0.253174,-1.112305,-0.047760,-0.916016,-1.468750,-0.645996,-0.906738,-0.219604,0.576172,-0.469727,-1.429688,-0.197632,0.708984,-0.855006,-0.451587,0.086480,0.405887,0.863426,1.437916,-0.014896,-0.873608,0.595378,0.391770,0.557641,0.227989,-0.189712,-2.200254,-0.008235,1.538228,-0.106848,0.395545,-0.684624,-0.814332,-0.781282,-1.476118,0.284440,-0.519116,-0.980276,0.054615,-0.256902,0.403830,0.635410,1.027649,-0.895103,-1.262675,0.030951,0.005126,0.800211,0.572246,-0.484536,0.260722,0.629629,0.784707,0.499224,-0.358079,-0.111734,1.023339,0.643359,0.590264,0.236844,-0.694845,-0.845300,0.353329,-1.040138,-0.169819,0.357422,-0.524434,-1.120526,-1.469747,-0.020822,-0.205887,0.425495,-0.166922,-0.603025,-0.031961,-0.069386,-0.117421,0.973989,0.363749,-0.403301,-1.094992,0.600989,0.417007,0.266060,-0.967450,0.030636,-0.074098,0.679915,0.162279,0.224564,0.286025,-1.019002,-1.079647,-0.874982,-1.264278,0.406738,3.217508,-1.178419,0.285135,-0.043210,-0.874567,0.405366,0.740750
2003998,1635004740,0.003696,-0.001935,0.000716,-0.000983,-0.001260,-0.000049,0.000637,0.002508,0.001601,-0.000375,0.002010,0.002522,-0.000515,0.002510,0.424805,0.029968,-0.354004,279.279358,1466.282959,30.515625,2.115234,-2.290436,-7.800781,-3.585938,0.013664,-0.111572,-243.123032,3358.561035,0.898438,0.436523,0.525391,0.642578,0.592773,2.052734,0.676270,0.391357,0.419922,1.057617,1.634766,0.487305,1.432617,1.189453,49.90625,49.87500,49.71875,50.000,49.81250,50.37500,50.000,49.84375,49.4375,49.68750,49.68750,49.75000,49.65625,49.75000,-0.014267,0.120361,-0.106873,-0.332764,0.083435,0.227905,-0.684082,-1.828125,-0.829102,-0.710449,-0.364258,-0.433838,-0.086548,-1.487305,-0.643066,-1.210938,-0.272949,-0.818848,-0.396973,-1.520508,-0.442139,0.018265,-0.176025,-1.358398,-0.417480,-0.616699,-0.522461,-0.841309,-0.465576,-0.798828,0.688477,-0.069946,0.718750,1.327148,1.157227,0.413818,-0.998535,-0.454590,-0.390869,0.058014,-0.058411,-1.515625,-1.228516,-1.456055,-0.937988,-1.065430,-0.660156,-0.486572,-0.655273,-1.236328,-0.868164,-2.005859,-0.758789,0.063660,0.355713,-0.202393,0.056366,-0.875977,0.507812,0.602539,0.435617,1.065430,0.491331,-0.733398,0.825009,1.483398,-0.540659,-0.283691,-0.718696,-1.478516,-0.489825,-0.228882,0.157837,0.031982,0.359619,-0.735840,0.623047,0.711426,0.421875,-0.021805,0.173096,-0.398926,0.441650,1.675781,-0.211643,-0.568598,0.197373,0.688944,-0.472552,-1.259527,0.125105,-0.918707,-0.185340,-0.951856,-0.205802,-0.048223,0.899905,0.498644,-0.787082,0.114129,-1.016521,-0.887917,-0.443570,-0.668010,0.567338,0.366212,0.647260,1.634027,-0.329016,0.284906,0.369494,0.600579,0.633410,0.546798,-0.261447,-0.641927,0.154598,0.309042,-0.567562,-0.068522,-0.381736,-0.569010,-0.180692,0.660199,-0.261229,-0.199426,0.980150,1.969330,-0.129434,0.699930,-0.676576,-0.423963,-0.606505,-1.364408,0.317798,-0.234285,0.752930,-0.091471,-0.457249,-0.836588,0.621075,-0.093853,0.434584,0.243026,0.028022,-0.033038,0.055441,0.260966,-0.392261,-0.400800,-0.300944,-0.785810,-0.203995,-1.325451,-0.493536,-0.765929,1.128969,0.446938,-0.086424,-1.482910,-0.688368,-0.453511,-0.786596,0.096694,0.479796,0.635456,0.796875,1.516436,-0.517847,-0.195844,0.595061,0.744506,0.413717,1.709260


In [74]:
# Specify the column of features that need to be used for training
no_use_columns = [f'Target_{i}' for i in range(14)]
no_use_columns.append('timestamp')
no_use_columns.extend(FEATURES_EXCLUDE)
features = test_df.columns
features = features.drop(no_use_columns)
features = list(features)

In [75]:
features

['MACD_id0',
 'MACD_id1',
 'MACD_id2',
 'MACD_id3',
 'MACD_id4',
 'MACD_id5',
 'MACD_id6',
 'MACD_id7',
 'MACD_id8',
 'MACD_id9',
 'MACD_id10',
 'MACD_id11',
 'MACD_id12',
 'MACD_id13',
 'Volume_Ratio_id0',
 'Volume_Ratio_id1',
 'Volume_Ratio_id2',
 'Volume_Ratio_id3',
 'Volume_Ratio_id4',
 'Volume_Ratio_id5',
 'Volume_Ratio_id6',
 'Volume_Ratio_id7',
 'Volume_Ratio_id8',
 'Volume_Ratio_id9',
 'Volume_Ratio_id10',
 'Volume_Ratio_id11',
 'Volume_Ratio_id12',
 'Volume_Ratio_id13',
 'RSI_id0',
 'RSI_id1',
 'RSI_id2',
 'RSI_id3',
 'RSI_id4',
 'RSI_id5',
 'RSI_id6',
 'RSI_id7',
 'RSI_id8',
 'RSI_id9',
 'RSI_id10',
 'RSI_id11',
 'RSI_id12',
 'RSI_id13',
 'log_close/mean_60_id0',
 'log_return_60_id0',
 'log_close/mean_300_id0',
 'log_return_300_id0',
 'log_close/mean_900_id0',
 'log_return_900_id0',
 'log_close/mean_60_id1',
 'log_return_60_id1',
 'log_close/mean_300_id1',
 'log_return_300_id1',
 'log_close/mean_900_id1',
 'log_return_900_id1',
 'log_close/mean_60_id2',
 'log_return_60_id2',


In [76]:
current_time = datetime.now()
formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
# CreateFolderForEachTraining(creator)
df_asset_details = pd.read_csv(ASSET_DETAILS_CSV)

# Start training
for asset_id in ASSET_ID_SELECTED_FOR_TRAIN:
    asset_name = df_asset_details.loc[df_asset_details['Asset_ID'] == asset_id, 'Asset_Name'].values[0]
    print(f"Training model for {asset_name} (ID={asset_id:<2})")
    folder_name = formatted_time
    test_df = test_df.loc[(test_df[f'Target_{asset_id}'] == test_df[f'Target_{asset_id}'])]
    # The Bayesian optimiser is applied here
    trial_num = 1
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=N_TRIALS)

    # Print the result of optimization
    print('Number of finished trials: ', len(study.trials))
    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)
    print('Params: ')
    for key, value in trial.params.items():
        print(f'    {key}: {value}')

Training model for TRON (ID=13)


[I 2023-12-05 21:45:18,413] A new study created in memory with name: no-name-5e871e03-1da2-48d5-aa2c-6bfebb08987b
  local_params['learning_rate'] = trial.suggest_loguniform('learning_rate', *param_ranges['learning_rate'])
  local_params['lambda_l1'] = trial.suggest_uniform('lambda_l1', *param_ranges['lambda_l1'])
  local_params['lambda_l2'] = trial.suggest_uniform('lambda_l2', *param_ranges['lambda_l2'])


current_training_id:13
current_Bayesian_trial:1 out of 10
number of train data: 1520409
number of val data:   380102
training_set_score: 0.038309233354679714
validation_set_score: 0.019314335945582073


[I 2023-12-05 21:47:46,752] Trial 0 finished with value: 0.019314335945582073 and parameters: {'learning_rate': 0.17103342039433958, 'max_depth': 16, 'num_leaves': 73, 'lambda_l1': 3.5414462176375663, 'lambda_l2': 2.2156494174066674, 'max_bin': 922, 'min_data_in_leaf': 66}. Best is trial 0 with value: 0.019314335945582073.
  local_params['learning_rate'] = trial.suggest_loguniform('learning_rate', *param_ranges['learning_rate'])
  local_params['lambda_l1'] = trial.suggest_uniform('lambda_l1', *param_ranges['lambda_l1'])
  local_params['lambda_l2'] = trial.suggest_uniform('lambda_l2', *param_ranges['lambda_l2'])


current_training_id:13
current_Bayesian_trial:1 out of 10
number of train data: 1520409
number of val data:   380102
training_set_score: 0.04665526283827228
validation_set_score: 0.021453635491829562


[I 2023-12-05 21:49:47,010] Trial 1 finished with value: 0.021453635491829562 and parameters: {'learning_rate': 0.17147517451217056, 'max_depth': 16, 'num_leaves': 85, 'lambda_l1': 3.2867247313606596, 'lambda_l2': 2.479134155334773, 'max_bin': 1080, 'min_data_in_leaf': 80}. Best is trial 1 with value: 0.021453635491829562.
  local_params['learning_rate'] = trial.suggest_loguniform('learning_rate', *param_ranges['learning_rate'])
  local_params['lambda_l1'] = trial.suggest_uniform('lambda_l1', *param_ranges['lambda_l1'])
  local_params['lambda_l2'] = trial.suggest_uniform('lambda_l2', *param_ranges['lambda_l2'])


current_training_id:13
current_Bayesian_trial:1 out of 10
number of train data: 1520409
number of val data:   380102
training_set_score: 0.04375855131325042
validation_set_score: 0.02182920394652923


[I 2023-12-05 21:51:31,061] Trial 2 finished with value: 0.02182920394652923 and parameters: {'learning_rate': 0.18775525060281287, 'max_depth': 16, 'num_leaves': 72, 'lambda_l1': 3.595672965324523, 'lambda_l2': 2.6280997468379885, 'max_bin': 975, 'min_data_in_leaf': 59}. Best is trial 2 with value: 0.02182920394652923.
  local_params['learning_rate'] = trial.suggest_loguniform('learning_rate', *param_ranges['learning_rate'])
  local_params['lambda_l1'] = trial.suggest_uniform('lambda_l1', *param_ranges['lambda_l1'])
  local_params['lambda_l2'] = trial.suggest_uniform('lambda_l2', *param_ranges['lambda_l2'])


current_training_id:13
current_Bayesian_trial:1 out of 10
number of train data: 1520409
number of val data:   380102
training_set_score: 0.04320966811757967
validation_set_score: 0.020376455539917147


[I 2023-12-05 21:53:10,059] Trial 3 finished with value: 0.020376455539917147 and parameters: {'learning_rate': 0.17835832755500075, 'max_depth': 16, 'num_leaves': 76, 'lambda_l1': 3.4148987422722645, 'lambda_l2': 2.767095059777164, 'max_bin': 1078, 'min_data_in_leaf': 59}. Best is trial 2 with value: 0.02182920394652923.
  local_params['learning_rate'] = trial.suggest_loguniform('learning_rate', *param_ranges['learning_rate'])
  local_params['lambda_l1'] = trial.suggest_uniform('lambda_l1', *param_ranges['lambda_l1'])
  local_params['lambda_l2'] = trial.suggest_uniform('lambda_l2', *param_ranges['lambda_l2'])


current_training_id:13
current_Bayesian_trial:1 out of 10
number of train data: 1520409
number of val data:   380102
training_set_score: 0.03972018509311678
validation_set_score: 0.020533713129957255


[I 2023-12-05 21:54:56,865] Trial 4 finished with value: 0.020533713129957255 and parameters: {'learning_rate': 0.19207367836712985, 'max_depth': 15, 'num_leaves': 60, 'lambda_l1': 3.6916167210647672, 'lambda_l2': 2.421891547613181, 'max_bin': 1021, 'min_data_in_leaf': 73}. Best is trial 2 with value: 0.02182920394652923.
  local_params['learning_rate'] = trial.suggest_loguniform('learning_rate', *param_ranges['learning_rate'])
  local_params['lambda_l1'] = trial.suggest_uniform('lambda_l1', *param_ranges['lambda_l1'])
  local_params['lambda_l2'] = trial.suggest_uniform('lambda_l2', *param_ranges['lambda_l2'])


current_training_id:13
current_Bayesian_trial:1 out of 10
number of train data: 1520409
number of val data:   380102
training_set_score: 0.04343960808440715
validation_set_score: 0.02101909956096167


[I 2023-12-05 21:56:38,770] Trial 5 finished with value: 0.02101909956096167 and parameters: {'learning_rate': 0.19389570284294644, 'max_depth': 18, 'num_leaves': 84, 'lambda_l1': 3.583688824096252, 'lambda_l2': 2.5622336246471287, 'max_bin': 942, 'min_data_in_leaf': 77}. Best is trial 2 with value: 0.02182920394652923.
  local_params['learning_rate'] = trial.suggest_loguniform('learning_rate', *param_ranges['learning_rate'])
  local_params['lambda_l1'] = trial.suggest_uniform('lambda_l1', *param_ranges['lambda_l1'])
  local_params['lambda_l2'] = trial.suggest_uniform('lambda_l2', *param_ranges['lambda_l2'])


current_training_id:13
current_Bayesian_trial:1 out of 10
number of train data: 1520409
number of val data:   380102
training_set_score: 0.033585876589426536
validation_set_score: 0.020431492410442936


[I 2023-12-05 21:58:26,815] Trial 6 finished with value: 0.020431492410442936 and parameters: {'learning_rate': 0.1705694482841451, 'max_depth': 14, 'num_leaves': 84, 'lambda_l1': 3.7596953307221748, 'lambda_l2': 2.6673484606744955, 'max_bin': 957, 'min_data_in_leaf': 75}. Best is trial 2 with value: 0.02182920394652923.
  local_params['learning_rate'] = trial.suggest_loguniform('learning_rate', *param_ranges['learning_rate'])
  local_params['lambda_l1'] = trial.suggest_uniform('lambda_l1', *param_ranges['lambda_l1'])
  local_params['lambda_l2'] = trial.suggest_uniform('lambda_l2', *param_ranges['lambda_l2'])


current_training_id:13
current_Bayesian_trial:1 out of 10
number of train data: 1520409
number of val data:   380102
training_set_score: 0.07170671305642118
validation_set_score: 0.020091966521703863


[I 2023-12-05 22:01:13,020] Trial 7 finished with value: 0.020091966521703863 and parameters: {'learning_rate': 0.152366395949454, 'max_depth': 17, 'num_leaves': 76, 'lambda_l1': 3.22968915537249, 'lambda_l2': 2.5047901896334626, 'max_bin': 1094, 'min_data_in_leaf': 79}. Best is trial 2 with value: 0.02182920394652923.
  local_params['learning_rate'] = trial.suggest_loguniform('learning_rate', *param_ranges['learning_rate'])
  local_params['lambda_l1'] = trial.suggest_uniform('lambda_l1', *param_ranges['lambda_l1'])
  local_params['lambda_l2'] = trial.suggest_uniform('lambda_l2', *param_ranges['lambda_l2'])


current_training_id:13
current_Bayesian_trial:1 out of 10
number of train data: 1520409
number of val data:   380102
training_set_score: 0.04594944291478268
validation_set_score: 0.02021948698288218


[I 2023-12-05 22:03:05,318] Trial 8 finished with value: 0.02021948698288218 and parameters: {'learning_rate': 0.16447226805141438, 'max_depth': 15, 'num_leaves': 77, 'lambda_l1': 3.273927589671661, 'lambda_l2': 2.2425971527355486, 'max_bin': 991, 'min_data_in_leaf': 62}. Best is trial 2 with value: 0.02182920394652923.
  local_params['learning_rate'] = trial.suggest_loguniform('learning_rate', *param_ranges['learning_rate'])
  local_params['lambda_l1'] = trial.suggest_uniform('lambda_l1', *param_ranges['lambda_l1'])
  local_params['lambda_l2'] = trial.suggest_uniform('lambda_l2', *param_ranges['lambda_l2'])


current_training_id:13
current_Bayesian_trial:1 out of 10
number of train data: 1520409
number of val data:   380102
training_set_score: 0.04459255599912468
validation_set_score: 0.02130638994225777


[I 2023-12-05 22:04:42,220] Trial 9 finished with value: 0.02130638994225777 and parameters: {'learning_rate': 0.1923161674780338, 'max_depth': 18, 'num_leaves': 77, 'lambda_l1': 3.3213622863277736, 'lambda_l2': 2.512969583894226, 'max_bin': 1081, 'min_data_in_leaf': 65}. Best is trial 2 with value: 0.02182920394652923.


Number of finished trials:  10
Best trial:
Value:  0.02182920394652923
Params: 
    learning_rate: 0.18775525060281287
    max_depth: 16
    num_leaves: 72
    lambda_l1: 3.595672965324523
    lambda_l2: 2.6280997468379885
    max_bin: 975
    min_data_in_leaf: 59
