# Task to run: train or predict.
task = train

# Boosting type; only gbdt is supported for now.
# alias: boosting, boost
boosting_type = gbdt

# Application (objective) type. Supported values:
#   regression  - regression task
#   binary      - binary classification task
#   lambdarank  - lambdarank task
# alias: application, app
objective = binary

# Evaluation metrics; several may be given, delimited by ','. Supported values:
#   l1
#   l2              - default metric for regression
#   ndcg            - default metric for lambdarank
#   auc
#   binary_logloss  - default metric for binary
#   binary_error
metric = auc

# Frequency of metric output.
metric_freq = 1

# Set to true to also output the metric on the training data.
# alias: training_metric, train_metric
is_training_metric = true

# Number of bins used to bucket feature values. 255 is the recommended
# setting: it saves memory and still gives good accuracy.
max_bin = 255

# Training data file.
# If a weight file exists, it must be named "<data file>.weight"
# (for the file below: "train.txt.weight").
# alias: train_data, train
data = train.txt

# Validation data; several files may be given, separated by ','.
# If a weight file exists, it must be named "<data file>.weight"
# (for the file below: "test.txt.weight").
# alias: valid, test, test_data
valid_data = test.txt

# Number of trees (boosting iterations).
# alias: num_tree, num_iteration, num_iterations, num_round, num_rounds
num_trees = 150

# Shrinkage rate.
# alias: shrinkage_rate
learning_rate = 0.1

# Number of leaves in one tree.
# alias: num_leaf
num_leaves = 10

# Type of tree learner. Supported values:
#   serial   - single-machine version
#   feature  - feature-parallel training
#   data     - data-parallel training
#   voting   - voting-based parallel training
# alias: tree
tree_learner = serial

# Number of threads for multi-threading. One thread uses one CPU; the default is the number of CPUs.
# num_threads = 8

# Feature sub-sampling: randomly select 80% of the features to train on at
# each iteration.
# alias: sub_feature
feature_fraction = 0.8

# Bagging (data sub-sampling) frequency: perform bagging every 5 iterations.
# This is an integer iteration interval, not a ratio; the ratio of data
# sampled is set separately by bagging_fraction.
bagging_freq = 5

# Bagging fraction: randomly select 80% of the data each time bagging runs.
# alias: sub_row
bagging_fraction = 0.8

# Minimal number of data points in one leaf; use this to deal with over-fitting.
# alias: min_data_per_leaf, min_data
min_data_in_leaf = 10

# Minimal sum of hessians in one leaf; use this to deal with over-fitting.
min_sum_hessian_in_leaf = 5

# Saves memory and speeds up training for sparse features.
# alias: is_sparse
is_enable_sparse = true

# Set to true when the data is bigger than the memory size; otherwise false
# gives faster speed.
# alias: two_round_loading, two_round
use_two_round_loading = false

# Set to true to save the data to a binary file; the application will then
# load the data from that binary file automatically next time.
# alias: is_save_binary, save_binary
is_save_binary_file = false

# Output model file.
output_model = LightGBM_model.txt

# Continue training from a previously trained gbdt model.
# input_model = trained_model.txt

# Output prediction file for the predict task.
# output_result = prediction.txt

# Continue training from an initial score file.
# input_init_score = LightGBM_predict_result.txt

# Number of machines in parallel training.
# alias: num_machine
num_machines = 1

# Local listening port in parallel training.
# alias: local_port
local_listen_port = 12400

# Machine list file for parallel training.
# alias: mlist
machine_list_file = mlist.txt