In [2]:
import numpy as np
import pandas as pd
# library for splitting training-testing
from sklearn.model_selection import train_test_split
# library for classification
from sklearn.metrics import accuracy_score

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.decomposition import PCA

from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler
from sklearn import pipeline
from lightgbm import LGBMClassifier

from hyperopt import tpe,hp,Trials
from hyperopt.fmin import fmin
import warnings

In [11]:
x_train = pd.read_feather("Data/x_train__IterativeImputer_genFeats.ftr")
x_test = pd.read_feather("Data/x_test__IterativeImputer_genFeats.ftr")

x_tr = pd.read_csv("Preds/preds_train.csv")
x_val_1 = pd.read_csv("Preds/preds_val.csv")
x_test_1 = pd.read_csv("Preds/preds_test.csv")

y_train = pd.read_csv("Data/train_y.csv")
y_test = pd.read_csv("Data/Sample_Output.csv", names = ['ID', 'Target'], header=None)

In [12]:
x_train_1, x_val, y_train_1, y_val = train_test_split(x_train, y_train, test_size=0.15, random_state=59, stratify=y_train)

In [13]:
x_train_1 = pd.concat([x_train_1.reset_index(drop=True), x_tr], axis=1)
x_val = pd.concat([x_val.reset_index(drop=True), x_val_1], axis=1)
x_test = pd.concat([x_test.reset_index(drop=True), x_test_1], axis=1)

In [17]:
#check if given parameter can be interpreted as a numerical value
def is_number(s):
    if s is None:
        return False
    try:
        float(s)
        return True
    except ValueError:
        return False

def convert_int_params(names, params):
    for int_type in names:
        #sometimes the parameters can be choices between options or numerical values. like "log2" vs "1-10"
        raw_val = params[int_type]
        if is_number(raw_val):
            params[int_type] = int(raw_val)
    return params

def create_fit_params(params):
    using_dart = params['boosting_type'] == "dart"
    
    fit_params = {"eval_metric": "multi_logloss"}
    if using_dart:
        n_estimators = 75
    else:
        n_estimators = 200
        fit_params["early_stopping_rounds"] = 10
    params["n_estimators"] = n_estimators
    return fit_params

def objective(params):
    warnings.filterwarnings(action='ignore', category=DeprecationWarning)
    
    int_types = ["num_leaves", "min_child_samples", "subsample_for_bin", "min_data_in_leaf"]
    params = convert_int_params(int_types, params)

    # Extract the boosting type
    params['boosting_type'] = params['boosting_type']['boosting_type']
    #    print("running with params:"+str(params))

    fit_params = create_fit_params(params)
    
    model = LGBMClassifier(**params)
    
    model.fit(x_tr, 
              y_train_1.values.reshape(-1), 
              eval_set=[(x_tr, y_train_1.values.reshape(-1)), (x_val_1, y_val.values.reshape(-1))], 
              verbose=1, **fit_params)
    
    preds_tr = model.predict(x_tr,)
    preds_val = model.predict(x_val_1)
    val_score = accuracy_score(y_val, preds_val)
    tr_score = accuracy_score(y_train_1, preds_tr)
    print(tr_score, val_score, params)
    return -1.0*val_score

def optimize(trial):
    space = {
        #this is just piling on most of the possible parameter values for LGBM
        #some of them apparently don't make sense together, but works for now.. :)
        'boosting_type': hp.choice('boosting_type',
                                   [{'boosting_type': 'gbdt',
#                                     'subsample': hp.uniform('dart_subsample', 0.5, 1)
                                     },
                                    {'boosting_type': 'dart',
#                                     'subsample': hp.uniform('dart_subsample', 0.5, 1)
                                     },
                                    {'boosting_type': 'goss'}]),
        'num_leaves': hp.quniform('num_leaves', 30, 150, 1),
        'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(0.2)),
        'subsample_for_bin': hp.quniform('subsample_for_bin', 20000, 300000, 20000),
        'feature_fraction': hp.uniform('feature_fraction', 0.5, 1),
        'bagging_fraction': hp.uniform('bagging_fraction', 0.5, 1), #alias "subsample"
        'min_data_in_leaf': hp.qloguniform('min_data_in_leaf', 4, 15, 1),
        'lambda_l1': hp.choice('lambda_l1', [0, hp.loguniform('lambda_l1_positive', -16, 2)]),
        'lambda_l2': hp.choice('lambda_l2', [0, hp.loguniform('lambda_l2_positive', -16, 2)]),
        'verbose': -1,
        #the LGBM parameters docs list various aliases, and the LGBM implementation seems to complain about
        #the following not being used due to other params, so trying to silence the complaints by setting to None
        'subsample': None, #overridden by bagging_fraction
        'reg_alpha': None, #overridden by lambda_l1
        'reg_lambda': None, #overridden by lambda_l2
        'min_sum_hessian_in_leaf': None, #overrides min_child_weight
        'min_child_samples': None, #overridden by min_data_in_leaf
        'colsample_bytree': None, #overridden by feature_fraction
#        'min_child_samples': hp.quniform('min_child_samples', 20, 500, 5),
        'min_child_weight': hp.loguniform('min_child_weight', -16, 5), #also aliases to min_sum_hessian
#        'reg_alpha': hp.uniform('reg_alpha', 0.0, 1.0),
#        'reg_lambda': hp.uniform('reg_lambda', 0.0, 1.0),
#        'colsample_bytree': hp.uniform('colsample_by_tree', 0.6, 1.0),
        'objective' : "multiclass",
        "num_class" : 4,
        'num_threads' : 8
    }
    best=fmin(fn=objective,space=space,algo=tpe.suggest,trials=trial,max_evals=20,rstate=np.random.default_rng(59))
    return best

trial=Trials()
best=optimize(trial)

  0%|                                                                     | 0/20 [00:00<?, ?trial/s, best loss=?]





[1]	valid_0's multi_logloss: 0.854213	valid_1's multi_logloss: 0.854375                                          
[2]	valid_0's multi_logloss: 0.854057	valid_1's multi_logloss: 0.854387                                          
[3]	valid_0's multi_logloss: 0.853908	valid_1's multi_logloss: 0.854395                                          
[4]	valid_0's multi_logloss: 0.853762	valid_1's multi_logloss: 0.854402                                          
[5]	valid_0's multi_logloss: 0.853611	valid_1's multi_logloss: 0.854405                                          
[6]	valid_0's multi_logloss: 0.85346	valid_1's multi_logloss: 0.854414                                           
[7]	valid_0's multi_logloss: 0.85331	valid_1's multi_logloss: 0.854414                                           
[8]	valid_0's multi_logloss: 0.853164	valid_1's multi_logloss: 0.854424                                          
[9]	valid_0's multi_logloss: 0.853018	valid_1's multi_logloss: 0.854422                 





[1]	valid_0's multi_logloss: 0.854363	valid_1's multi_logloss: 0.854355                                          
[2]	valid_0's multi_logloss: 0.85436	valid_1's multi_logloss: 0.854353                                           
[3]	valid_0's multi_logloss: 0.854357	valid_1's multi_logloss: 0.854351                                          
[4]	valid_0's multi_logloss: 0.854354	valid_1's multi_logloss: 0.854352                                          
[5]	valid_0's multi_logloss: 0.854352	valid_1's multi_logloss: 0.854352                                          
[6]	valid_0's multi_logloss: 0.85435	valid_1's multi_logloss: 0.854351                                           
[7]	valid_0's multi_logloss: 0.854348	valid_1's multi_logloss: 0.854352                                          
[8]	valid_0's multi_logloss: 0.854345	valid_1's multi_logloss: 0.854352                                          
[9]	valid_0's multi_logloss: 0.854343	valid_1's multi_logloss: 0.854352                 




[1]	valid_0's multi_logloss: 0.854322	valid_1's multi_logloss: 0.854358                                          
[2]	valid_0's multi_logloss: 0.85428	valid_1's multi_logloss: 0.854359                                           
[3]	valid_0's multi_logloss: 0.854238	valid_1's multi_logloss: 0.854361                                          
[4]	valid_0's multi_logloss: 0.854198	valid_1's multi_logloss: 0.854362                                          
[5]	valid_0's multi_logloss: 0.854158	valid_1's multi_logloss: 0.854361                                          
[6]	valid_0's multi_logloss: 0.854118	valid_1's multi_logloss: 0.85437                                           
[7]	valid_0's multi_logloss: 0.854079	valid_1's multi_logloss: 0.854373                                          
[8]	valid_0's multi_logloss: 0.854093	valid_1's multi_logloss: 0.854371                                          
[9]	valid_0's multi_logloss: 0.854054	valid_1's multi_logloss: 0.854372                 

[72]	valid_0's multi_logloss: 0.885531	valid_1's multi_logloss: 0.886868                                         
[73]	valid_0's multi_logloss: 0.883571	valid_1's multi_logloss: 0.88493                                          
[74]	valid_0's multi_logloss: 0.883719	valid_1's multi_logloss: 0.885063                                         
[75]	valid_0's multi_logloss: 0.881863	valid_1's multi_logloss: 0.883227                                         
0.7410658435791999                                                                                               
0.7410694829815361                                                                                               
{'bagging_fraction': 0.9037778055261227, 'boosting_type': 'dart', 'colsample_bytree': None, 'feature_fraction': 0.9533634871827803, 'lambda_l1': 0.05880970092021131, 'lambda_l2': 0, 'learning_rate': 0.02508956890463392, 'min_child_samples': None, 'min_child_weight': 4.4122243902352913e-07, 'min_data_in_leaf': 10055





[1]	valid_0's multi_logloss: 0.854353	valid_1's multi_logloss: 0.854356                                          
[2]	valid_0's multi_logloss: 0.85434	valid_1's multi_logloss: 0.854358                                           
[3]	valid_0's multi_logloss: 0.854327	valid_1's multi_logloss: 0.854356                                          
[4]	valid_0's multi_logloss: 0.854315	valid_1's multi_logloss: 0.854355                                          
[5]	valid_0's multi_logloss: 0.854303	valid_1's multi_logloss: 0.854355                                          
[6]	valid_0's multi_logloss: 0.854291	valid_1's multi_logloss: 0.854355                                          
[7]	valid_0's multi_logloss: 0.85428	valid_1's multi_logloss: 0.854355                                           
[8]	valid_0's multi_logloss: 0.854268	valid_1's multi_logloss: 0.854357                                          
[9]	valid_0's multi_logloss: 0.854257	valid_1's multi_logloss: 0.854356                 





[1]	valid_0's multi_logloss: 0.854295	valid_1's multi_logloss: 0.854351                                          
[2]	valid_0's multi_logloss: 0.854222	valid_1's multi_logloss: 0.854353                                          
[3]	valid_0's multi_logloss: 0.854153	valid_1's multi_logloss: 0.854354                                          
[4]	valid_0's multi_logloss: 0.854081	valid_1's multi_logloss: 0.854351                                          
[5]	valid_0's multi_logloss: 0.854011	valid_1's multi_logloss: 0.854352                                          
[6]	valid_0's multi_logloss: 0.853944	valid_1's multi_logloss: 0.854359                                          
[7]	valid_0's multi_logloss: 0.853877	valid_1's multi_logloss: 0.854353                                          
[8]	valid_0's multi_logloss: 0.853809	valid_1's multi_logloss: 0.854351                                          
[9]	valid_0's multi_logloss: 0.853745	valid_1's multi_logloss: 0.854353                 





[1]	valid_0's multi_logloss: 0.85413	valid_1's multi_logloss: 0.854389                                           
[2]	valid_0's multi_logloss: 0.853892	valid_1's multi_logloss: 0.854416                                          
[3]	valid_0's multi_logloss: 0.853688	valid_1's multi_logloss: 0.854434                                          
[4]	valid_0's multi_logloss: 0.853486	valid_1's multi_logloss: 0.854472                                          
[5]	valid_0's multi_logloss: 0.853303	valid_1's multi_logloss: 0.854509                                          
[6]	valid_0's multi_logloss: 0.853103	valid_1's multi_logloss: 0.854527                                          
[7]	valid_0's multi_logloss: 0.853067	valid_1's multi_logloss: 0.854555                                          
[8]	valid_0's multi_logloss: 0.853019	valid_1's multi_logloss: 0.854606                                          
[9]	valid_0's multi_logloss: 0.85298	valid_1's multi_logloss: 0.854631                  




[1]	valid_0's multi_logloss: 0.854214	valid_1's multi_logloss: 0.85436                                           
[2]	valid_0's multi_logloss: 0.85407	valid_1's multi_logloss: 0.854356                                           
[3]	valid_0's multi_logloss: 0.853921	valid_1's multi_logloss: 0.854357                                          
[4]	valid_0's multi_logloss: 0.853771	valid_1's multi_logloss: 0.854362                                          
[5]	valid_0's multi_logloss: 0.853627	valid_1's multi_logloss: 0.854364                                          
[6]	valid_0's multi_logloss: 0.853483	valid_1's multi_logloss: 0.854369                                          
[7]	valid_0's multi_logloss: 0.853339	valid_1's multi_logloss: 0.854369                                          
[8]	valid_0's multi_logloss: 0.853385	valid_1's multi_logloss: 0.854368                                          
[9]	valid_0's multi_logloss: 0.853253	valid_1's multi_logloss: 0.854374                 

[72]	valid_0's multi_logloss: 0.87357	valid_1's multi_logloss: 0.878489                                          
[73]	valid_0's multi_logloss: 0.871388	valid_1's multi_logloss: 0.876403                                         
[74]	valid_0's multi_logloss: 0.871592	valid_1's multi_logloss: 0.876551                                         
[75]	valid_0's multi_logloss: 0.869573	valid_1's multi_logloss: 0.874626                                         
0.7410658435791999                                                                                               
0.7410694829815361                                                                                               
{'bagging_fraction': 0.9188762985491308, 'boosting_type': 'dart', 'colsample_bytree': None, 'feature_fraction': 0.6463599607934534, 'lambda_l1': 0.02224525554914679, 'lambda_l2': 0, 'learning_rate': 0.036691935902934486, 'min_child_samples': None, 'min_child_weight': 7.861358931762953e-06, 'min_data_in_leaf': 3818,





[1]	valid_0's multi_logloss: 0.854274	valid_1's multi_logloss: 0.854359                                          
[2]	valid_0's multi_logloss: 0.854179	valid_1's multi_logloss: 0.85436                                           
[3]	valid_0's multi_logloss: 0.854089	valid_1's multi_logloss: 0.854365                                          
[4]	valid_0's multi_logloss: 0.853998	valid_1's multi_logloss: 0.854369                                          
[5]	valid_0's multi_logloss: 0.853905	valid_1's multi_logloss: 0.85437                                           
[6]	valid_0's multi_logloss: 0.853816	valid_1's multi_logloss: 0.854371                                          
[7]	valid_0's multi_logloss: 0.853728	valid_1's multi_logloss: 0.854371                                          
[8]	valid_0's multi_logloss: 0.853641	valid_1's multi_logloss: 0.854375                                          
[9]	valid_0's multi_logloss: 0.853552	valid_1's multi_logloss: 0.85438                  




[1]	valid_0's multi_logloss: 0.85281	valid_1's multi_logloss: 0.854483                                           
[2]	valid_0's multi_logloss: 0.851308	valid_1's multi_logloss: 0.854744                                          
[3]	valid_0's multi_logloss: 0.84989	valid_1's multi_logloss: 0.854838                                           
[4]	valid_0's multi_logloss: 0.848615	valid_1's multi_logloss: 0.854892                                          
[5]	valid_0's multi_logloss: 0.847248	valid_1's multi_logloss: 0.855044                                          
[6]	valid_0's multi_logloss: 0.845898	valid_1's multi_logloss: 0.855104                                          
[7]	valid_0's multi_logloss: 0.844587	valid_1's multi_logloss: 0.855222                                          
[8]	valid_0's multi_logloss: 0.845013	valid_1's multi_logloss: 0.855062                                          
[9]	valid_0's multi_logloss: 0.843719	valid_1's multi_logloss: 0.855171                 

[72]	valid_0's multi_logloss: 0.81123	valid_1's multi_logloss: 0.857714                                          
[73]	valid_0's multi_logloss: 0.810019	valid_1's multi_logloss: 0.857562                                         
[74]	valid_0's multi_logloss: 0.810518	valid_1's multi_logloss: 0.857534                                         
[75]	valid_0's multi_logloss: 0.809297	valid_1's multi_logloss: 0.857515                                         
0.7410658435791999                                                                                               
0.7410694829815361                                                                                               
{'bagging_fraction': 0.8005568409731003, 'boosting_type': 'dart', 'colsample_bytree': None, 'feature_fraction': 0.6236538685126177, 'lambda_l1': 0, 'lambda_l2': 0, 'learning_rate': 0.16971755734559354, 'min_child_samples': None, 'min_child_weight': 0.001281604100468014, 'min_data_in_leaf': 147, 'min_sum_hessian_in_





[1]	valid_0's multi_logloss: 0.854148	valid_1's multi_logloss: 0.854373                                          
[2]	valid_0's multi_logloss: 0.853939	valid_1's multi_logloss: 0.854395                                          
[3]	valid_0's multi_logloss: 0.853732	valid_1's multi_logloss: 0.854434                                          
[4]	valid_0's multi_logloss: 0.85352	valid_1's multi_logloss: 0.854441                                           
[5]	valid_0's multi_logloss: 0.853316	valid_1's multi_logloss: 0.85445                                           
[6]	valid_0's multi_logloss: 0.853119	valid_1's multi_logloss: 0.854448                                          
[7]	valid_0's multi_logloss: 0.852917	valid_1's multi_logloss: 0.854464                                          
[8]	valid_0's multi_logloss: 0.852728	valid_1's multi_logloss: 0.854463                                          
[9]	valid_0's multi_logloss: 0.852531	valid_1's multi_logloss: 0.854462                 





[1]	valid_0's multi_logloss: 0.854274	valid_1's multi_logloss: 0.854352                                          
[2]	valid_0's multi_logloss: 0.854187	valid_1's multi_logloss: 0.854348                                          
[3]	valid_0's multi_logloss: 0.854101	valid_1's multi_logloss: 0.854346                                          
[4]	valid_0's multi_logloss: 0.854016	valid_1's multi_logloss: 0.854344                                          
[5]	valid_0's multi_logloss: 0.853932	valid_1's multi_logloss: 0.854344                                          
[6]	valid_0's multi_logloss: 0.853849	valid_1's multi_logloss: 0.854351                                          
[7]	valid_0's multi_logloss: 0.853765	valid_1's multi_logloss: 0.854353                                          
[8]	valid_0's multi_logloss: 0.853681	valid_1's multi_logloss: 0.854343                                          
[9]	valid_0's multi_logloss: 0.853599	valid_1's multi_logloss: 0.854346                 





[1]	valid_0's multi_logloss: 0.853881	valid_1's multi_logloss: 0.854406                                          
[2]	valid_0's multi_logloss: 0.853396	valid_1's multi_logloss: 0.854468                                          
[3]	valid_0's multi_logloss: 0.852921	valid_1's multi_logloss: 0.8545                                            
[4]	valid_0's multi_logloss: 0.852482	valid_1's multi_logloss: 0.854541                                          
[5]	valid_0's multi_logloss: 0.852045	valid_1's multi_logloss: 0.854558                                          
[6]	valid_0's multi_logloss: 0.851607	valid_1's multi_logloss: 0.854644                                          
[7]	valid_0's multi_logloss: 0.851213	valid_1's multi_logloss: 0.854638                                          
[8]	valid_0's multi_logloss: 0.850787	valid_1's multi_logloss: 0.854634                                          
[9]	valid_0's multi_logloss: 0.850486	valid_1's multi_logloss: 0.854685                 





[1]	valid_0's multi_logloss: 1.14992	valid_1's multi_logloss: 1.1499                                             
[2]	valid_0's multi_logloss: 1.14992	valid_1's multi_logloss: 1.1499                                             
[3]	valid_0's multi_logloss: 1.14992	valid_1's multi_logloss: 1.1499                                             
[4]	valid_0's multi_logloss: 1.14992	valid_1's multi_logloss: 1.1499                                             
[5]	valid_0's multi_logloss: 1.14992	valid_1's multi_logloss: 1.1499                                             
[6]	valid_0's multi_logloss: 1.14992	valid_1's multi_logloss: 1.1499                                             
[7]	valid_0's multi_logloss: 1.14992	valid_1's multi_logloss: 1.1499                                             
[8]	valid_0's multi_logloss: 1.14992	valid_1's multi_logloss: 1.1499                                             
[9]	valid_0's multi_logloss: 1.14992	valid_1's multi_logloss: 1.1499                    




[1]	valid_0's multi_logloss: 0.853515	valid_1's multi_logloss: 0.854443                                          
[2]	valid_0's multi_logloss: 0.852682	valid_1's multi_logloss: 0.854526                                          
[3]	valid_0's multi_logloss: 0.851915	valid_1's multi_logloss: 0.854645                                          
[4]	valid_0's multi_logloss: 0.85123	valid_1's multi_logloss: 0.854729                                           
[5]	valid_0's multi_logloss: 0.850484	valid_1's multi_logloss: 0.854751                                          
[6]	valid_0's multi_logloss: 0.849795	valid_1's multi_logloss: 0.854792                                          
[7]	valid_0's multi_logloss: 0.849082	valid_1's multi_logloss: 0.854797                                          
[8]	valid_0's multi_logloss: 0.849315	valid_1's multi_logloss: 0.85473                                           
[9]	valid_0's multi_logloss: 0.848654	valid_1's multi_logloss: 0.854801                 

[72]	valid_0's multi_logloss: 0.830725	valid_1's multi_logloss: 0.856274                                         
[73]	valid_0's multi_logloss: 0.830027	valid_1's multi_logloss: 0.856208                                         
[74]	valid_0's multi_logloss: 0.830302	valid_1's multi_logloss: 0.856189                                         
[75]	valid_0's multi_logloss: 0.829646	valid_1's multi_logloss: 0.856108                                         
0.7410658435791999                                                                                               
0.7410694829815361                                                                                               
{'bagging_fraction': 0.5400789012709521, 'boosting_type': 'dart', 'colsample_bytree': None, 'feature_fraction': 0.5401176170184878, 'lambda_l1': 0, 'lambda_l2': 0, 'learning_rate': 0.1934241860331929, 'min_child_samples': None, 'min_child_weight': 0.05291269202396168, 'min_data_in_leaf': 318, 'min_sum_hessian_in_le





[1]	valid_0's multi_logloss: 0.854156	valid_1's multi_logloss: 0.854353                                          
[2]	valid_0's multi_logloss: 0.853958	valid_1's multi_logloss: 0.854353                                          
[3]	valid_0's multi_logloss: 0.853755	valid_1's multi_logloss: 0.854351                                          
[4]	valid_0's multi_logloss: 0.853557	valid_1's multi_logloss: 0.854359                                          
[5]	valid_0's multi_logloss: 0.853371	valid_1's multi_logloss: 0.854359                                          
[6]	valid_0's multi_logloss: 0.853183	valid_1's multi_logloss: 0.854377                                          
[7]	valid_0's multi_logloss: 0.853005	valid_1's multi_logloss: 0.854395                                          
[8]	valid_0's multi_logloss: 0.852822	valid_1's multi_logloss: 0.854395                                          
[9]	valid_0's multi_logloss: 0.852646	valid_1's multi_logloss: 0.854419                 





[1]	valid_0's multi_logloss: 0.854229	valid_1's multi_logloss: 0.85436                                           
[2]	valid_0's multi_logloss: 0.854083	valid_1's multi_logloss: 0.854372                                          
[3]	valid_0's multi_logloss: 0.853947	valid_1's multi_logloss: 0.85438                                           
[4]	valid_0's multi_logloss: 0.85381	valid_1's multi_logloss: 0.854369                                           
[5]	valid_0's multi_logloss: 0.853673	valid_1's multi_logloss: 0.854369                                          
[6]	valid_0's multi_logloss: 0.853539	valid_1's multi_logloss: 0.854379                                          
[7]	valid_0's multi_logloss: 0.853415	valid_1's multi_logloss: 0.854381                                          
[8]	valid_0's multi_logloss: 0.853285	valid_1's multi_logloss: 0.854382                                          
[9]	valid_0's multi_logloss: 0.853162	valid_1's multi_logloss: 0.854381                 




[1]	valid_0's multi_logloss: 0.854358	valid_1's multi_logloss: 0.854357                                          
[2]	valid_0's multi_logloss: 0.854351	valid_1's multi_logloss: 0.854357                                          
[3]	valid_0's multi_logloss: 0.854344	valid_1's multi_logloss: 0.854359                                          
[4]	valid_0's multi_logloss: 0.854338	valid_1's multi_logloss: 0.854359                                          
[5]	valid_0's multi_logloss: 0.854333	valid_1's multi_logloss: 0.854361                                          
[6]	valid_0's multi_logloss: 0.854328	valid_1's multi_logloss: 0.854361                                          
[7]	valid_0's multi_logloss: 0.854322	valid_1's multi_logloss: 0.854359                                          
[8]	valid_0's multi_logloss: 0.854323	valid_1's multi_logloss: 0.85436                                           
[9]	valid_0's multi_logloss: 0.854319	valid_1's multi_logloss: 0.85436                  

[72]	valid_0's multi_logloss: 0.867016	valid_1's multi_logloss: 0.867192                                         
[73]	valid_0's multi_logloss: 0.865166	valid_1's multi_logloss: 0.865344                                         
[74]	valid_0's multi_logloss: 0.86532	valid_1's multi_logloss: 0.865497                                          
[75]	valid_0's multi_logloss: 0.863708	valid_1's multi_logloss: 0.863886                                         
0.7410658435791999                                                                                               
0.7410694829815361                                                                                               
{'bagging_fraction': 0.7650305183987045, 'boosting_type': 'dart', 'colsample_bytree': None, 'feature_fraction': 0.7819202362918904, 'lambda_l1': 0, 'lambda_l2': 0, 'learning_rate': 0.06005062066761176, 'min_child_samples': None, 'min_child_weight': 3.7655421286129434, 'min_data_in_leaf': 96735, 'min_sum_hessian_in_





[1]	valid_0's multi_logloss: 0.854072	valid_1's multi_logloss: 0.854367                                          
[2]	valid_0's multi_logloss: 0.853792	valid_1's multi_logloss: 0.854377                                          
[3]	valid_0's multi_logloss: 0.85351	valid_1's multi_logloss: 0.854395                                           
[4]	valid_0's multi_logloss: 0.85325	valid_1's multi_logloss: 0.854433                                           
[5]	valid_0's multi_logloss: 0.852997	valid_1's multi_logloss: 0.854446                                          
[6]	valid_0's multi_logloss: 0.852742	valid_1's multi_logloss: 0.854465                                          
[7]	valid_0's multi_logloss: 0.852501	valid_1's multi_logloss: 0.854479                                          
[8]	valid_0's multi_logloss: 0.852248	valid_1's multi_logloss: 0.854503                                          
[9]	valid_0's multi_logloss: 0.852019	valid_1's multi_logloss: 0.854543                 





[1]	valid_0's multi_logloss: 1.14992	valid_1's multi_logloss: 1.1499                                             
[2]	valid_0's multi_logloss: 1.14992	valid_1's multi_logloss: 1.1499                                             
[3]	valid_0's multi_logloss: 1.14992	valid_1's multi_logloss: 1.1499                                             
[4]	valid_0's multi_logloss: 1.14992	valid_1's multi_logloss: 1.1499                                             
[5]	valid_0's multi_logloss: 1.14992	valid_1's multi_logloss: 1.1499                                             
[6]	valid_0's multi_logloss: 1.14992	valid_1's multi_logloss: 1.1499                                             
[7]	valid_0's multi_logloss: 1.14992	valid_1's multi_logloss: 1.1499                                             
[8]	valid_0's multi_logloss: 1.14992	valid_1's multi_logloss: 1.1499                                             
[9]	valid_0's multi_logloss: 1.14992	valid_1's multi_logloss: 1.1499                    





[1]	valid_0's multi_logloss: 0.85436	valid_1's multi_logloss: 0.854354                                           
[2]	valid_0's multi_logloss: 0.854355	valid_1's multi_logloss: 0.854352                                          
[3]	valid_0's multi_logloss: 0.85435	valid_1's multi_logloss: 0.854354                                           
[4]	valid_0's multi_logloss: 0.854345	valid_1's multi_logloss: 0.854354                                          
[5]	valid_0's multi_logloss: 0.85434	valid_1's multi_logloss: 0.854354                                           
[6]	valid_0's multi_logloss: 0.854335	valid_1's multi_logloss: 0.854358                                          
[7]	valid_0's multi_logloss: 0.854331	valid_1's multi_logloss: 0.854359                                          
[8]	valid_0's multi_logloss: 0.854327	valid_1's multi_logloss: 0.854356                                          
[9]	valid_0's multi_logloss: 0.854324	valid_1's multi_logloss: 0.854358                 

0.8408130723243675                                                                                               
0.818920057527202                                                                                                
{'bagging_fraction': 0.7735281708228888, 'boosting_type': 'gbdt', 'colsample_bytree': None, 'feature_fraction': 0.7019688791355287, 'lambda_l1': 0.0005066902069864062, 'lambda_l2': 3.374501183311695e-06, 'learning_rate': 0.04616756254060491, 'min_child_samples': None, 'min_child_weight': 3.29982863252954, 'min_data_in_leaf': 56, 'min_sum_hessian_in_leaf': None, 'num_class': 4, 'num_leaves': 58, 'num_threads': 8, 'objective': 'multiclass', 'reg_alpha': None, 'reg_lambda': None, 'subsample': None, 'subsample_for_bin': 140000, 'verbose': -1, 'n_estimators': 200}

In [8]:
bestParams = {'bagging_fraction': 0.7735281708228888, 'boosting_type': 'gbdt', 'colsample_bytree': None, 'feature_fraction': 0.7019688791355287, 'lambda_l1': 0.0005066902069864062, 'lambda_l2': 3.374501183311695e-06, 'learning_rate': 0.04616756254060491, 'min_child_samples': None, 'min_child_weight': 3.29982863252954, 'min_data_in_leaf': 56, 'min_sum_hessian_in_leaf': None, 'num_class': 4, 'num_leaves': 58, 'num_threads': 8, 'objective': 'multiclass', 'reg_alpha': None, 'reg_lambda': None, 'subsample': None, 'subsample_for_bin': 140000, 'verbose': -1, 'n_estimators': 200}

In [14]:
model = LGBMClassifier(**bestParams)
    
model.fit(x_train_1, 
          y_train_1.values.reshape(-1), 
          eval_set=[(x_train_1, y_train_1.values.reshape(-1)), (x_val, y_val.values.reshape(-1))], 
          verbose=1, early_stopping_rounds=10)

preds_tr = model.predict(x_train_1,)
preds_val = model.predict(x_val)
val_score = accuracy_score(y_val, preds_val)
tr_score = accuracy_score(y_train_1, preds_tr)
print(tr_score, val_score)



[1]	valid_0's multi_logloss: 0.809338	valid_1's multi_logloss: 0.809506
[2]	valid_0's multi_logloss: 0.773751	valid_1's multi_logloss: 0.774106
[3]	valid_0's multi_logloss: 0.744055	valid_1's multi_logloss: 0.744591
[4]	valid_0's multi_logloss: 0.718559	valid_1's multi_logloss: 0.71929
[5]	valid_0's multi_logloss: 0.696536	valid_1's multi_logloss: 0.697507
[6]	valid_0's multi_logloss: 0.677077	valid_1's multi_logloss: 0.678234
[7]	valid_0's multi_logloss: 0.659963	valid_1's multi_logloss: 0.661316
[8]	valid_0's multi_logloss: 0.644045	valid_1's multi_logloss: 0.645566
[9]	valid_0's multi_logloss: 0.629871	valid_1's multi_logloss: 0.631601
[10]	valid_0's multi_logloss: 0.617197	valid_1's multi_logloss: 0.619105
[11]	valid_0's multi_logloss: 0.60545	valid_1's multi_logloss: 0.60754
[12]	valid_0's multi_logloss: 0.594796	valid_1's multi_logloss: 0.597066
[13]	valid_0's multi_logloss: 0.584992	valid_1's multi_logloss: 0.587412
[14]	valid_0's multi_logloss: 0.575775	valid_1's multi_logloss:

[114]	valid_0's multi_logloss: 0.425519	valid_1's multi_logloss: 0.45266
[115]	valid_0's multi_logloss: 0.425133	valid_1's multi_logloss: 0.452572
[116]	valid_0's multi_logloss: 0.424785	valid_1's multi_logloss: 0.452483
[117]	valid_0's multi_logloss: 0.424411	valid_1's multi_logloss: 0.452387
[118]	valid_0's multi_logloss: 0.424037	valid_1's multi_logloss: 0.452296
[119]	valid_0's multi_logloss: 0.42368	valid_1's multi_logloss: 0.452195
[120]	valid_0's multi_logloss: 0.423339	valid_1's multi_logloss: 0.452136
[121]	valid_0's multi_logloss: 0.422981	valid_1's multi_logloss: 0.452047
[122]	valid_0's multi_logloss: 0.422626	valid_1's multi_logloss: 0.451973
[123]	valid_0's multi_logloss: 0.422282	valid_1's multi_logloss: 0.45189
[124]	valid_0's multi_logloss: 0.421934	valid_1's multi_logloss: 0.451832
[125]	valid_0's multi_logloss: 0.421609	valid_1's multi_logloss: 0.451761
[126]	valid_0's multi_logloss: 0.421243	valid_1's multi_logloss: 0.451692
[127]	valid_0's multi_logloss: 0.420899	v

In [10]:
probs_tr = model.predict_proba(x_train_1)
probs_val = model.predict_proba(x_val)

In [11]:
np.save("Preds/LGB_val", probs_val)
np.save("Preds/LGB_tr", probs_tr)

probs_test = model.predict_proba(x_test)
np.save("Preds/LGB_test", probs_test)