<a href="https://colab.research.google.com/github/MStamirski/Spaceship-Titanic/blob/main/Model_LightGBM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Preparation

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount("/content/drive")
# Make the project folder on Drive the working directory.
# NOTE(review): presumably the notebooks imported later (FeaturesEngineering,
# Optimization) and the relative results CSV path resolve against this folder - confirm.
%cd "/content/drive/MyDrive/Colab_Notebooks/SDA_upskill/Spaceship"

In [None]:
!pip install lightgbm

# Model with parameters

In [None]:
import lightgbm as lgb
from sklearn.metrics import accuracy_score

In [None]:
# Baseline classifier built with LightGBM's default hyperparameters; it is the
# model passed to verify_feature_dataset() in the initial verification cells.
# Note: the name `model` is rebound to tuned classifiers in the tuning section.
model = lgb.LGBMClassifier()

In [None]:
def objective(trial):
    """Score one sampled LightGBM configuration on the validation subset.

    Draws a hyperparameter set from ``trial``, fits an ``LGBMClassifier`` on
    the notebook-level ``X_train`` / ``y_train`` and evaluates its predictions
    for ``X_val`` against ``y_val``. These four names are read as globals, so
    the subsets must be assigned before a study calls this function.

    Args:
        trial: object exposing ``suggest_float`` and ``suggest_int``.

    Returns:
        The value of ``accuracy_score(y_val, predictions)``.
    """
    # Keyword arguments are evaluated left to right, so the suggest_* calls
    # are issued in this exact order on every trial.
    search_space = dict(
        boosting_type="gbdt",
        lambda_l1=trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        lambda_l2=trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        num_leaves=trial.suggest_int("num_leaves", 2, 256),
        feature_fraction=trial.suggest_float("feature_fraction", 0.4, 1.0),
        bagging_fraction=trial.suggest_float("bagging_fraction", 0.4, 1.0),
        bagging_freq=trial.suggest_int("bagging_freq", 1, 7),
        min_child_samples=trial.suggest_int("min_child_samples", 5, 100),
    )

    candidate = lgb.LGBMClassifier(**search_space)
    candidate.fit(X_train, y_train)

    val_predictions = candidate.predict(X_val)
    return accuracy_score(y_val, val_predictions)

# Feature datasets

In [None]:
!pip install import-ipynb

In [None]:
import import_ipynb

In [None]:
from FeaturesEngineering import get_features, categories_one_hot_encoding, categories_target_encoding, categories_leave_one_out_encoding

In [None]:
# Build the feature table for the 'train' data, then derive one dataset per
# categorical-encoding helper (one-hot, target, leave-one-out, per their names).
# NOTE(review): all three encoders receive the same `df`; this assumes none of
# them modifies it in place - confirm in FeaturesEngineering.
df = get_features('train')
df_ohe = categories_one_hot_encoding(df)
df_te = categories_target_encoding(df)
df_looe = categories_leave_one_out_encoding(df)

# Initial verification

In [None]:
from Optimization import verify_feature_dataset, get_subsets, model_optimization, test_tuned_model

In [None]:
# Baseline check of the one-hot-encoded dataset with the `model` bound at this point.
# The returned value is stored and later written to the results file as the initial accuracy.
acc_ohe_ini = verify_feature_dataset(df_ohe, model)

Accuracy: 78.18251533742331%


In [None]:
# Baseline check of the target-encoded dataset with the `model` bound at this point.
# The returned value is stored and later written to the results file as the initial accuracy.
acc_te_ini = verify_feature_dataset(df_te, model)

Accuracy: 78.94938650306749%


In [None]:
# Baseline check of the leave-one-out-encoded dataset with the `model` bound at this point.
# The returned value is stored and later written to the results file as the initial accuracy.
acc_looe_ini = verify_feature_dataset(df_looe, model)

Accuracy: 77.03220858895705%


# Parameter tuning

In [None]:
from optuna.samplers import TPESampler
# Seeded TPE sampler, created once and passed to every model_optimization() call below.
# NOTE(review): because one instance is shared by all three studies, its random
# state presumably carries over from one study to the next; re-running a single
# tuning cell in isolation may not reproduce the recorded trials - confirm.
sampler = TPESampler(seed=1)

In [None]:
# Split the one-hot-encoded dataset. X_train, y_train, X_val and y_val are the
# globals read by objective(), so they must be assigned before the search runs.
X_train, X_val, X_test, y_train, y_val, y_test = get_subsets(df_ohe)
# Run the search with objective() and the shared seeded sampler.
# NOTE(review): 20 is presumably the number of trials - confirm in Optimization.
params = model_optimization(20, objective, samp=sampler)

[32m[I 2023-04-08 19:15:18,201][0m A new study created in memory with name: no-name-7be131df-b924-4231-93eb-f482d693ef47[0m
[32m[I 2023-04-08 19:15:18,311][0m Trial 0 finished with value: 0.7757009345794392 and parameters: {'lambda_l1': 5.6649755830282306e-05, 'lambda_l2': 0.030403280126677572, 'num_leaves': 2, 'feature_fraction': 0.5813995435791038, 'bagging_fraction': 0.48805353449026784, 'bagging_freq': 1, 'min_child_samples': 22}. Best is trial 0 with value: 0.7757009345794392.[0m




[32m[I 2023-04-08 19:15:19,113][0m Trial 1 finished with value: 0.805176132278936 and parameters: {'lambda_l1': 1.2883876209377052e-05, 'lambda_l2': 3.72312200494449e-05, 'num_leaves': 139, 'feature_fraction': 0.6515167086419769, 'bagging_fraction': 0.8111317002380557, 'bagging_freq': 2, 'min_child_samples': 89}. Best is trial 1 with value: 0.805176132278936.[0m




[32m[I 2023-04-08 19:15:21,201][0m Trial 2 finished with value: 0.8037383177570093 and parameters: {'lambda_l1': 1.763958399884789e-08, 'lambda_l2': 0.010819509974097813, 'num_leaves': 108, 'feature_fraction': 0.735213897067451, 'bagging_fraction': 0.4842321631571403, 'bagging_freq': 2, 'min_child_samples': 81}. Best is trial 1 with value: 0.805176132278936.[0m




[32m[I 2023-04-08 19:15:21,746][0m Trial 3 finished with value: 0.8123652048885693 and parameters: {'lambda_l1': 5.180291295699627, 'lambda_l2': 6.6193844201488494e-06, 'num_leaves': 178, 'feature_fraction': 0.9258334913776229, 'bagging_fraction': 0.9367639981023084, 'bagging_freq': 1, 'min_child_samples': 8}. Best is trial 3 with value: 0.8123652048885693.[0m




[32m[I 2023-04-08 19:15:22,346][0m Trial 4 finished with value: 0.8080517613227893 and parameters: {'lambda_l1': 3.376554594427989e-07, 'lambda_l2': 0.8003547575557912, 'num_leaves': 27, 'feature_fraction': 0.6526645750030313, 'bagging_fraction': 0.9747337180903012, 'bagging_freq': 4, 'min_child_samples': 71}. Best is trial 3 with value: 0.8123652048885693.[0m




[32m[I 2023-04-08 19:15:23,815][0m Trial 5 finished with value: 0.8023005032350827 and parameters: {'lambda_l1': 6.912588094940543e-06, 'lambda_l2': 0.015083716080906013, 'num_leaves': 214, 'feature_fraction': 0.4109729664065151, 'bagging_fraction': 0.8500865889669804, 'bagging_freq': 7, 'min_child_samples': 76}. Best is trial 3 with value: 0.8123652048885693.[0m




[32m[I 2023-04-08 19:15:24,385][0m Trial 6 finished with value: 0.8080517613227893 and parameters: {'lambda_l1': 3.341919070318744e-06, 'lambda_l2': 0.12691529280491062, 'num_leaves': 28, 'feature_fraction': 0.6687361157055431, 'bagging_fraction': 0.9451573018558573, 'bagging_freq': 3, 'min_child_samples': 32}. Best is trial 3 with value: 0.8123652048885693.[0m




[32m[I 2023-04-08 19:15:28,056][0m Trial 7 finished with value: 0.7951114306254493 and parameters: {'lambda_l1': 1.4799844388224288e-07, 'lambda_l2': 1.493834966470408e-08, 'num_leaves': 175, 'feature_fraction': 0.5269768696000354, 'bagging_fraction': 0.5593279956233357, 'bagging_freq': 4, 'min_child_samples': 10}. Best is trial 3 with value: 0.8123652048885693.[0m




[32m[I 2023-04-08 19:15:29,836][0m Trial 8 finished with value: 0.805176132278936 and parameters: {'lambda_l1': 0.0014691239860705116, 'lambda_l2': 2.091978294467618e-07, 'num_leaves': 152, 'feature_fraction': 0.8198550160125587, 'bagging_fraction': 0.46140065729669555, 'bagging_freq': 3, 'min_child_samples': 71}. Best is trial 3 with value: 0.8123652048885693.[0m




[32m[I 2023-04-08 19:15:31,352][0m Trial 9 finished with value: 0.8080517613227893 and parameters: {'lambda_l1': 5.340887446119036e-05, 'lambda_l2': 2.8156659603215856e-08, 'num_leaves': 138, 'feature_fraction': 0.7982767871318732, 'bagging_fraction': 0.7089334672349852, 'bagging_freq': 7, 'min_child_samples': 61}. Best is trial 3 with value: 0.8123652048885693.[0m




[32m[I 2023-04-08 19:15:32,265][0m Trial 10 finished with value: 0.8015815959741194 and parameters: {'lambda_l1': 8.712643539939922, 'lambda_l2': 1.549083458183585e-05, 'num_leaves': 248, 'feature_fraction': 0.9925937090708492, 'bagging_fraction': 0.6768889197383504, 'bagging_freq': 6, 'min_child_samples': 43}. Best is trial 3 with value: 0.8123652048885693.[0m




[32m[I 2023-04-08 19:15:33,304][0m Trial 11 finished with value: 0.8080517613227893 and parameters: {'lambda_l1': 0.02188304077492897, 'lambda_l2': 1.1380260878706794, 'num_leaves': 67, 'feature_fraction': 0.9643954701119819, 'bagging_fraction': 0.9817232796912836, 'bagging_freq': 5, 'min_child_samples': 53}. Best is trial 3 with value: 0.8123652048885693.[0m




[32m[I 2023-04-08 19:15:33,965][0m Trial 12 finished with value: 0.8080517613227893 and parameters: {'lambda_l1': 2.195652566262263, 'lambda_l2': 6.423737310936281, 'num_leaves': 87, 'feature_fraction': 0.8834122861669267, 'bagging_fraction': 0.996775506576717, 'bagging_freq': 1, 'min_child_samples': 98}. Best is trial 3 with value: 0.8123652048885693.[0m




[32m[I 2023-04-08 19:15:39,356][0m Trial 13 finished with value: 0.803019410496046 and parameters: {'lambda_l1': 0.05932616793504327, 'lambda_l2': 0.0006907393317806886, 'num_leaves': 194, 'feature_fraction': 0.8782196400387425, 'bagging_fraction': 0.8916085583286862, 'bagging_freq': 5, 'min_child_samples': 40}. Best is trial 3 with value: 0.8123652048885693.[0m




[32m[I 2023-04-08 19:15:41,592][0m Trial 14 finished with value: 0.7979870596693026 and parameters: {'lambda_l1': 0.0014140905655246048, 'lambda_l2': 0.0006433268192134389, 'num_leaves': 55, 'feature_fraction': 0.7429055342955537, 'bagging_fraction': 0.9082747796615125, 'bagging_freq': 4, 'min_child_samples': 64}. Best is trial 3 with value: 0.8123652048885693.[0m




[32m[I 2023-04-08 19:15:45,895][0m Trial 15 finished with value: 0.7864845434938893 and parameters: {'lambda_l1': 4.7018778691238357e-07, 'lambda_l2': 1.2383774907200228e-06, 'num_leaves': 230, 'feature_fraction': 0.6137149364143296, 'bagging_fraction': 0.9989651471399537, 'bagging_freq': 2, 'min_child_samples': 6}. Best is trial 3 with value: 0.8123652048885693.[0m




[32m[I 2023-04-08 19:15:46,465][0m Trial 16 finished with value: 0.8037383177570093 and parameters: {'lambda_l1': 0.4160822459679812, 'lambda_l2': 2.4109339979461693e-05, 'num_leaves': 178, 'feature_fraction': 0.7418959549242107, 'bagging_fraction': 0.8163982412288103, 'bagging_freq': 3, 'min_child_samples': 21}. Best is trial 3 with value: 0.8123652048885693.[0m




[32m[I 2023-04-08 19:15:46,827][0m Trial 17 finished with value: 0.8037383177570093 and parameters: {'lambda_l1': 0.0002921275560269595, 'lambda_l2': 0.4505582970336609, 'num_leaves': 91, 'feature_fraction': 0.918791290057132, 'bagging_fraction': 0.9101690807113071, 'bagging_freq': 5, 'min_child_samples': 53}. Best is trial 3 with value: 0.8123652048885693.[0m
[32m[I 2023-04-08 19:15:46,961][0m Trial 18 finished with value: 0.8058950395398994 and parameters: {'lambda_l1': 0.012148220538081511, 'lambda_l2': 0.002353473009545225, 'num_leaves': 4, 'feature_fraction': 0.837250628796929, 'bagging_fraction': 0.8598022393299446, 'bagging_freq': 1, 'min_child_samples': 86}. Best is trial 3 with value: 0.8123652048885693.[0m




[32m[I 2023-04-08 19:15:47,203][0m Trial 19 finished with value: 0.805176132278936 and parameters: {'lambda_l1': 0.3031698611293977, 'lambda_l2': 8.221430169867247, 'num_leaves': 110, 'feature_fraction': 0.772600092505302, 'bagging_fraction': 0.772612216280748, 'bagging_freq': 4, 'min_child_samples': 65}. Best is trial 3 with value: 0.8123652048885693.[0m



Number of finished trials: 20
Best trial:
Value: 0.8123652048885693
  Params: 
    lambda_l1: 5.180291295699627
    lambda_l2: 6.6193844201488494e-06
    num_leaves: 178
    feature_fraction: 0.9258334913776229
    bagging_fraction: 0.9367639981023084
    bagging_freq: 1
    min_child_samples: 8


In [None]:
# Build a classifier from the parameters returned by the one-hot search.
# Note: this rebinds `model`, which until now held the default baseline classifier.
model = lgb.LGBMClassifier(**params)
# Evaluate via the Optimization helper, passing the train and test subsets.
# The result is indexed with [0] and [1] when the results rows are built.
result_ohe_tuned = test_tuned_model(model, X_train, y_train, X_test, y_test)


Accuracy after tuning: 79.12593444508337%

Classification report
              precision    recall  f1-score   support

       False       0.80      0.77      0.79       861
        True       0.78      0.81      0.80       878

    accuracy                           0.79      1739
   macro avg       0.79      0.79      0.79      1739
weighted avg       0.79      0.79      0.79      1739


Confusion matrix
[[663 198]
 [165 713]]

All features number: 90, Selected features number: 40
Selected_features:

HomePlanet_Earth
HomePlanet_Europa
CryoSleep_False
Destination_55 Cancri e
Destination_TRAPPIST-1e
Cabin_deck_C
Cabin_deck_D
Cabin_deck_E
Cabin_deck_F
Cabin_deck_G
Cabin_side_P
Cabin_side_S
Cabin_num/100_0
Cabin_num/100_3
Cabin_num/100_4
Cabin_num/100_6
Cabin_num/100_8
Cabin_num/100_9
Cabin_num/100_10
Cabin_num/100_11
Cabin_num/100_14
Age_deciles_4
Age_deciles_5
Age_deciles_7
RService_deciles_1
RService_deciles_9
RService_deciles_10
FCourt_deciles_7
FCourt_deciles_9
FCourt_deciles_10
Sh

In [None]:
# Split the target-encoded dataset; this overwrites the subsets of the previous
# dataset. X_train, y_train, X_val and y_val are the globals read by objective().
X_train, X_val, X_test, y_train, y_val, y_test = get_subsets(df_te)
# Run the search with objective() and the same sampler instance as the previous study.
# NOTE(review): 20 is presumably the number of trials - confirm in Optimization.
params = model_optimization(20, objective, samp=sampler)

[32m[I 2023-04-08 19:15:47,650][0m A new study created in memory with name: no-name-31964fbf-c35f-4951-bf14-21a8a2d3e070[0m
[32m[I 2023-04-08 19:15:47,807][0m Trial 0 finished with value: 0.800862688713156 and parameters: {'lambda_l1': 1.3508814484493448, 'lambda_l2': 1.7269204695427844e-07, 'num_leaves': 37, 'feature_fraction': 0.8844347732257143, 'bagging_fraction': 0.6386061021913202, 'bagging_freq': 2, 'min_child_samples': 94}. Best is trial 0 with value: 0.800862688713156.[0m




[32m[I 2023-04-08 19:15:48,116][0m Trial 1 finished with value: 0.800862688713156 and parameters: {'lambda_l1': 1.3486297561351885e-05, 'lambda_l2': 0.05718852932029385, 'num_leaves': 187, 'feature_fraction': 0.9299836547234859, 'bagging_fraction': 0.7742033242333654, 'bagging_freq': 6, 'min_child_samples': 38}. Best is trial 0 with value: 0.800862688713156.[0m




[32m[I 2023-04-08 19:15:48,808][0m Trial 2 finished with value: 0.794392523364486 and parameters: {'lambda_l1': 2.6875157982766948e-06, 'lambda_l2': 1.1560482583788434, 'num_leaves': 111, 'feature_fraction': 0.9789040282890313, 'bagging_fraction': 0.7980648986910688, 'bagging_freq': 5, 'min_child_samples': 16}. Best is trial 0 with value: 0.800862688713156.[0m




[32m[I 2023-04-08 19:15:49,704][0m Trial 3 finished with value: 0.798705966930266 and parameters: {'lambda_l1': 3.510777677785371, 'lambda_l2': 0.00011199772508910995, 'num_leaves': 149, 'feature_fraction': 0.6448820816567687, 'bagging_fraction': 0.5422161881458166, 'bagging_freq': 7, 'min_child_samples': 60}. Best is trial 0 with value: 0.800862688713156.[0m




[32m[I 2023-04-08 19:15:51,478][0m Trial 4 finished with value: 0.803019410496046 and parameters: {'lambda_l1': 1.0612872414677476e-08, 'lambda_l2': 0.0035834737096036534, 'num_leaves': 85, 'feature_fraction': 0.7162348613545656, 'bagging_fraction': 0.9315652595864647, 'bagging_freq': 3, 'min_child_samples': 92}. Best is trial 4 with value: 0.803019410496046.[0m




[32m[I 2023-04-08 19:15:52,877][0m Trial 5 finished with value: 0.7850467289719626 and parameters: {'lambda_l1': 0.004076064447663881, 'lambda_l2': 1.3880054955129728e-08, 'num_leaves': 239, 'feature_fraction': 0.8145381505101543, 'bagging_fraction': 0.9983937102708883, 'bagging_freq': 2, 'min_child_samples': 18}. Best is trial 4 with value: 0.803019410496046.[0m
[32m[I 2023-04-08 19:15:53,068][0m Trial 6 finished with value: 0.7972681524083394 and parameters: {'lambda_l1': 2.4737691393844554, 'lambda_l2': 0.018679422849155414, 'num_leaves': 18, 'feature_fraction': 0.8532778315614798, 'bagging_fraction': 0.8523257130767479, 'bagging_freq': 7, 'min_child_samples': 73}. Best is trial 4 with value: 0.803019410496046.[0m




[32m[I 2023-04-08 19:15:53,172][0m Trial 7 finished with value: 0.798705966930266 and parameters: {'lambda_l1': 1.3135259518964455e-07, 'lambda_l2': 1.5098062003276678e-08, 'num_leaves': 8, 'feature_fraction': 0.4169838928124768, 'bagging_fraction': 0.5477266405618275, 'bagging_freq': 7, 'min_child_samples': 56}. Best is trial 4 with value: 0.803019410496046.[0m
[32m[I 2023-04-08 19:15:53,360][0m Trial 8 finished with value: 0.7979870596693026 and parameters: {'lambda_l1': 0.0009449260201027949, 'lambda_l2': 0.3786849376628258, 'num_leaves': 33, 'feature_fraction': 0.5675102074066837, 'bagging_fraction': 0.7514555628749727, 'bagging_freq': 7, 'min_child_samples': 58}. Best is trial 4 with value: 0.803019410496046.[0m




[32m[I 2023-04-08 19:15:53,539][0m Trial 9 finished with value: 0.7951114306254493 and parameters: {'lambda_l1': 1.4717213981231934e-08, 'lambda_l2': 0.16058096155247428, 'num_leaves': 61, 'feature_fraction': 0.8842631173712675, 'bagging_fraction': 0.632716386438503, 'bagging_freq': 7, 'min_child_samples': 76}. Best is trial 4 with value: 0.803019410496046.[0m
[32m[I 2023-04-08 19:15:53,719][0m Trial 10 finished with value: 0.7958303378864127 and parameters: {'lambda_l1': 1.710207903954888e-08, 'lambda_l2': 0.00022973279844881492, 'num_leaves': 100, 'feature_fraction': 0.7425004717497646, 'bagging_fraction': 0.4000632675583052, 'bagging_freq': 3, 'min_child_samples': 98}. Best is trial 4 with value: 0.803019410496046.[0m




[32m[I 2023-04-08 19:15:53,928][0m Trial 11 finished with value: 0.8058950395398994 and parameters: {'lambda_l1': 0.03690331630276406, 'lambda_l2': 1.7861910801078849e-06, 'num_leaves': 65, 'feature_fraction': 0.7658164138178464, 'bagging_fraction': 0.9174120039694148, 'bagging_freq': 1, 'min_child_samples': 94}. Best is trial 11 with value: 0.8058950395398994.[0m
[32m[I 2023-04-08 19:15:54,135][0m Trial 12 finished with value: 0.8044572250179727 and parameters: {'lambda_l1': 0.012039636731088692, 'lambda_l2': 6.652237646956939e-06, 'num_leaves': 78, 'feature_fraction': 0.7412127480019319, 'bagging_fraction': 0.940753657243913, 'bagging_freq': 1, 'min_child_samples': 84}. Best is trial 11 with value: 0.8058950395398994.[0m




[32m[I 2023-04-08 19:15:54,376][0m Trial 13 finished with value: 0.8037383177570093 and parameters: {'lambda_l1': 0.04157373612083644, 'lambda_l2': 5.544016431939428e-06, 'num_leaves': 147, 'feature_fraction': 0.7767294482208248, 'bagging_fraction': 0.922260883467722, 'bagging_freq': 1, 'min_child_samples': 79}. Best is trial 11 with value: 0.8058950395398994.[0m




[32m[I 2023-04-08 19:15:54,603][0m Trial 14 finished with value: 0.8087706685837527 and parameters: {'lambda_l1': 0.021139466940639787, 'lambda_l2': 7.846489121019732e-06, 'num_leaves': 63, 'feature_fraction': 0.6622556022515516, 'bagging_fraction': 0.9874066124989476, 'bagging_freq': 1, 'min_child_samples': 83}. Best is trial 14 with value: 0.8087706685837527.[0m




[32m[I 2023-04-08 19:15:54,912][0m Trial 15 finished with value: 0.800862688713156 and parameters: {'lambda_l1': 0.07297591725797659, 'lambda_l2': 5.235125397987929e-06, 'num_leaves': 61, 'feature_fraction': 0.6468491359491065, 'bagging_fraction': 0.8570657721884546, 'bagging_freq': 4, 'min_child_samples': 41}. Best is trial 14 with value: 0.8087706685837527.[0m




[32m[I 2023-04-08 19:15:55,197][0m Trial 16 finished with value: 0.8080517613227893 and parameters: {'lambda_l1': 0.00018362798598981086, 'lambda_l2': 6.376158958255017e-07, 'num_leaves': 128, 'feature_fraction': 0.6539182335158303, 'bagging_fraction': 0.9710121999514061, 'bagging_freq': 2, 'min_child_samples': 68}. Best is trial 14 with value: 0.8087706685837527.[0m




[32m[I 2023-04-08 19:15:55,505][0m Trial 17 finished with value: 0.8058950395398994 and parameters: {'lambda_l1': 0.00023547171008895986, 'lambda_l2': 4.8091773408054164e-05, 'num_leaves': 179, 'feature_fraction': 0.5559025186081251, 'bagging_fraction': 0.9699393104221425, 'bagging_freq': 2, 'min_child_samples': 68}. Best is trial 14 with value: 0.8087706685837527.[0m




[32m[I 2023-04-08 19:15:55,841][0m Trial 18 finished with value: 0.798705966930266 and parameters: {'lambda_l1': 9.251468143597606e-05, 'lambda_l2': 5.722463159817255e-07, 'num_leaves': 123, 'feature_fraction': 0.6772608157082901, 'bagging_fraction': 0.8637854440611954, 'bagging_freq': 3, 'min_child_samples': 44}. Best is trial 14 with value: 0.8087706685837527.[0m




[32m[I 2023-04-08 19:15:56,114][0m Trial 19 finished with value: 0.807332854061826 and parameters: {'lambda_l1': 0.0006384568018630738, 'lambda_l2': 0.00269718929214816, 'num_leaves': 225, 'feature_fraction': 0.5935572310318713, 'bagging_fraction': 0.9768655891718595, 'bagging_freq': 4, 'min_child_samples': 65}. Best is trial 14 with value: 0.8087706685837527.[0m



Number of finished trials: 20
Best trial:
Value: 0.8087706685837527
  Params: 
    lambda_l1: 0.021139466940639787
    lambda_l2: 7.846489121019732e-06
    num_leaves: 63
    feature_fraction: 0.6622556022515516
    bagging_fraction: 0.9874066124989476
    bagging_freq: 1
    min_child_samples: 83


In [None]:
# Build a classifier from the parameters returned by the target-encoding search
# (rebinds `model` again).
model = lgb.LGBMClassifier(**params)
# Evaluate via the Optimization helper, passing the train and test subsets.
# The result is indexed with [0] and [1] when the results rows are built.
result_te_tuned = test_tuned_model(model, X_train, y_train, X_test, y_test)


Accuracy after tuning: 78.83841288096608%

Classification report
              precision    recall  f1-score   support

       False       0.79      0.79      0.79       861
        True       0.79      0.79      0.79       878

    accuracy                           0.79      1739
   macro avg       0.79      0.79      0.79      1739
weighted avg       0.79      0.79      0.79      1739


Confusion matrix
[[678 183]
 [185 693]]

All features number: 14, Selected features number: 4
Selected_features:

Cabin_deck_transformed
Cabin_side_transformed
Cabin_num/100_transformed
Age_deciles_transformed


In [None]:
# Split the leave-one-out-encoded dataset; this overwrites the subsets of the
# previous dataset. X_train, y_train, X_val and y_val are the globals read by objective().
X_train, X_val, X_test, y_train, y_val, y_test = get_subsets(df_looe)
# Run the search with objective() and the same sampler instance as the previous studies.
# NOTE(review): 20 is presumably the number of trials - confirm in Optimization.
params = model_optimization(20, objective, samp=sampler)

[32m[I 2023-04-08 19:15:56,539][0m A new study created in memory with name: no-name-c2b612b9-5cb9-492e-a805-94ad636d7336[0m
[32m[I 2023-04-08 19:15:56,754][0m Trial 0 finished with value: 0.7814521926671459 and parameters: {'lambda_l1': 0.0010142899116384612, 'lambda_l2': 1.690818655548677e-07, 'num_leaves': 17, 'feature_fraction': 0.4728060734444424, 'bagging_fraction': 0.42673112712685707, 'bagging_freq': 1, 'min_child_samples': 26}. Best is trial 0 with value: 0.7814521926671459.[0m
[32m[I 2023-04-08 19:15:56,853][0m Trial 1 finished with value: 0.7915168943206327 and parameters: {'lambda_l1': 0.026115649029146128, 'lambda_l2': 0.001090066111976454, 'num_leaves': 5, 'feature_fraction': 0.4431845678136921, 'bagging_fraction': 0.9803657980001632, 'bagging_freq': 4, 'min_child_samples': 24}. Best is trial 1 with value: 0.7915168943206327.[0m




[32m[I 2023-04-08 19:15:57,274][0m Trial 2 finished with value: 0.7828900071890726 and parameters: {'lambda_l1': 1.8660861996646596e-06, 'lambda_l2': 0.04948034130913776, 'num_leaves': 51, 'feature_fraction': 0.7488153563639547, 'bagging_fraction': 0.9820119934529874, 'bagging_freq': 6, 'min_child_samples': 28}. Best is trial 1 with value: 0.7915168943206327.[0m




[32m[I 2023-04-08 19:15:57,513][0m Trial 3 finished with value: 0.7828900071890726 and parameters: {'lambda_l1': 0.00027792488102483314, 'lambda_l2': 0.003798406718422058, 'num_leaves': 213, 'feature_fraction': 0.4940748367876506, 'bagging_fraction': 0.4111457213064457, 'bagging_freq': 1, 'min_child_samples': 51}. Best is trial 1 with value: 0.7915168943206327.[0m




[32m[I 2023-04-08 19:15:57,970][0m Trial 4 finished with value: 0.7677929547088426 and parameters: {'lambda_l1': 0.0028639481750884796, 'lambda_l2': 0.0013172340052502289, 'num_leaves': 82, 'feature_fraction': 0.9931696926474693, 'bagging_fraction': 0.7478471315474782, 'bagging_freq': 3, 'min_child_samples': 57}. Best is trial 1 with value: 0.7915168943206327.[0m




[32m[I 2023-04-08 19:15:58,353][0m Trial 5 finished with value: 0.7692307692307693 and parameters: {'lambda_l1': 0.051051661169711944, 'lambda_l2': 0.01054620092199292, 'num_leaves': 69, 'feature_fraction': 0.43980090065706495, 'bagging_fraction': 0.6220505187484637, 'bagging_freq': 5, 'min_child_samples': 25}. Best is trial 1 with value: 0.7915168943206327.[0m




[32m[I 2023-04-08 19:15:58,702][0m Trial 6 finished with value: 0.7757009345794392 and parameters: {'lambda_l1': 0.059538787171203415, 'lambda_l2': 3.9703457871994557e-08, 'num_leaves': 68, 'feature_fraction': 0.8828527382460072, 'bagging_fraction': 0.5160605695739966, 'bagging_freq': 5, 'min_child_samples': 55}. Best is trial 1 with value: 0.7915168943206327.[0m




[32m[I 2023-04-08 19:15:58,971][0m Trial 7 finished with value: 0.785765636232926 and parameters: {'lambda_l1': 2.105095164203584, 'lambda_l2': 2.3424532621925857e-06, 'num_leaves': 18, 'feature_fraction': 0.8410395779732016, 'bagging_fraction': 0.8633068177259481, 'bagging_freq': 7, 'min_child_samples': 94}. Best is trial 1 with value: 0.7915168943206327.[0m




[32m[I 2023-04-08 19:15:59,414][0m Trial 8 finished with value: 0.7757009345794392 and parameters: {'lambda_l1': 1.3352548274436844e-08, 'lambda_l2': 1.2860527283566704e-06, 'num_leaves': 159, 'feature_fraction': 0.9694097924125699, 'bagging_fraction': 0.9701056715482478, 'bagging_freq': 4, 'min_child_samples': 92}. Best is trial 1 with value: 0.7915168943206327.[0m




[32m[I 2023-04-08 19:15:59,717][0m Trial 9 finished with value: 0.7800143781452192 and parameters: {'lambda_l1': 0.0059442337679175535, 'lambda_l2': 3.2364539145853725e-05, 'num_leaves': 125, 'feature_fraction': 0.762586289751984, 'bagging_fraction': 0.7297287529051375, 'bagging_freq': 7, 'min_child_samples': 93}. Best is trial 1 with value: 0.7915168943206327.[0m




[32m[I 2023-04-08 19:16:00,648][0m Trial 10 finished with value: 0.7821710999281093 and parameters: {'lambda_l1': 1.6295269055018433, 'lambda_l2': 3.3731699094708314, 'num_leaves': 249, 'feature_fraction': 0.5997075513769773, 'bagging_fraction': 0.8644456085862976, 'bagging_freq': 3, 'min_child_samples': 7}. Best is trial 1 with value: 0.7915168943206327.[0m
[32m[I 2023-04-08 19:16:00,832][0m Trial 11 finished with value: 0.7936736161035226 and parameters: {'lambda_l1': 9.772906213555897, 'lambda_l2': 3.461773950137805e-05, 'num_leaves': 9, 'feature_fraction': 0.6282789521745081, 'bagging_fraction': 0.8815908214390611, 'bagging_freq': 7, 'min_child_samples': 76}. Best is trial 11 with value: 0.7936736161035226.[0m




[32m[I 2023-04-08 19:16:00,957][0m Trial 12 finished with value: 0.7749820273184759 and parameters: {'lambda_l1': 5.19575779015331, 'lambda_l2': 6.780387409454993e-05, 'num_leaves': 2, 'feature_fraction': 0.6088759897514149, 'bagging_fraction': 0.8836734089008191, 'bagging_freq': 3, 'min_child_samples': 75}. Best is trial 11 with value: 0.7936736161035226.[0m




[32m[I 2023-04-08 19:16:01,207][0m Trial 13 finished with value: 0.785765636232926 and parameters: {'lambda_l1': 8.17816922635577, 'lambda_l2': 9.806935699148325e-05, 'num_leaves': 131, 'feature_fraction': 0.40128316726889884, 'bagging_fraction': 0.9821275674795433, 'bagging_freq': 5, 'min_child_samples': 75}. Best is trial 11 with value: 0.7936736161035226.[0m




[32m[I 2023-04-08 19:16:01,536][0m Trial 14 finished with value: 0.7778576563623293 and parameters: {'lambda_l1': 0.2674979141296675, 'lambda_l2': 0.00045006506524657825, 'num_leaves': 35, 'feature_fraction': 0.5660904187505842, 'bagging_fraction': 0.7996114082980238, 'bagging_freq': 4, 'min_child_samples': 41}. Best is trial 11 with value: 0.7936736161035226.[0m




[32m[I 2023-04-08 19:16:01,967][0m Trial 15 finished with value: 0.7699496764917325 and parameters: {'lambda_l1': 0.27410843411812313, 'lambda_l2': 9.230642863390291e-06, 'num_leaves': 111, 'feature_fraction': 0.6678126108576247, 'bagging_fraction': 0.9032251276770884, 'bagging_freq': 6, 'min_child_samples': 72}. Best is trial 11 with value: 0.7936736161035226.[0m




[32m[I 2023-04-08 19:16:06,152][0m Trial 16 finished with value: 0.783608914450036 and parameters: {'lambda_l1': 0.02443178325346628, 'lambda_l2': 0.00033928016698265315, 'num_leaves': 185, 'feature_fraction': 0.5264935561348706, 'bagging_fraction': 0.8216496104752591, 'bagging_freq': 6, 'min_child_samples': 5}. Best is trial 11 with value: 0.7936736161035226.[0m




[32m[I 2023-04-08 19:16:06,712][0m Trial 17 finished with value: 0.7785765636232926 and parameters: {'lambda_l1': 0.00020562874554551846, 'lambda_l2': 6.3309043719365335e-06, 'num_leaves': 91, 'feature_fraction': 0.5343474439353354, 'bagging_fraction': 0.9267718770669064, 'bagging_freq': 3, 'min_child_samples': 67}. Best is trial 11 with value: 0.7936736161035226.[0m




[32m[I 2023-04-08 19:16:07,150][0m Trial 18 finished with value: 0.7792954708842559 and parameters: {'lambda_l1': 0.43393824018590366, 'lambda_l2': 1.4121264626764952e-08, 'num_leaves': 44, 'feature_fraction': 0.6725212485489659, 'bagging_fraction': 0.9310473687985822, 'bagging_freq': 2, 'min_child_samples': 41}. Best is trial 11 with value: 0.7936736161035226.[0m




[32m[I 2023-04-08 19:16:07,410][0m Trial 19 finished with value: 0.7742631200575126 and parameters: {'lambda_l1': 0.794633568568987, 'lambda_l2': 0.07156452471571594, 'num_leaves': 30, 'feature_fraction': 0.414562480994978, 'bagging_fraction': 0.8179053739827917, 'bagging_freq': 4, 'min_child_samples': 83}. Best is trial 11 with value: 0.7936736161035226.[0m



Number of finished trials: 20
Best trial:
Value: 0.7936736161035226
  Params: 
    lambda_l1: 9.772906213555897
    lambda_l2: 3.461773950137805e-05
    num_leaves: 9
    feature_fraction: 0.6282789521745081
    bagging_fraction: 0.8815908214390611
    bagging_freq: 7
    min_child_samples: 76


In [None]:
# Build a classifier from the parameters returned by the leave-one-out search
# (rebinds `model` again).
model = lgb.LGBMClassifier(**params)
# Evaluate via the Optimization helper, passing the train and test subsets.
# The result is indexed with [0] and [1] when the results rows are built.
result_looe_tuned = test_tuned_model(model, X_train, y_train, X_test, y_test)


Accuracy after tuning: 78.26336975273146%

Classification report
              precision    recall  f1-score   support

       False       0.80      0.75      0.77       861
        True       0.77      0.81      0.79       878

    accuracy                           0.78      1739
   macro avg       0.78      0.78      0.78      1739
weighted avg       0.78      0.78      0.78      1739


Confusion matrix
[[649 212]
 [166 712]]

All features number: 14, Selected features number: 8
Selected_features:

HomePlanet_transformed
Cabin_deck_transformed
Cabin_side_transformed
Cabin_num/100_transformed
RService_deciles_transformed
FCourt_deciles_transformed
Spa_deciles_transformed
VRD_deciles_transformed


# Save results

In [None]:
data = []
data.append(['LightGBM', 'OHE', acc_ohe_ini, result_ohe_tuned[0], str(result_ohe_tuned[1])+" / 90"])
data.append(['LightGBM', 'TE', acc_te_ini, result_te_tuned[0], str(result_te_tuned[1])+" / 14"])
data.append(['LightGBM', 'LOOE', acc_looe_ini, result_looe_tuned[0], str(result_looe_tuned[1])+" / 14"])

In [None]:
import csv
from os.path import exists
# Results CSV; a relative path, so it resolves against the current working directory.
resfile = 'spaceship_results.csv'

In [None]:
# Append this notebook's rows to the results file, creating it with a header
# row when it does not exist yet.
file_is_new = not exists(resfile)

# Append mode creates the file if it is missing. newline='' is what the csv
# module expects for files it writes (otherwise extra blank rows can appear on
# platforms that translate line endings); the original append branch omitted it.
# The `with` block closes the file even if a write raises.
with open(resfile, 'a', newline='') as f:
  writer = csv.writer(f)
  if file_is_new:
    header = ['Model', 'Categories_encoding', 'Initial_accuracy', 'Tuned_Accuracy', 'Important_Features']
    writer.writerow(header)
  writer.writerows(data)