In [1]:
import pandas as pd
import numpy as np
import random
import os

import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
import lightgbm as lgb
import xgboost as xgb
import catboost as cat

import sys
sys.path.append("..")
from utils import *
from preprocessing_utils import *
import warnings
warnings.filterwarnings("ignore")

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(42)

In [4]:
train_x, train_y, valid_x, valid_y = load_train_valid()
lec = LabelEncoder()
train_y = lec.fit_transform(train_y)
valid_y = lec.transform(valid_y)
train_x, valid_x= preprocessing(train_x, valid_x)

# lightGBM
lgbm = lgb.LGBMClassifier()
lgbm.fit(train_x, train_y)
prediction_lgbm = lgbm.predict_proba(valid_x)

# XGBoost 
xgbm = xgb.XGBClassifier()
xgbm.fit(train_x, train_y)
prediction_xgbm = xgbm.predict_proba(valid_x)

# Catboost 
catboost = cat.CatBoostClassifier()
catboost.fit(train_x, train_y)
prediction_catboost = catboost.predict_proba(valid_x)

# loss 
multi_loloss_lgbm = log_loss(valid_y, prediction_lgbm)
multi_loloss_xgbm = log_loss(valid_y, prediction_xgbm)
multi_loloss_cat = log_loss(valid_y, prediction_catboost)

print(multi_loloss_lgbm)
print(multi_loloss_xgbm)
print(multi_loloss_cat)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000445 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 691
[LightGBM] [Info] Number of data points in the train set: 8920, number of used features: 21
[LightGBM] [Info] Start training from score -1.229712
[LightGBM] [Info] Start training from score -1.396209
[LightGBM] [Info] Start training from score -0.776334
Learning rate set to 0.088469
0:	learn: 1.0547341	total: 70.5ms	remaining: 1m 10s
1:	learn: 1.0145674	total: 92.8ms	remaining: 46.3s
2:	learn: 0.9818046	total: 124ms	remaining: 41.2s
3:	learn: 0.9557680	total: 142ms	remaining: 35.4s
4:	learn: 0.9332163	total: 160ms	remaining: 31.8s
5:	learn: 0.9142838	total: 185ms	remaining: 30.7s
6:	learn: 0.8975192	total: 187ms	remaining: 26.6s
7:	learn: 0.8821395	total: 197ms	remaining: 24.4s
8:	learn: 0.8700030	total: 199ms	remaining: 21.9s
9:	learn: 0.8591677	total: 201ms	remaining: 19.8s
10:	learn: 0.84714

## Test

In [7]:
train_x, train_y, test = load_train_test()

lec = LabelEncoder()
train_y = lec.fit_transform(train_y)
train_x, test= preprocessing(train_x, test)

#
# lightGBM
lgbm = lgb.LGBMClassifier()
lgbm.fit(train_x, train_y)
prediction_lgbm = lgbm.predict_proba(test)

# XGBoost 
xgbm = xgb.XGBClassifier()
xgbm.fit(train_x, train_y)
prediction_xgbm = xgbm.predict_proba(test)

# Catboost 
catboost = cat.CatBoostClassifier()
catboost.fit(train_x, train_y)
prediction_catboost = catboost.predict_proba(test)


sample_submission = pd.read_csv('/home/workspace/DACON/soccer/Data/sample_submission.csv')
sample_submission.iloc[:,1:] = prediction_xgbm
sample_submission.to_csv('sample_submission_xgbm_no_tunning.csv', index= False)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000525 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 692
[LightGBM] [Info] Number of data points in the train set: 9008, number of used features: 21
[LightGBM] [Info] Start training from score -1.226955
[LightGBM] [Info] Start training from score -1.398356
[LightGBM] [Info] Start training from score -0.776934
Learning rate set to 0.088513
0:	learn: 1.0546616	total: 13.6ms	remaining: 13.6s
1:	learn: 1.0174752	total: 19ms	remaining: 9.46s
2:	learn: 0.9844981	total: 21.3ms	remaining: 7.09s
3:	learn: 0.9580077	total: 38.8ms	remaining: 9.66s
4:	learn: 0.9339225	total: 41.9ms	remaining: 8.34s
5:	learn: 0.9147213	total: 46.4ms	remaining: 7.69s
6:	learn: 0.8977827	total: 49.7ms	remaining: 7.04s
7:	learn: 0.8821462	total: 59.6ms	remaining: 7.38s
8:	learn: 0.8700211	total: 63.1ms	remaining: 6.95s
