In [4]:
import os
import numpy as np
import pandas as pd

import lightgbm as lgb
from catboost import CatBoostRegressor
import xgboost as xgb

from sklearn.linear_model import Lasso

# Data Load

In [3]:
# Working directories, all resolved against the directory the kernel was started in.
base = os.getcwd()
data_path, submit_path, model_path = (
    os.path.join(base, folder) for folder in ('data', 'submit', 'model')
)

def load_data(name):
    """Load the array stored as `<name>.npy` inside `data_path`."""
    source = os.path.join(data_path, f"{name}.npy")
    return np.load(source)

def save_data(data, name):
    """Write `data` to `<name>.npy` inside `data_path`."""
    target = os.path.join(data_path, f"{name}.npy")
    np.save(target, data)

def reshape(data):
    """Flatten `data` into a 2-D table.

    The row count is `data.shape[0] * 40 * 40` and the column count is the
    size of the last axis.
    (presumably each sample is a 40x40 grid of feature vectors -- TODO confirm)
    """
    n_rows = data.shape[0] * 40 * 40
    n_cols = data.shape[-1]
    return data.reshape(n_rows, n_cols)

def get_test_data():
    """Load the saved 'test' array and return it flattened by `reshape`."""
    test_array = load_data('test')
    return reshape(test_array)

In [None]:
# Read the saved training array and flatten it into a 2-D table.
TRAIN_NAME = 'dl_train'
data = reshape(load_data(TRAIN_NAME))
print("data load")

In [None]:
# Split the flattened table into features (every column but the last) and the
# target (last column, kept as an (n_rows, 1) column vector).
n_rows = data.shape[0]
X = data[:, :-1]
# Copy the target column: a plain slice is a view onto the full training array,
# so without the copy `Y` would keep that whole array alive even after `data`
# is rebound here and `X` is deleted further down.
Y = data[:, -1].copy().reshape(n_rows, 1)
# Rebind `data` to a lightweight placeholder so this name no longer holds the array.
data = range(n_rows)
print(X.shape, Y.shape)

# lightgbm

In [None]:
# LightGBM base learner: 800 trees at learning rate 0.01, no row/column subsampling
# and no L1/L2 regularisation.
lgb_params = dict(
    boosting_type='gbdt',
    num_leaves=31,
    max_depth=-1,
    learning_rate=0.01,
    n_estimators=800,
    subsample_for_bin=200000,
    objective=None,
    class_weight=None,
    min_split_gain=0.0,
    min_child_weight=0.001,
    min_child_samples=20,
    subsample=1.0,
    subsample_freq=0,
    colsample_bytree=1.0,
    reg_alpha=0.0,
    reg_lambda=0.0,
    random_state=7,
    n_jobs=-1,
    silent=True,
    importance_type='split',
)
lgb_clf = lgb.LGBMRegressor(**lgb_params)

In [None]:
# Train the LightGBM regressor on the full training matrix.
# The estimator is bound to `lgb_clf`; no `clf` name exists at notebook level,
# so calling `clf.fit` raised NameError on a fresh kernel.
lgb_clf.fit(X, Y, verbose=True)
print("lightbgm fit")

In [None]:
# Persist LightGBM predictions for both splits; the stacking step reloads them by name.
lgb_train_pred = lgb_clf.predict(X)
save_data(lgb_train_pred, "lgb_train")

lgb_test_pred = lgb_clf.predict(get_test_data())
save_data(lgb_test_pred, "lgb_test")

print("lightbgm save data")

# catboost

In [None]:
# CatBoost base learner: 200 shallow trees (depth 4) with Bernoulli row subsampling.
cat_params = dict(
    iterations=200,
    learning_rate=0.1,
    depth=4,
    l2_leaf_reg=20,
    bootstrap_type='Bernoulli',
    subsample=0.6,
    eval_metric='RMSE',
    metric_period=10,
    od_type='Iter',
    od_wait=45,
    random_seed=17,
    allow_writing_files=False,
)
cat_clf = CatBoostRegressor(**cat_params)

In [None]:
# Train CatBoost; the empty cat_features list declares no categorical columns.
cat_fit_options = {'cat_features': [], 'verbose': True}
cat_clf.fit(X, Y, **cat_fit_options)
print("catboost fit")

In [None]:
# Persist CatBoost predictions for both splits; the stacking step reloads them by name.
cat_train_pred = cat_clf.predict(X)
save_data(cat_train_pred, "cat_train")

cat_test_pred = cat_clf.predict(get_test_data())
save_data(cat_test_pred, "cat_test")

print("catboost save")

# xgboost

In [None]:
# XGBoost base learner: 400 deep trees (max_depth 24), single-threaded (n_jobs=1).
# NOTE(review): newer xgboost releases name this objective 'reg:squarederror' --
# confirm against the installed version before changing it.
# NOTE(review): both `random_state` and `seed` are passed; verify which one the
# installed xgboost honours.
xgb_params = dict(
    max_depth=24,
    learning_rate=0.1,
    n_estimators=400,
    verbosity=1,
    silent=None,
    objective='reg:linear',
    booster='gbtree',
    n_jobs=1,
    nthread=None,
    gamma=0,
    min_child_weight=1,
    max_delta_step=0,
    subsample=1,
    colsample_bytree=1,
    colsample_bylevel=1,
    colsample_bynode=1,
    reg_alpha=0,
    reg_lambda=1,
    scale_pos_weight=1,
    base_score=0.5,
    random_state=0,
    seed=7,
    missing=None,
    importance_type='gain',
)
xgb_clf = xgb.XGBRegressor(**xgb_params)

In [None]:
# Train XGBoost with no sample weights, no warm-start model and no callbacks.
xgb_fit_options = dict(
    sample_weight=None,
    xgb_model=None,
    sample_weight_eval_set=None,
    verbose=True,
    callbacks=None,
)
xgb_clf.fit(X, Y, **xgb_fit_options)
print("xgboost fit")

In [None]:
# Persist XGBoost predictions for both splits; the stacking step reloads them by name.
xgb_train_pred = xgb_clf.predict(X)
save_data(xgb_train_pred, "xgb_train")

xgb_test_pred = xgb_clf.predict(get_test_data())
save_data(xgb_test_pred, "xgb_test")

print("xgboost save")

# Stack

In [None]:
# Drop the base-model feature matrix binding; `X` is reassigned to the stacked
# predictions in a later cell.
del X

In [None]:
names = ["lgb", "cat", "xgb"]
types = ["_train", "_test"]

def data_join(i):
    """Load the saved base-model predictions for one split and stack them as columns.

    Args:
        i: index into `types` (0 -> "_train", 1 -> "_test").

    Returns:
        A 2-D array with one column per entry of `names` when each saved
        prediction file holds a 1-D vector or an (n, 1) column.
    """
    # column_stack turns 1-D inputs into columns and stacks 2-D inputs side by
    # side; np.hstack would instead concatenate 1-D prediction vectors
    # end-to-end into a single long vector, which is not a feature matrix.
    return np.column_stack([load_data(model + types[i]) for model in names])

In [None]:
# Meta-learner plus its training matrix built from the saved base-model predictions.
stack_clf = Lasso()
X = data_join(0)
print("stack load ", X.shape)

In [None]:
# Fit the Lasso meta-learner on the stacked predictions against the original target.
stack_clf.fit(X=X, y=Y)
print("stack fit")

In [None]:
# Drop the stacked training matrix binding; the submit step builds its own
# input via data_join(1).
del X

# Submit

In [None]:
def submit(clf, name):
    """Predict on the stacked test features and write a submission CSV.

    Args:
        clf: fitted estimator exposing `predict`; it is called on `data_join(1)`.
        name: file stem; the result is written to `<submit_path>/<name>.csv`.
    """
    pred = clf.predict(data_join(1))

    submission = pd.read_csv(os.path.join(data_path, 'sample_submission.csv'))
    # Every column after the first is overwritten; each submission row receives
    # 1600 consecutive predictions.
    submission.iloc[:, 1:] = pred.reshape(-1, 1600)

    # to_csv does not create missing directories, so make sure the output
    # folder exists instead of failing after all models have been trained.
    os.makedirs(submit_path, exist_ok=True)
    submission.to_csv(os.path.join(submit_path, f'{name}.csv'), index=False)

In [None]:
# Write the stacked-model submission file.
print("Submit")
submit(clf=stack_clf, name="stack_lgb_cat_xgb")