In [1]:
import os
import numpy as np
import pandas as pd
from testMail import Mail

import lightgbm as lgb
from catboost import CatBoostRegressor
import xgboost as xgb

from sklearn.linear_model import Lasso

In [None]:
mail = Mail("rhkd865@gmail.com", "drbhcuuccubulmkd")

# Data Load

In [2]:
base = os.getcwd()
data_path = os.path.join(base, 'data')
save_data_path = os.path.join(data_path, 'sub')
submit_path = os.path.join(base, 'submit')
model_path = os.path.join(base, 'model')

if not os.path.isdir(save_data_path):
    os.mkdir(save_data_path)

def load_data(name):
    return np.load(os.path.join(data_path, f"{name}.npy"))

def load_y_data(name):
    return np.load(os.path.join(save_data_path, f"{name}.npy"))

def save_data(data, name):
    np.save(os.path.join(save_data_path, f"{name}.npy"), data)

def reshape(data):
    return data.reshape(data.shape[0] * 40 * 40, data.shape[-1])

def get_test_data():
    return reshape(load_data('test'))

In [3]:
data = reshape(load_data('dl_train'))
print("data load")

data load


In [None]:
X = data[:, :-1]
Y = data[:,  -1].reshape(X.shape[0], 1)
data = range(X.shape[0])
print(X.shape, Y.shape)

# lightgbm

In [8]:
lgb_clf = lgb.LGBMRegressor(boosting_type='gbdt', num_leaves=31, max_depth=- 1, learning_rate=0.01, \
                        n_estimators=800, subsample_for_bin=200000, objective=None, class_weight=None, \
                        min_split_gain=0.0, min_child_weight=0.001, min_child_samples=20, subsample=1.0, \
                        subsample_freq=0, colsample_bytree=1.0, reg_alpha=0.0, reg_lambda=0.0, \
                        random_state=7, n_jobs=- 1, silent=True, importance_type='split')

In [10]:
lgb_clf.fit(X, Y, verbose=True)
print("lightbgm fit")

  y = column_or_1d(y, warn=True)


lightbgm fit


In [11]:
save_data(lgb_clf.predict(X), "lgb_train")
save_data(lgb_clf.predict(get_test_data()), "lgb_test")
print("lightbgm save data")

lightbgm save data


In [None]:
mail.send("rhkd865@gmail.com", "AIFrenz2 LightGBM", "Finished LightGBM")
mail.send("rhkd865@naver.com", "AIFrenz2 LightGBM", "Finished LightGBM")

# catboost

In [12]:
cat_clf = CatBoostRegressor(iterations=500, learning_rate=0.01, \
                        depth=4, l2_leaf_reg=20, \
                        bootstrap_type='Bernoulli', subsample=0.6, \
                        eval_metric='RMSE', metric_period=10, \
                        od_type='Iter', od_wait=45, random_seed=17,\
                        allow_writing_files=False)

In [13]:
cat_clf.fit(X, Y, \
        cat_features=[], verbose=True)
print("catboost fit")

0:	learn: 1.0073958	total: 375ms	remaining: 3.37s
9:	learn: 0.8974059	total: 3.23s	remaining: 0us
catboost fit


In [14]:
save_data(cat_clf.predict(X), "cat_train")
save_data(cat_clf.predict(get_test_data()), "cat_test")
print("catboost save")

catboost save


In [None]:
mail.send("rhkd865@gmail.com", "AIFrenz2 Catboost", "Finished Catboost")
mail.send("rhkd865@naver.com", "AIFrenz2 Catboost", "Finished Catboost")

# xgboost

In [15]:
xgb_clf = xgb.XGBRegressor(max_depth=8, n_estimators=500, learning_rate=0.01,verbosity=1, silent=None, \
                           objective='reg:linear', booster='gbtree', n_jobs=1, nthread=None, gamma=0, \
                           min_child_weight=1, max_delta_step=0, subsample=1, \
                           colsample_bytree=1, colsample_bylevel=1, colsample_bynode=1, reg_alpha=0, reg_lambda=1, \
                           scale_pos_weight=1, base_score=0.5, random_state=0, seed=7, missing=None, importance_type='gain')

In [16]:
xgb_clf.fit(X, Y, \
            sample_weight=None, xgb_model=None, \
            sample_weight_eval_set=None, \
            verbose=True, callbacks=None)
print("xgboost fit")





KeyboardInterrupt: 

In [None]:
save_data(xgb_clf.predict(X), "xgb_train")
save_data(xgb_clf.predict(get_test_data()), "xgb_test")
print("xgboost save")

In [None]:
mail.send("rhkd865@gmail.com", "AIFrenz2 XGBoost", "Finished XGBoost")
mail.send("rhkd865@naver.com", "AIFrenz2 XGBoost", "Finished XGBoost")

# Stack

In [None]:
names = ["lgb", "cat", "xgb"]
types = ["_train", "_test"]

def data_join(i):
    return np.hstack((load_y_data(names[0] + types[i]), \
                      load_y_data(names[1] + types[i]), \
                      load_y_data(names[2] + types[i]))).reshape(-1, len(names))

In [25]:
stack_clf = Lasso()
print("stack load ")

stack load  (5000000, 2)


In [27]:
stack_clf.fit(data_join(0), Y)
print("stack fit")

stack fit


In [None]:
mail.send("rhkd865@gmail.com", "AIFrenz2 Stacking", "Finished Stacking")
mail.send("rhkd865@naver.com", "AIFrenz2 Stacking", "Finished Stacking")

# Submit

In [28]:
def submit(clf, name):
    pred = clf.predict(data_join(1))

    submission = pd.read_csv(os.path.join(data_path, 'sample_submission.csv'))
    submission.iloc[:, 1:] = pred.reshape(-1, 1600)

    submission.to_csv(os.path.join(submit_path, f'{name}.csv'), index=False)

In [29]:
print("Submit")
submit(stack_clf, "stack_lgb_cat_xgb")

Submit


In [None]:
mail.send("rhkd865@gmail.com", "AIFrenz2", "Finished Submit")
mail.send("rhkd865@naver.com", "AIFrenz2", "Finished Submit")

mail.close()