In [2]:
# SKLEARN
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit
from sklearn.metrics import r2_score

# LightGBM
from lightgbm import LGBMRegressor
import lightgbm as lgb

import numpy as np
import pandas as pd

# Bayesian Optimization
from bayes_opt import BayesianOptimization
from hyperopt import hp
from hyperopt import fmin, tpe, space_eval
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


In [3]:
def get_lgbm(params=None):
    """Build an unfitted ``LGBMRegressor`` with this notebook's defaults.

    Parameters
    ----------
    params : dict, optional
        Extra keyword arguments forwarded to ``LGBMRegressor``. Entries here
        take precedence over the defaults below, so a caller may override
        e.g. ``boosting_type`` without triggering a duplicate-keyword error.

    Returns
    -------
    LGBMRegressor
        A new estimator instance; nothing is fitted here.
    """
    settings = {
        'boosting_type': 'gbdt',
        # LightGBM ships no built-in 'rmsle' objective or metric; an unknown
        # objective name aborts training. Use plain L2 regression scored by
        # RMSE instead. To optimise RMSLE, fit on np.log1p(target) and apply
        # np.expm1 to the predictions.
        'objective': 'regression',
        'metric': 'rmse',
        'random_state': 1,
        'silent': True,
    }
    # Merge instead of passing both literal keywords and **params, so that
    # overlapping keys override the defaults rather than raising TypeError.
    settings.update(params or {})
    return LGBMRegressor(**settings)

def create_submission(model, submission_name, send_submission=False, message="LGBM", features=None):
    """Predict with ``model``, write a submission CSV and optionally upload it.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict``.
    submission_name : str
        Path of the CSV file to write.
    send_submission : bool, default False
        When true, upload the CSV with the ``kaggle`` command-line tool.
    message : str, default "LGBM"
        Submission message passed to ``kaggle ... -m``.
    features : pandas.DataFrame, optional
        Frame to predict on. Defaults to the notebook-level ``X_test_sub``.
        The ``'revenue'`` column is dropped before predicting.

    Returns
    -------
    None
    """
    import subprocess  # local import: the notebook's import cell is left untouched

    if features is None:
        # Falls back to the notebook global, as the original code did.
        features = X_test_sub

    # NOTE(review): the dropped 'revenue' column and the 'ID'/'item_cnt_month'
    # header look carried over from other competitions -- confirm them against
    # the ashrae-energy-prediction sample submission before uploading.
    result = model.predict(features.drop('revenue', axis=1))
    submission = pd.DataFrame({
        "ID": np.arange(result.shape[0]),
        "item_cnt_month": result
    })
    submission.to_csv(submission_name, index=False)

    if send_submission:
        # An argument list (no shell) keeps messages or paths containing
        # spaces or shell metacharacters intact and cannot inject commands.
        command = [
            "kaggle", "competitions", "submit",
            "-c", "ashrae-energy-prediction",
            "-f", str(submission_name),
            "-m", str(message),
        ]
        try:
            completed = subprocess.run(command, capture_output=True, text=True)
        except FileNotFoundError:
            print("kaggle CLI not found on PATH; submission was not sent.")
        else:
            # Echo the tool's output into the cell, like the `!` shell escape did.
            print(completed.stdout + completed.stderr)

In [4]:
# Load the pickled training and validation frames.
train, test = pd.read_pickle('data/train.pkl'), pd.read_pickle('data/val.pkl')

# Split each frame into features and the 'meter' target.
X_train, y_train = train.drop('meter', axis=1), train['meter']
# The hold-out labels must come from the hold-out frame; taking them from
# `train` would pair X_test with unrelated rows.
X_test, y_test = test.drop('meter', axis=1), test['meter']

In [8]:
# Columns passed to LightGBM as categorical features when fitting.
cat_feats = [
    "building_id",
    "site_id",
]

In [None]:
#%%notify -m "Grid search LGBM was ended"

# Candidate values from which the randomized search samples.
gridParams = {
    'learning_rate': np.linspace(0.01, 2, 10),
    # LightGBM treats max_depth <= 0 as "no depth limit".
    'max_depth': np.arange(0, 14),
    'n_estimators': np.arange(100, 1000, 100),
    'num_leaves': np.arange(8, 65, 8),
    'colsample_bytree' : [0.65, 0.66],
    'reg_alpha' : np.arange(0, 100),
    'reg_lambda' : np.arange(0, 100)
}

lgbm = get_lgbm()

# Randomized search with time-ordered CV splits. random_state pins the
# sampled candidates so that Restart & Run All reproduces the same search.
grid = RandomizedSearchCV(
    lgbm,
    gridParams,
    verbose=0,
    cv=TimeSeriesSplit(n_splits=5),
    random_state=1,
)

# 'rmsle' is not a LightGBM built-in metric, so the evaluation sets are scored
# with 'rmse', which is. The extra keyword arguments are forwarded to
# LGBMRegressor.fit for every candidate/split.
grid.fit(
    X_train, y_train,
    eval_metric="rmse",
    eval_set=[(X_train, y_train), (X_test, y_test)],
    verbose=10,
    early_stopping_rounds=20,
    categorical_feature=cat_feats)


In [None]:
# Best hyper-parameter combination found by the randomized search.
grid.best_params_