In [1]:
%env CUDA_VISIBLE_DEVICES=7
# Restricts the GPUs visible to this kernel to the device numbered 7 on the host.
# NOTE(review): presumably that single visible device is what `gpu_id = 0` in the
# search cell refers to — confirm on the target machine.

env: CUDA_VISIBLE_DEVICES=7


In [2]:
#imports:
import json
import math
import os
import time

import numpy as np
import pandas as pd
import xgboost
from sklearn.metrics import r2_score
from sklearn.model_selection import TimeSeriesSplit
from skopt import BayesSearchCV
from skopt.space import Real, Space, Integer
from tqdm import tqdm

In [3]:
# Directory prefix for the per-run result CSVs written by the search loop.
RES_SAVE_DIR = "./data/"

# Tag embedded in the result file names.
# NOTE(review): the feature cell selects the label column 'PJME_MW' — confirm that
# "physics" is the intended tag for this dataset.
dataset_name = "physics"

In [4]:
# Load the training table from the zipped CSV.
# NOTE(review): index_col/parse_dates are commented out below, so the frame keeps the
# default integer index, while create_features() reads df.index through the .dt
# accessor — confirm which column should become the datetime index.
pjme = pd.read_csv('./data/input/training.csv.zip')#, index_col=[0], parse_dates=[0])
def create_features(df, label=None):
    """
    Creates time series features from a datetime index.

    The index of ``df`` is copied into a ``date`` column and calendar fields
    are derived from it. All derived columns (``date``, ``hour``,
    ``dayofweek``, ``quarter``, ``month``, ``year``, ``dayofyear``,
    ``dayofmonth``, ``weekofyear``) are written onto ``df`` itself, so the
    caller's frame gains these columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index holds datetime values; a non-datetime index makes
        the ``.dt`` accessor raise ``AttributeError``.
    label : str, optional
        Name of the target column. When given (and truthy) the target is
        returned next to the features.

    Returns
    -------
    X : pandas.DataFrame
        The eight calendar feature columns (``date`` itself is not included).
    y : pandas.Series
        ``df[label]``; only returned when ``label`` is given.
    """
    df['date'] = df.index
    dates = df['date'].dt

    df['hour'] = dates.hour
    df['dayofweek'] = dates.dayofweek
    df['quarter'] = dates.quarter
    df['month'] = dates.month
    df['year'] = dates.year
    df['dayofyear'] = dates.dayofyear
    df['dayofmonth'] = dates.day

    # Series.dt.weekofyear was deprecated in pandas 1.1 and removed in 2.0;
    # isocalendar().week is its replacement. Fall back to the old accessor
    # only on pandas versions that predate isocalendar().
    if hasattr(dates, 'isocalendar'):
        iso_week = dates.isocalendar().week
        # isocalendar() yields a nullable UInt32 column; cast back to the
        # dtype weekofyear used to produce (int64, or float when NaT is present).
        if iso_week.isna().any():
            df['weekofyear'] = iso_week.astype('float64')
        else:
            df['weekofyear'] = iso_week.astype('int64')
    else:
        df['weekofyear'] = dates.weekofyear

    X = df[['hour', 'dayofweek', 'quarter', 'month', 'year',
            'dayofyear', 'dayofmonth', 'weekofyear']]
    if label:
        y = df[label]
        return X, y
    return X

In [5]:
# Derive the calendar feature matrix and the 'PJME_MW' target from the loaded frame
# (create_features also writes the derived columns onto pjme itself).
X, y = create_features(df=pjme, label='PJME_MW')
# Wrap the same features/target in xgboost's native matrix container.
dtrain = xgboost.DMatrix(data=X, label=y)

  


In [None]:
# Run the Bayesian hyper-parameter search three times (run numbers 1..3) and
# store each run's full cv_results_ table as its own CSV under RES_SAVE_DIR.

# Create the output directory up front: each search is long (729 iterations x
# 3 folds), so a missing folder must not surface only at the to_csv step.
os.makedirs(RES_SAVE_DIR, exist_ok=True)

for i in tqdm(range(1, 4)):
    bayes_cv_opt = BayesSearchCV(
        estimator=xgboost.XGBRegressor(
            # Fixed (non-searched) booster settings. The None-valued legacy
            # arguments (missing, nthread, seed, silent) were dropped: None is
            # their "unset" value, so omitting them leaves the model unchanged.
            base_score=0.5, booster='gbtree', colsample_bylevel=1,
            colsample_bynode=1, colsample_bytree=1, gamma=0,
            importance_type='gain', learning_rate=0.1, max_delta_step=0,
            max_depth=3, min_child_weight=1, n_estimators=100,
            n_jobs=1,
            # 'reg:linear' is the deprecated alias of the squared-error objective.
            objective='reg:squarederror',
            random_state=0,
            reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
            subsample=1,
            tree_method="gpu_hist",
            gpu_id=0,
            verbosity=0,
        ),
        search_spaces={
            'min_child_weight': Integer(1, 20),
            'max_depth': Integer(1, 20),
            'subsample': Real(0.5, 0.9, 'uniform'),
            'colsample_bytree': Real(0.5, 0.9, 'uniform'),
            'reg_lambda': Real(0.0, 1.0, 'uniform'),
            'reg_alpha': Real(0.0, 1.0, 'uniform'),
        },
        scoring='r2',
        # Ordered splits: every validation fold lies after its training rows.
        cv=TimeSeriesSplit(n_splits=3),
        n_iter=729,
        # NOTE(review): `iid` is deprecated in recent scikit-optimize releases;
        # kept so scoring stays as before on the installed version — confirm.
        iid=False,
        optimizer_kwargs={
            "acq_func": "EI"
        },
        # Seed the optimizer with the run number so each repeat is
        # reproducible while the three repeats still differ from each other.
        random_state=i,
    )

    start = time.time()
    bayes_cv_opt.fit(X, y)
    print("Evaluate in {} sec".format(time.time()-start))

    print("val. score: %s" % bayes_cv_opt.best_score_)

    # Tag every evaluated configuration with the run it belongs to.
    res = pd.DataFrame(bayes_cv_opt.cv_results_)
    res['run_number'] = i
    res['experiment_name'] = "bayes_opt"
    res.to_csv(os.path.join(RES_SAVE_DIR, f"bayes_opt_{i}_{dataset_name}.csv"), index=False)
    print(f"{i} completed")


  0%|          | 0/3 [00:00<?, ?it/s]

In [8]:
# NOTE(review): looks like a leftover scratch/sanity cell — it only prints "1";
# consider deleting it from the final notebook.
print("1")

1
