In [None]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split, KFold
import warnings
from numpy import savetxt
from numpy import loadtxt
warnings.filterwarnings("ignore")

In [None]:
from sklearn.metrics import r2_score
import optuna
from sklearn.ensemble import BaggingRegressor,AdaBoostRegressor,GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
import xgboost as xgb
from sklearn.svm import SVR
from catboost import CatBoostRegressor, Pool
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Load the two comma-separated text files used below as the feature matrix
# (`train`) and the regression target (`label`) of train_test_split.
# NOTE(review): paths are relative to the notebook's working directory, and the
# files are presumably numeric and header-less (plain loadtxt) — confirm.
train = loadtxt('Data_Set/efficiencyTrain.csv', delimiter=',')
label = loadtxt('Data_Set/efficiencylabel.csv', delimiter=',')

In [None]:
# Set the CUDA environment variables for this process: device ordering by PCI
# bus ID, and only the device with index "4" visible.
# NOTE(review): this cell runs after xgboost/catboost were imported, and no
# estimator in the visible cells explicitly requests a GPU — confirm it is
# still needed and that it takes effect at this point.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="4"

In [None]:
# Hold out 15% of the samples; the fixed random_state keeps the split stable
# across runs.
# NOTE(review): every Optuna objective in this notebook scores on
# X_test/y_test, so this split effectively serves as the tuning/validation set
# rather than an untouched test set.
X_train, X_test, y_train, y_test = train_test_split(train, label, test_size=0.15, random_state=42)

# Optuna

In [None]:
def bossting_objectiveDecisionTreeRegressor(trail):
    """Optuna objective for a ``DecisionTreeRegressor``.

    All hyper-parameters are sampled from ``trail``; whatever ends up in
    ``trail.params`` is forwarded as keyword arguments to the estimator, which
    is then fitted on the notebook-level ``X_train``/``y_train``.

    Parameters
    ----------
    trail
        Optuna trial object (used through ``suggest_*`` and ``params``).

    Returns
    -------
    float
        R^2 score on ``X_test``/``y_test``, rounded to four decimals.

    Notes
    -----
    NOTE(review): the score is computed on the held-out split, so the search
    is tuned directly against it; ``random_state`` is itself a search dimension.
    """
    # Each suggest_* call records its value in trail.params. The call order is
    # kept as-is because it determines the order in which values are sampled.
    trail.suggest_float('min_samples_split', 0.001, 1.0)
    trail.suggest_int('random_state', 4, 50)
    trail.suggest_float('min_samples_leaf', 0.001, 0.5)
    trail.suggest_float('min_weight_fraction_leaf', 0.0, 0.5)
    trail.suggest_float('min_impurity_decrease', 0.0, 0.5)
    trail.suggest_float('ccp_alpha', 0.0, 10.5)
    trail.suggest_int('max_depth', 2, 50)

    # Forward every sampled value except a 'number_of_splits' entry, if any.
    estimator_kwargs = dict(trail.params)
    estimator_kwargs.pop('number_of_splits', None)

    tree = DecisionTreeRegressor(**estimator_kwargs)
    tree.fit(X_train, y_train)

    predictions = tree.predict(X_test)
    score = r2_score(y_test, predictions)
    return round(score, 4)

In [None]:
def bossting_objectiveAdaBoostRegressor(trail):
    """Optuna objective for an ``AdaBoostRegressor``.

    Samples ``n_estimators``, ``learning_rate`` (log scale) and
    ``random_state`` from ``trail``, fits the estimator on the notebook-level
    ``X_train``/``y_train`` and scores it on ``X_test``/``y_test``.

    Parameters
    ----------
    trail
        Optuna trial object (used through ``suggest_*`` and ``params``).

    Returns
    -------
    float
        R^2 score on the held-out split, rounded to four decimals.
    """
    # Sampling order is preserved; the values are read back via trail.params.
    trail.suggest_int('n_estimators', 1, 50)
    trail.suggest_float("learning_rate", 1e-3, 1e-1, log=True)
    trail.suggest_int('random_state', 1, 100)

    # Forward every sampled value except a 'number_of_splits' entry, if any.
    estimator_kwargs = dict(trail.params)
    estimator_kwargs.pop('number_of_splits', None)

    booster = AdaBoostRegressor(**estimator_kwargs)
    booster.fit(X_train, y_train)

    predictions = booster.predict(X_test)
    return round(r2_score(y_test, predictions), 4)

In [None]:
def bossting_objectiveCatBoostRegressor(trail):
    """Optuna objective for a ``CatBoostRegressor``.

    Samples the search dimensions from ``trail``, adds the fixed RMSE /
    overfitting-detector settings, fits on the notebook-level
    ``X_train``/``y_train`` with ``(X_test, y_test)`` as the evaluation set,
    and scores the fitted model on that same split.

    Parameters
    ----------
    trail
        Optuna trial object (used through ``suggest_*`` and ``params``).

    Returns
    -------
    float
        R^2 score on ``X_test``/``y_test``, rounded to four decimals.

    Notes
    -----
    NOTE(review): the evaluation set that drives ``use_best_model`` is the same
    split the returned score is computed on, so the score is optimistic.
    """
    # Sampling order is preserved; the values are read back via trail.params.
    trail.suggest_int('iterations', 3000, 40000)
    trail.suggest_float("learning_rate", 1e-3, 1e-1, log=True)
    trail.suggest_int('depth', 2, 16)
    trail.suggest_int('random_seed', 50, 500)
    trail.suggest_int('metric_period', 100, 500)
    trail.suggest_int('od_wait', 10, 150)

    sampled = {key: value for key, value in trail.params.items()
               if key != 'number_of_splits'}
    # Settings that are not part of the search space.
    fixed = {
        'loss_function': 'RMSE',
        'eval_metric': 'RMSE',
        'od_type': 'Iter',
        'use_best_model': True,
        'verbose': False,
    }

    booster = CatBoostRegressor(**{**sampled, **fixed})
    booster.fit(
        X_train,
        y_train,
        eval_set=(X_test, y_test),
        use_best_model=True,
        plot=False,
    )

    predictions = booster.predict(X_test)
    return round(r2_score(y_test, predictions), 4)

In [None]:
def bossting_objectiveKNeighborsRegressor(trail):
    """Optuna objective for a ``KNeighborsRegressor``.

    Samples ``n_neighbors``, ``leaf_size``, ``p`` and ``n_jobs`` from
    ``trail`` and forwards all of them to the estimator, which is fitted on
    the notebook-level ``X_train``/``y_train``.

    The previous version sampled ``n_neighbors`` but always built the
    estimator with ``n_neighbors=3``, so that search dimension had no effect.

    Parameters
    ----------
    trail
        Optuna trial object (used through ``suggest_*`` and ``params``).

    Returns
    -------
    float
        R^2 score on ``X_test``/``y_test``, rounded to four decimals.
    """
    trail.suggest_int('n_neighbors', 1, 50)
    trail.suggest_int('leaf_size', 1, 100)
    trail.suggest_int('p', 1, 2)
    # NOTE(review): n_jobs presumably only changes parallelism, not the
    # predictions; it is kept so the study's parameter keys stay the same, but
    # consider fixing it instead of searching over it.
    trail.suggest_int('n_jobs', 2, 8)

    # Same forwarding convention as the other objectives in this notebook.
    params = {x: y for (x, y) in trail.params.items() if x != 'number_of_splits'}

    reg = KNeighborsRegressor(**params)
    reg.fit(X_train, y_train)
    return round(r2_score(y_test, reg.predict(X_test)), 4)


In [None]:
def bossting_objectiveRandomForestRegressor(trail):
    """Optuna objective for a ``RandomForestRegressor``.

    Samples the search dimensions from ``trail`` and fits the forest on the
    notebook-level ``X_train``/``y_train`` with a fixed ``random_state``.

    The previous version assigned ``random_state = 42`` but never passed it to
    the estimator, so identical hyper-parameters could score differently
    between trials. It is now passed explicitly, in the same way the
    gradient-boosting objective in this notebook does.

    Parameters
    ----------
    trail
        Optuna trial object (used through ``suggest_*`` and ``params``).

    Returns
    -------
    float
        R^2 score on ``X_test``/``y_test``, rounded to four decimals.
    """
    random_state = 42
    criterions = ['friedman_mse', 'squared_error']

    trail.suggest_categorical('criterion', criterions)
    trail.suggest_int('n_estimators', 50, 500)
    trail.suggest_float('min_samples_leaf', 0.001, 0.5)
    trail.suggest_float('min_samples_split', 0.001, 1.0)
    trail.suggest_float('min_weight_fraction_leaf', 0.001, 0.5)
    trail.suggest_int('max_depth', 2, 50)
    # NOTE(review): n_jobs presumably only changes parallelism, not the fitted
    # model; kept so the study's parameter keys stay the same.
    trail.suggest_int('n_jobs', 2, 4)
#     verbose= trail.suggest_int('verbose',1,100)
    trail.suggest_float('ccp_alpha', 0.5, 0.9)
#     min_impurity_decrease=trail.suggest_float('min_impurity_decrease',0.001,100.0)

    params = {x: y for (x, y) in trail.params.items() if x != 'number_of_splits'}
    reg = RandomForestRegressor(random_state=random_state, **params)
    reg.fit(X_train, y_train)
    return round(r2_score(y_test, reg.predict(X_test)), 4)
    

In [None]:
def bossting_objectiveSVR(trail):
    """Optuna objective for an ``SVR`` model.

    Samples the search dimensions from ``trail``, forwards them to ``SVR``,
    fits on the notebook-level ``X_train``/``y_train`` and scores on
    ``X_test``/``y_test``.

    Parameters
    ----------
    trail
        Optuna trial object (used through ``suggest_*`` and ``params``).

    Returns
    -------
    float
        R^2 score on the held-out split, rounded to four decimals.

    Notes
    -----
    NOTE(review): no ``kernel`` is sampled, so the estimator's default kernel
    is used — confirm that ``degree`` and ``coef0`` have any effect with it.
    NOTE(review): ``cache_size`` looks like a resource setting rather than a
    model hyper-parameter, and the ``max_iter`` range includes 0 — verify both
    are intended.
    """
    # Sampling order is preserved; the values are read back via trail.params.
    trail.suggest_float("coef0", 0.0, 10.0)
    trail.suggest_float("tol", 0.001, 1.0)
    trail.suggest_float("epsilon", 0.1, 10.0)
    trail.suggest_float("C", 1.0, 10.0)
    trail.suggest_int('degree', 3, 10)
    trail.suggest_int('max_iter', -1, 100)
    trail.suggest_int('cache_size', 10, 400)

    # Forward every sampled value except a 'number_of_splits' entry, if any.
    estimator_kwargs = dict(trail.params)
    estimator_kwargs.pop('number_of_splits', None)

    svr_model = SVR(**estimator_kwargs)
    svr_model.fit(X_train, y_train)

    predictions = svr_model.predict(X_test)
    return round(r2_score(y_test, predictions), 4)



In [None]:
def bossting_objectiveGradientBoostingRegressor(trail):
    """Optuna objective for a ``GradientBoostingRegressor``.

    Samples the search dimensions from ``trail`` and fits the estimator on the
    notebook-level ``X_train``/``y_train`` with ``random_state`` fixed to 42.

    Parameters
    ----------
    trail
        Optuna trial object (used through ``suggest_*`` and ``params``).

    Returns
    -------
    float
        R^2 score on ``X_test``/``y_test``, rounded to four decimals.
    """
    seed = 42
    loss_choices = ['squared_error', 'absolute_error', 'huber', 'quantile']
    criterion_choices = ['friedman_mse', 'squared_error']

    # Sampling order is preserved; the values are read back via trail.params.
    trail.suggest_float("learning_rate", 1e-3, 1e-1, log=True)
    # max_features=trail.suggest_float('max_features',0.001,1.0)
    trail.suggest_float('alpha', 0.5, 0.9)
    # ccp_alpha=trail.suggest_float('ccp_alpha',0.0,1)
    trail.suggest_categorical('loss', loss_choices)
    trail.suggest_categorical('criterion', criterion_choices)
    trail.suggest_int('n_estimators', 50, 500)
    # subsample=trail.suggest_float('subsample',0.001,1.0)
    trail.suggest_float('min_samples_leaf', 0.001, 0.5)
    trail.suggest_float('min_samples_split', 0.001, 1.0)
    trail.suggest_float('min_weight_fraction_leaf', 0.001, 0.5)
    trail.suggest_int('max_depth', 2, 50)
    trail.suggest_float('min_impurity_decrease', 0.001, 100.0)

    # Forward every sampled value except a 'number_of_splits' entry, if any.
    estimator_kwargs = dict(trail.params)
    estimator_kwargs.pop('number_of_splits', None)

    booster = GradientBoostingRegressor(random_state=seed, **estimator_kwargs)
    booster.fit(X_train, y_train)

    predictions = booster.predict(X_test)
    return round(r2_score(y_test, predictions), 4)
    

In [None]:
# Objective functions to optimise; one Optuna study is created per entry and
# the results are later reported in this same order.
optuna_Array = [
    bossting_objectiveDecisionTreeRegressor,
    bossting_objectiveAdaBoostRegressor,
    bossting_objectiveCatBoostRegressor,
    bossting_objectiveKNeighborsRegressor,
    bossting_objectiveRandomForestRegressor,
    bossting_objectiveSVR,
    bossting_objectiveGradientBoostingRegressor,
]

In [None]:
# Run one maximisation study per objective, in the order of optuna_Array.
# The sampler is seeded explicitly: an unseeded sampler proposes different
# hyper-parameters on every run, so the search could not be reproduced with
# "Restart Kernel -> Run All". (Full repeatability additionally requires the
# objectives themselves to be deterministic.)
# NOTE(review): 5000 trials per objective is very expensive, in particular for
# the CatBoost objective — consider lowering n_trials for routine runs.
study_array = []
for item in optuna_Array:
    study = optuna.create_study(
        direction="maximize",
        sampler=optuna.samplers.TPESampler(seed=42),
    )
    study.optimize(item, n_trials=5000)
    study_array.append(study)

In [None]:
# Report the best score and the matching hyper-parameters of every study,
# in the order the studies were run.
for item in study_array:
    print(f"r2: {item.best_value!s}")
    print(f"Best parameter: {item.best_params!s}")

In [None]:
# Displays the first entry of optuna_Array (the decision-tree objective) as
# the cell output.
# NOTE(review): looks like a leftover inspection cell — consider removing it.
optuna_Array[0]

In [None]:
# Show one hyper-parameter importance figure per study, in the order the
# studies were run.
for item in study_array:
    fig = optuna.visualization.plot_param_importances(item)
    fig.show()

In [None]:
def saveResults(studies=None, path='Results/Efficiency.txt'):
    """Write the best score and best parameters of each study to a text file.

    The previous version passed the list of studies straight to ``write()``,
    which only accepts text and therefore raised ``TypeError``. Each study is
    now written as two lines, in the same format the notebook prints them.

    Parameters
    ----------
    studies : iterable, optional
        Objects exposing ``best_value`` and ``best_params``. Defaults to the
        notebook-level ``study_array``.
    path : str, optional
        Destination file; its parent directory is created when missing.

    Returns
    -------
    None
    """
    if studies is None:
        studies = study_array

    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # The context manager closes the file even if a write fails.
    with open(path, 'w') as out:
        for study in studies:
            out.write('r2: ' + str(study.best_value) + '\n')
            out.write('Best parameter: ' + str(study.best_params) + '\n')

def getResults(path="Results/Efficiency.txt"):
    """Print the contents of the saved results file.

    The previous version printed the file object itself (its repr) rather
    than the text it contains, and never closed the file.

    Parameters
    ----------
    path : str, optional
        File to read; defaults to the notebook's results file.

    Returns
    -------
    None
    """
    with open(path, "r") as f:
        # end='' avoids appending an extra newline after the file's own text.
        print(f.read(), end='')