# Municipality Analysis

In [0]:
#Cargando librerías
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import drive
import warnings
warnings.filterwarnings('ignore')

In [2]:
drive.mount('/content/drive')
df = pd.read_csv('/content/drive/My Drive/files/nutritive.csv')
df = df.drop(columns=['Unnamed: 0'])

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
import re
from unicodedata import normalize
def cleanText(inputString):
    return (re.sub(
        r"([^n\u0300-\u036f]|n(?!\u0303(?![\u0300-\u036f])))[\u0300-\u036f]+", r"\1", 
        normalize( "NFD", inputString), 0, re.I
    )).lower()

def clean_l3(row):
    row.l3 = cleanText(row.l3)
    return row

def clean_alpha(inputString):
    return re.sub('[^A-Za-z0-9]+', '', inputString)

## Sell and Rent Counting

In [5]:
df = df.where(df['operation_type'] == 'Venta').dropna(subset=['operation_type'])
df = df.apply(lambda x: clean_l3(x), axis=1)
df.shape

(613054, 25)

In [0]:
df_train = df.copy()
    
df_train = df_train[df_train['price']<df_train['price'].quantile(0.99)]
df_train = df_train[df_train['price'] > df_train['price'].quantile(0.1)]
df_train = df_train[df_train['surface_total'] > 0]
df_train = df_train[~np.isnan(df_train['price']) & df_train['price'] > 0]
df_train = df_train[df_train['surface_total'] > 0]

df_train['price'] = np.log(df_train['price'])

In [7]:
df_train.head(1)

Unnamed: 0,id,ad_type,start_date,end_date,created_on,lat,lon,l1,l2,l3,l4,l5,l6,rooms,bedrooms,bathrooms,surface_total,surface_covered,price,currency,price_period,title,description,property_type,operation_type
0,HSOmvzoNx39wqCR/yL57kw==,Propiedad,2018-09-14,2019-09-23,2018-09-14,3.439,-76.541,Colombia,Valle del Cauca,cali,,,,12.0,12.0,6.0,202.0,360.0,20.030119,COP,,Casa En Venta En Cali Miraflores,"Para inversin, actualmente rentando, casa de 4...",Casa,Venta


In [0]:
df_final = df_train.copy()

In [0]:
n = 10 #First n most frequent municipalities
series = df_train['l3'].value_counts()[:n]
municipalities = list(series.index)
freq = list(series)

In [0]:
df_balanced = pd.DataFrame(columns=list(df_train.columns))

In [0]:
#To make undersampling
n_under = min(freq) #Undersampling min number
df_final = df_train[df_train['l3'].isin(municipalities)]

for x in municipalities:
    temp_df = df_final[df_final['l3'] == x].sample(n = n_under, random_state=4)
    df_balanced = pd.concat([df_balanced, temp_df], ignore_index=True)
    

In [0]:
df_balanced.drop(columns=['id',"ad_type",'start_date', 'start_date','created_on','end_date',
                 'l1','l2', 'l4','l6', 'l5', 
                 'rooms', 'price_period', 'surface_covered',
                 'operation_type','currency', 'title', 'description'], inplace=True)

In [0]:
df_train = df_balanced.copy()

## Machine Learning - Initial Model

In [0]:
X_df = df_train.drop('price', axis=1).copy()
Y_df = df_train['price'].copy()

In [0]:
from sklearn.model_selection import train_test_split

In [0]:
X_train_or, X_test_or, Y_train_or, Y_test_or = train_test_split(
    X_df, Y_df, test_size=0.2, random_state=5000)

In [0]:
# Para eliminar SettingWithCopyWarning

X_train = X_train_or.copy()
X_test = X_test_or.copy()
Y_train = Y_train_or.copy()
Y_test = Y_test_or.copy()

In [0]:
from sklearn.preprocessing import (PowerTransformer, StandardScaler, 
                                   MinMaxScaler, LabelEncoder, OneHotEncoder)

### Power Transformer Labels (Box-Cox)

Normalize and reduce scale

In [21]:
box_cox = PowerTransformer(method='box-cox') # Box-Cox object instance
box_cox.fit(X_train["surface_total"].values.reshape(-1,1)) # Box-Cox, array 2 dimensions
X_train.loc[:,"surface_total"] = box_cox.transform(X_train["surface_total"].values.reshape(-1,1)) # Transformamos el array
X_train.head()

Unnamed: 0,lat,lon,l3,bedrooms,bathrooms,surface_total,property_type
64916,7.1,-73.116997,bucaramanga,5.0,2.0,0.190543,Apartamento
20641,11.023675,-74.860947,barranquilla,,3.0,-0.092228,Casa
29793,5.072902,-75.523653,manizales,4.0,2.0,0.861274,Casa
45451,7.893,-72.488,cucuta,2.0,2.0,-0.714284,Apartamento
62604,,,bucaramanga,4.0,4.0,0.768529,Apartamento


### Standard Scaler
El Standard Scaler sirve para centrar una distribución en 0 y que tenga desviación estándar unitaria.

 $$\mathbf{Z} = \frac{\mathbf{X} - \mathbf{\mu}}{\mathbf{\sigma}}$$


In [22]:
# Categorical Class
print("Classes: ", X_train['property_type'].unique())
X_train.head()

Classes:  ['Apartamento' 'Casa' 'Lote' 'Finca' 'Otro' 'Parqueadero' 'Oficina'
 'Local comercial' 'Depósito']


Unnamed: 0,lat,lon,l3,bedrooms,bathrooms,surface_total,property_type
64916,7.1,-73.116997,bucaramanga,5.0,2.0,0.190543,Apartamento
20641,11.023675,-74.860947,barranquilla,,3.0,-0.092228,Casa
29793,5.072902,-75.523653,manizales,4.0,2.0,0.861274,Casa
45451,7.893,-72.488,cucuta,2.0,2.0,-0.714284,Apartamento
62604,,,bucaramanga,4.0,4.0,0.768529,Apartamento


In [0]:
le_l3 = LabelEncoder()
le_l3.fit(X_train['l3'])
X_train['l3'] = le_l3.transform(X_train['l3'])

In [0]:
ohe_l3 = OneHotEncoder()
ohe_l3.fit(X_train["l3"].values[:,None])
out = ohe_l3.transform(X_train["l3"].values[:,None]) 
out_array = out.toarray()
ohe_cols_op = []
for category in ohe_l3.categories_[0]:
    current_class = le_l3.classes_[int(category)]
    col_add = "op_" + current_class
    ohe_cols_op.append(col_add)
    X_train[col_add] = out_array[:,int(category)]

X_train = X_train.drop("l3", axis =1)

In [0]:
le_proper_type = LabelEncoder()
le_proper_type.fit(X_train['property_type'])
X_train['property_type'] = le_proper_type.transform(X_train['property_type'])

In [0]:
ohe_proper_type = OneHotEncoder()
ohe_proper_type.fit(X_train["property_type"].values[:,None])
out = ohe_proper_type.transform(X_train["property_type"].values[:,None]) 
out_array = out.toarray()
ohe_cols_op2 = []
for category in ohe_proper_type.categories_[0]:
    current_class = le_proper_type.classes_[int(category)]
    col_add = "op_" + current_class
    ohe_cols_op2.append(col_add)
    X_train[col_add] = out_array[:,int(category)]

X_train = X_train.drop("property_type", axis =1)

### Tratamiento de datos faltantes o NaN

In [0]:
from sklearn.impute import SimpleImputer

In [0]:
# Using Simple Imputer
si_bedrooms = SimpleImputer(strategy='mean')
si_bedrooms.fit(X_train["bedrooms"].values[:,None])
X_train["bedrooms"] = si_bedrooms.transform(X_train["bedrooms"].values[:,None])

si_lat = SimpleImputer(strategy='mean')
si_lat.fit(X_train["lat"].values[:,None])
X_train["lat"] = si_lat.transform(X_train["lat"].values[:,None])

si_lon = SimpleImputer(strategy='mean')
si_lon.fit(X_train["lon"].values[:,None])
X_train["lon"] = si_lon.transform(X_train["lon"].values[:,None])

si_bath = SimpleImputer(strategy='mean')
si_bath.fit(X_train["bathrooms"].values[:,None])
X_train["bathrooms"] = si_bath.transform(X_train["bathrooms"].values[:,None])

In [29]:
#Otras transformaciones
cols2scale = ['bedrooms', 'bathrooms']
se_cols2scale = StandardScaler()
se_cols2scale.fit(X_train[cols2scale])
X_train[cols2scale] = se_cols2scale.transform(X_train[cols2scale])
X_train.head()

Unnamed: 0,lat,lon,bedrooms,bathrooms,surface_total,op_barranquilla,op_bogota d.c,op_bucaramanga,op_cali,op_cartagena,op_cucuta,op_envigado,op_manizales,op_medellin,op_pereira,op_Apartamento,op_Casa,op_Depósito,op_Finca,op_Local comercial,op_Lote,op_Oficina,op_Otro,op_Parqueadero
64916,7.1,-73.116997,1.013676,-0.60538,0.190543,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20641,11.023675,-74.860947,5.776931e-16,0.134606,-0.092228,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
29793,5.072902,-75.523653,0.3632511,-0.60538,0.861274,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
45451,7.893,-72.488,-0.9375981,-0.60538,-0.714284,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
62604,6.747057,-74.861314,0.3632511,0.874593,0.768529,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [0]:
X_test['surface_total'] = box_cox.transform(X_test['surface_total'].values[:,None])


In [0]:
X_test['property_type'] = le_proper_type.transform(X_test['property_type'])

In [0]:
X_test['l3'] = le_l3.transform(X_test['l3'])

In [0]:
temp_test = ohe_l3.transform(X_test["l3"].values[:,None]).toarray()
for idx, col in enumerate(ohe_cols_op):
    X_test[col] = temp_test[:,idx]

temp_test = ohe_proper_type.transform(X_test["property_type"].values[:,None]).toarray()

for idx, col in enumerate(ohe_cols_op2):
    X_test[col] = temp_test[:,idx]
    
    


X_test = X_test.drop("property_type", axis=1)
X_test = X_test.drop("l3", axis=1)

In [0]:
X_test['bedrooms'] = si_bedrooms.transform(X_test["bedrooms"].values[:,None])
X_test['lat'] = si_lat.transform(X_test['lat'].values[:, None])
X_test['lon'] = si_lon.transform(X_test['lon'].values[:, None])
X_test['bathrooms'] = si_bath.transform(X_test['bathrooms'].values[:,None])

In [0]:
X_test[cols2scale] = se_cols2scale.transform(X_test[cols2scale])

In [0]:
assert (X_test.columns.values == X_train.columns.values).all()

In [37]:
X_train

Unnamed: 0,lat,lon,bedrooms,bathrooms,surface_total,op_barranquilla,op_bogota d.c,op_bucaramanga,op_cali,op_cartagena,op_cucuta,op_envigado,op_manizales,op_medellin,op_pereira,op_Apartamento,op_Casa,op_Depósito,op_Finca,op_Local comercial,op_Lote,op_Oficina,op_Otro,op_Parqueadero
64916,7.100000,-73.116997,1.013676e+00,-0.605380,0.190543,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20641,11.023675,-74.860947,5.776931e-16,0.134606,-0.092228,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
29793,5.072902,-75.523653,3.632511e-01,-0.605380,0.861274,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
45451,7.893000,-72.488000,-9.375981e-01,-0.605380,-0.714284,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
62604,6.747057,-74.861314,3.632511e-01,0.874593,0.768529,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64898,7.092000,-73.111000,-2.871735e-01,-0.605380,-0.284716,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
64641,7.120000,-73.112000,-2.871735e-01,0.134606,-0.339826,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
55905,4.812486,-75.669839,5.776931e-16,-0.605380,-0.992690,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
34242,5.065670,-75.519837,5.776931e-16,-1.345367,-0.476691,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [0]:
#Rename columns
new_col = []

for col in list(X_train.columns):
    new_col.append(clean_alpha(col))
    
    
X_train.columns = new_col
X_test.columns = new_col

## Measure of Error of the Models


In [0]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# measure error 
def error(Y_test, Y_estimate):
    rmse = mean_squared_error(Y_test, Y_estimate)
    mae = mean_absolute_error(Y_test, Y_estimate)
    r2 = r2_score(Y_test, Y_estimate)
    print("Root Mean Square Error (RMSE)= {:.3f}".format(rmse),
    "\nMean Absolute Error (MAE)= {:.3f}".format(mae),
    "\nR^2 = {:.3f}".format(r2))


In [0]:
from sklearn.model_selection import cross_val_score
# function to get cross validation scores
def get_cv_scores(model):
    scores = cross_val_score(model,
                             X_train,
                             Y_train,
                             cv=5,
                             scoring='r2')
    
    print('CV Mean: ', np.mean(scores))
    print('STD: ', np.std(scores))
    print('\n')

# Bagging

In [40]:
import sklearn
from sklearn.ensemble import BaggingRegressor
model1 = sklearn.ensemble.BaggingRegressor(n_estimators = 20,
                                           max_samples = 20, max_features = 24).fit(X_train, Y_train)

prediction1 = model1.predict(X_test)
error(Y_test, prediction1)

Root Mean Square Error (RMSE)= 0.275 
Mean Absolute Error (MAE)= 0.389 
R^2 = 0.557


## Bagging with ExtraTree

In [41]:
from sklearn.ensemble import BaggingRegressor
from sklearn.tree import ExtraTreeRegressor
extra_tree = ExtraTreeRegressor(splitter = "best" ,random_state=1000)
model2 = BaggingRegressor(extra_tree, random_state=1000, n_estimators=60).fit(X_train, Y_train)
prediction2 = model2.predict(X_test)
error(Y_test, prediction2)

Root Mean Square Error (RMSE)= 0.126 
Mean Absolute Error (MAE)= 0.232 
R^2 = 0.798


In [0]:
# p = 2**Y_test
# p2 = 2**Y_test_estimate
# mae = mean_absolute_error(p, p2)
# print(mae)

## Lasso Regression

In [43]:
import sklearn
from sklearn.linear_model import  Lasso

model3 = sklearn.linear_model.Lasso(alpha = 0.1).fit(X_train, Y_train)
prediction3 = model3.predict(X_test)
error(Y_test, prediction3)

Root Mean Square Error (RMSE)= 0.338 
Mean Absolute Error (MAE)= 0.438 
R^2 = 0.456


## LightGBM

In [44]:
import lightgbm as lgb

lgb_train = lgb.Dataset(X_train, Y_train)
lgb_eval = lgb.Dataset(X_test, Y_test, reference=lgb_train)

params = {
    'boosting_type': 'gbdt','objective': 'regression', 'metric': {'l2', 'l1'},'num_leaves': 31,
    'learning_rate': 0.05, 'feature_fraction': 0.7,'bagging_fraction': 0.8, 'bagging_freq': 5,  'verbose': 0
}

model4 = lgb.train(params,
                lgb_train,
                num_boost_round=6000,
                valid_sets=lgb_eval,
                early_stopping_rounds=10000)

prediction4 = model4.predict(X_test, num_iteration=model4.best_iteration)
error(Y_test, prediction4)

[1;30;43mSe truncaron las últimas líneas 5000 del resultado de transmisión.[0m
[1007]	valid_0's l1: 0.256307	valid_0's l2: 0.132151
[1008]	valid_0's l1: 0.256304	valid_0's l2: 0.132148
[1009]	valid_0's l1: 0.256302	valid_0's l2: 0.132151
[1010]	valid_0's l1: 0.256287	valid_0's l2: 0.132154
[1011]	valid_0's l1: 0.256271	valid_0's l2: 0.132136
[1012]	valid_0's l1: 0.25628	valid_0's l2: 0.132159
[1013]	valid_0's l1: 0.256279	valid_0's l2: 0.132167
[1014]	valid_0's l1: 0.256268	valid_0's l2: 0.132169
[1015]	valid_0's l1: 0.256255	valid_0's l2: 0.132155
[1016]	valid_0's l1: 0.256251	valid_0's l2: 0.132154
[1017]	valid_0's l1: 0.256254	valid_0's l2: 0.13216
[1018]	valid_0's l1: 0.256267	valid_0's l2: 0.132173
[1019]	valid_0's l1: 0.256252	valid_0's l2: 0.132158
[1020]	valid_0's l1: 0.256235	valid_0's l2: 0.132143
[1021]	valid_0's l1: 0.256227	valid_0's l2: 0.132131
[1022]	valid_0's l1: 0.256228	valid_0's l2: 0.132131
[1023]	valid_0's l1: 0.256236	valid_0's l2: 0.13214
[1024]	valid_0's l1: 

### Lasso CV

In [45]:
from sklearn.linear_model import  LassoCV
model5 = LassoCV().fit(X_train, Y_train)
prediction5 = model5.predict(X_test)
error(Y_test, prediction5)

Root Mean Square Error (RMSE)= 0.257 
Mean Absolute Error (MAE)= 0.354 
R^2 = 0.586


## Ridge

In [46]:
from sklearn.linear_model import Ridge
model6 = Ridge(alpha=1).fit(X_train, Y_train)
prediction6 = model6.predict(X_test)
error(Y_test, prediction6)

Root Mean Square Error (RMSE)= 0.257 
Mean Absolute Error (MAE)= 0.354 
R^2 = 0.587


## Linear

In [47]:
from sklearn.linear_model import LinearRegression
model7 = LinearRegression().fit(X_train, Y_train)
prediction7 = model7.predict(X_test)
error(Y_test, prediction7)

Root Mean Square Error (RMSE)= 0.257 
Mean Absolute Error (MAE)= 0.354 
R^2 = 0.587


# Elastic-Net

In [48]:
from sklearn.linear_model import ElasticNet
model8 = ElasticNet(alpha=1, l1_ratio=0.5).fit(X_train, Y_train)
prediction8 = model8.predict(X_test)
error(Y_test, prediction8)

Root Mean Square Error (RMSE)= 0.616 
Mean Absolute Error (MAE)= 0.625 
R^2 = 0.009


## XGboost

In [49]:
import xgboost as xgb
model9 = xgb.XGBRegressor(objective ='reg:squarederror', colsample_bytree = 0.9,
                          learning_rate = 0.9, max_depth = 10, alpha = 30, n_estimators = 30).fit(X_train,Y_train)
prediction9 = model9.predict(X_test)
error(Y_test, prediction9)

Root Mean Square Error (RMSE)= 0.155 
Mean Absolute Error (MAE)= 0.266 
R^2 = 0.750


# Tuning of Hyperparameters

## Tuning of Ligthlgb

In [51]:
!pip install optuna

Collecting optuna
[?25l  Downloading https://files.pythonhosted.org/packages/6c/32/b8de89bd281c9799365a374daa3f66b94f61363be24a8647a38aa5498cfb/optuna-1.2.0.tar.gz (146kB)
[K     |████████████████████████████████| 153kB 8.6MB/s 
[?25hCollecting alembic
[?25l  Downloading https://files.pythonhosted.org/packages/60/1e/cabc75a189de0fbb2841d0975243e59bde8b7822bacbb95008ac6fe9ad47/alembic-1.4.2.tar.gz (1.1MB)
[K     |████████████████████████████████| 1.1MB 17.3MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting cliff
[?25l  Downloading https://files.pythonhosted.org/packages/90/e4/624f02aa2fcbf6efcd9d6bf90f92836a2ae46bc4376a824e317d10506fc8/cliff-3.0.0-py3-none-any.whl (79kB)
[K     |████████████████████████████████| 81kB 11.2MB/s 
[?25hCollecting colorlog
  Downloading https://files.pythonhosted.org/packages/00/0d/22c73c2eccb21dd3498df7d22c0b1d4a30f5a

In [52]:
import numpy as np
import sklearn.datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import optuna.integration.lightgbm as lgb


dtrain = lgb.Dataset(X_train, label=Y_train)
dval = lgb.Dataset(X_test, label=Y_test)

params = {
    "objective": "regression",
    "metric": "binary_logloss",
    "verbosity": -1,
    "boosting_type": "gbdt",
    'metric':  'l2'
}

best_params, tuning_history = dict(), list()

model = lgb.train(
    params,
    dtrain,
    valid_sets=[dtrain, dval],
    best_params=best_params,
    tuning_history=tuning_history,
    verbose_eval=600,
    early_stopping_rounds=100,
)

prediction = model.predict(X_test, num_iteration=model.best_iteration)



tune_feature_fraction, val_score: inf:   0%|          | 0/7 [00:00<?, ?it/s]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.110777	valid_1's l2: 0.136411


tune_feature_fraction, val_score: 0.132871:  14%|#4        | 1/7 [00:03<00:21,  3.58s/it]

Did not meet early stopping. Best iteration is:
[1000]	training's l2: 0.100582	valid_1's l2: 0.132871


[32m[I 2020-03-22 04:42:11,099][0m Finished trial#0 resulted in value: 0.13287119113087528. Current best value is 0.13287119113087528 with parameters: {'feature_fraction': 0.4}.[0m
tune_feature_fraction, val_score: 0.132871:  14%|#4        | 1/7 [00:03<00:21,  3.58s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.105714	valid_1's l2: 0.134372


tune_feature_fraction, val_score: 0.130379:  29%|##8       | 2/7 [00:06<00:17,  3.50s/it]

Did not meet early stopping. Best iteration is:
[1000]	training's l2: 0.0931379	valid_1's l2: 0.130379


[32m[I 2020-03-22 04:42:14,430][0m Finished trial#1 resulted in value: 0.1303787493447953. Current best value is 0.1303787493447953 with parameters: {'feature_fraction': 0.5}.[0m
tune_feature_fraction, val_score: 0.130379:  29%|##8       | 2/7 [00:07<00:17,  3.50s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.102672	valid_1's l2: 0.132604


tune_feature_fraction, val_score: 0.128673:  43%|####2     | 3/7 [00:10<00:13,  3.43s/it]

Did not meet early stopping. Best iteration is:
[1000]	training's l2: 0.0904752	valid_1's l2: 0.128673


[32m[I 2020-03-22 04:42:17,695][0m Finished trial#2 resulted in value: 0.1286729350189791. Current best value is 0.1286729350189791 with parameters: {'feature_fraction': 0.6}.[0m
tune_feature_fraction, val_score: 0.128673:  43%|####2     | 3/7 [00:10<00:13,  3.43s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0996403	valid_1's l2: 0.132043


tune_feature_fraction, val_score: 0.128100:  57%|#####7    | 4/7 [00:13<00:10,  3.41s/it]

Did not meet early stopping. Best iteration is:
[1000]	training's l2: 0.0872014	valid_1's l2: 0.1281


[32m[I 2020-03-22 04:42:21,042][0m Finished trial#3 resulted in value: 0.12810033904861962. Current best value is 0.12810033904861962 with parameters: {'feature_fraction': 0.7}.[0m
tune_feature_fraction, val_score: 0.128100:  57%|#####7    | 4/7 [00:13<00:10,  3.41s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0982082	valid_1's l2: 0.131464


tune_feature_fraction, val_score: 0.128100:  71%|#######1  | 5/7 [00:16<00:06,  3.39s/it]

Did not meet early stopping. Best iteration is:
[1000]	training's l2: 0.0856219	valid_1's l2: 0.128345


[32m[I 2020-03-22 04:42:24,390][0m Finished trial#4 resulted in value: 0.12834466987917706. Current best value is 0.12810033904861962 with parameters: {'feature_fraction': 0.7}.[0m
tune_feature_fraction, val_score: 0.128100:  71%|#######1  | 5/7 [00:16<00:06,  3.39s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0974047	valid_1's l2: 0.130845


tune_feature_fraction, val_score: 0.127776:  86%|########5 | 6/7 [00:20<00:03,  3.40s/it]

Did not meet early stopping. Best iteration is:
[1000]	training's l2: 0.0845689	valid_1's l2: 0.127776


[32m[I 2020-03-22 04:42:27,817][0m Finished trial#5 resulted in value: 0.12777563247171544. Current best value is 0.12777563247171544 with parameters: {'feature_fraction': 0.8999999999999999}.[0m
tune_feature_fraction, val_score: 0.127776:  86%|########5 | 6/7 [00:20<00:03,  3.40s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0957207	valid_1's l2: 0.131409


tune_feature_fraction, val_score: 0.127511: 100%|##########| 7/7 [00:23<00:00,  3.47s/it]

Did not meet early stopping. Best iteration is:
[1000]	training's l2: 0.0829098	valid_1's l2: 0.127511


[32m[I 2020-03-22 04:42:31,461][0m Finished trial#6 resulted in value: 0.1275106993219683. Current best value is 0.1275106993219683 with parameters: {'feature_fraction': 1.0}.[0m
tune_feature_fraction, val_score: 0.127511: 100%|##########| 7/7 [00:24<00:00,  3.44s/it]
tune_num_leaves, val_score: 0.127511:   0%|          | 0/20 [00:00<?, ?it/s]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0390404	valid_1's l2: 0.122736
Early stopping, best iteration is:
[590]	training's l2: 0.0394607	valid_1's l2: 0.122673


tune_num_leaves, val_score: 0.122673:   5%|5         | 1/20 [00:07<02:13,  7.03s/it][32m[I 2020-03-22 04:42:38,595][0m Finished trial#0 resulted in value: 0.12267338794089168. Current best value is 0.12267338794089168 with parameters: {'num_leaves': 232}.[0m
tune_num_leaves, val_score: 0.122673:   5%|5         | 1/20 [00:07<02:13,  7.03s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0419121	valid_1's l2: 0.122615
Early stopping, best iteration is:
[644]	training's l2: 0.0399221	valid_1's l2: 0.122549


tune_num_leaves, val_score: 0.122549:  10%|#         | 2/20 [00:13<02:05,  6.99s/it][32m[I 2020-03-22 04:42:45,499][0m Finished trial#1 resulted in value: 0.12254939954224113. Current best value is 0.12254939954224113 with parameters: {'num_leaves': 205}.[0m
tune_num_leaves, val_score: 0.122549:  10%|#         | 2/20 [00:14<02:05,  6.99s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0664369	valid_1's l2: 0.124569
Did not meet early stopping. Best iteration is:
[1000]	training's l2: 0.0525541	valid_1's l2: 0.122982


tune_num_leaves, val_score: 0.122549:  15%|#5        | 3/20 [00:19<01:50,  6.51s/it][32m[I 2020-03-22 04:42:50,877][0m Finished trial#2 resulted in value: 0.12298157192400054. Current best value is 0.12254939954224113 with parameters: {'num_leaves': 205}.[0m
tune_num_leaves, val_score: 0.122549:  15%|#5        | 3/20 [00:19<01:50,  6.51s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0580576	valid_1's l2: 0.124554
Did not meet early stopping. Best iteration is:
[1000]	training's l2: 0.0438546	valid_1's l2: 0.123654


tune_num_leaves, val_score: 0.122549:  20%|##        | 4/20 [00:25<01:42,  6.42s/it][32m[I 2020-03-22 04:42:57,079][0m Finished trial#3 resulted in value: 0.12365440407997347. Current best value is 0.12254939954224113 with parameters: {'num_leaves': 205}.[0m
tune_num_leaves, val_score: 0.122549:  20%|##        | 4/20 [00:25<01:42,  6.42s/it]

Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[432]	training's l2: 0.054394	valid_1's l2: 0.123439


tune_num_leaves, val_score: 0.122549:  25%|##5       | 5/20 [00:30<01:28,  5.88s/it][32m[I 2020-03-22 04:43:01,699][0m Finished trial#4 resulted in value: 0.1234386270482197. Current best value is 0.12254939954224113 with parameters: {'num_leaves': 205}.[0m
tune_num_leaves, val_score: 0.122549:  25%|##5       | 5/20 [00:30<01:28,  5.88s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0717388	valid_1's l2: 0.124796
Did not meet early stopping. Best iteration is:
[1000]	training's l2: 0.0574803	valid_1's l2: 0.123134


tune_num_leaves, val_score: 0.122549:  30%|###       | 6/20 [00:35<01:18,  5.59s/it][32m[I 2020-03-22 04:43:06,621][0m Finished trial#5 resulted in value: 0.12313437099096898. Current best value is 0.12254939954224113 with parameters: {'num_leaves': 205}.[0m
tune_num_leaves, val_score: 0.122549:  30%|###       | 6/20 [00:35<01:18,  5.59s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0490358	valid_1's l2: 0.123212
Early stopping, best iteration is:
[518]	training's l2: 0.0530699	valid_1's l2: 0.123111


tune_num_leaves, val_score: 0.122549:  35%|###5      | 7/20 [00:39<01:09,  5.38s/it][32m[I 2020-03-22 04:43:11,506][0m Finished trial#6 resulted in value: 0.12311079306935384. Current best value is 0.12254939954224113 with parameters: {'num_leaves': 205}.[0m
tune_num_leaves, val_score: 0.122549:  35%|###5      | 7/20 [00:40<01:09,  5.38s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.146884	valid_1's l2: 0.158609


tune_num_leaves, val_score: 0.122549:  40%|####      | 8/20 [00:42<00:53,  4.44s/it][32m[I 2020-03-22 04:43:13,760][0m Finished trial#7 resulted in value: 0.15113558400409724. Current best value is 0.12254939954224113 with parameters: {'num_leaves': 205}.[0m
tune_num_leaves, val_score: 0.122549:  40%|####      | 8/20 [00:42<00:53,  4.44s/it]

Did not meet early stopping. Best iteration is:
[1000]	training's l2: 0.137988	valid_1's l2: 0.151136
Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0416175	valid_1's l2: 0.123028
Early stopping, best iteration is:
[520]	training's l2: 0.0454634	valid_1's l2: 0.122923


tune_num_leaves, val_score: 0.122549:  45%|####5     | 9/20 [00:48<00:53,  4.86s/it][32m[I 2020-03-22 04:43:19,595][0m Finished trial#8 resulted in value: 0.1229225158675498. Current best value is 0.12254939954224113 with parameters: {'num_leaves': 205}.[0m
tune_num_leaves, val_score: 0.122549:  45%|####5     | 9/20 [00:48<00:53,  4.86s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0553316	valid_1's l2: 0.122789
Early stopping, best iteration is:
[771]	training's l2: 0.0482657	valid_1's l2: 0.122319


tune_num_leaves, val_score: 0.122319:  50%|#####     | 10/20 [00:53<00:51,  5.15s/it][32m[I 2020-03-22 04:43:25,430][0m Finished trial#9 resulted in value: 0.12231931277807828. Current best value is 0.12231931277807828 with parameters: {'num_leaves': 126}.[0m
tune_num_leaves, val_score: 0.122319:  50%|#####     | 10/20 [00:53<00:51,  5.15s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0981065	valid_1's l2: 0.131876


tune_num_leaves, val_score: 0.122319:  55%|#####5    | 11/20 [00:57<00:41,  4.66s/it]

Did not meet early stopping. Best iteration is:
[1000]	training's l2: 0.085213	valid_1's l2: 0.128537


[32m[I 2020-03-22 04:43:28,935][0m Finished trial#10 resulted in value: 0.1285372590141697. Current best value is 0.12231931277807828 with parameters: {'num_leaves': 126}.[0m
tune_num_leaves, val_score: 0.122319:  55%|#####5    | 11/20 [00:57<00:41,  4.66s/it]

Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[442]	training's l2: 0.0439261	valid_1's l2: 0.123221


tune_num_leaves, val_score: 0.122319:  60%|######    | 12/20 [01:03<00:40,  5.09s/it][32m[I 2020-03-22 04:43:35,023][0m Finished trial#11 resulted in value: 0.12322104734447116. Current best value is 0.12231931277807828 with parameters: {'num_leaves': 126}.[0m
tune_num_leaves, val_score: 0.122319:  60%|######    | 12/20 [01:03<00:40,  5.09s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0507701	valid_1's l2: 0.123395
Early stopping, best iteration is:
[638]	training's l2: 0.0490054	valid_1's l2: 0.123334


tune_num_leaves, val_score: 0.122319:  65%|######5   | 13/20 [01:09<00:36,  5.23s/it][32m[I 2020-03-22 04:43:40,584][0m Finished trial#12 resulted in value: 0.12333369841139466. Current best value is 0.12231931277807828 with parameters: {'num_leaves': 126}.[0m
tune_num_leaves, val_score: 0.122319:  65%|######5   | 13/20 [01:09<00:36,  5.23s/it]

Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[484]	training's l2: 0.0490938	valid_1's l2: 0.122753


tune_num_leaves, val_score: 0.122319:  70%|#######   | 14/20 [01:14<00:31,  5.25s/it][32m[I 2020-03-22 04:43:45,872][0m Finished trial#13 resulted in value: 0.12275258681257924. Current best value is 0.12231931277807828 with parameters: {'num_leaves': 126}.[0m
tune_num_leaves, val_score: 0.122319:  70%|#######   | 14/20 [01:14<00:31,  5.25s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0577419	valid_1's l2: 0.124557
Did not meet early stopping. Best iteration is:
[1000]	training's l2: 0.0439949	valid_1's l2: 0.123709


tune_num_leaves, val_score: 0.122319:  75%|#######5  | 15/20 [01:20<00:27,  5.58s/it][32m[I 2020-03-22 04:43:52,221][0m Finished trial#14 resulted in value: 0.12370862535087633. Current best value is 0.12231931277807828 with parameters: {'num_leaves': 126}.[0m
tune_num_leaves, val_score: 0.122319:  75%|#######5  | 15/20 [01:20<00:27,  5.58s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.070289	valid_1's l2: 0.125225
Did not meet early stopping. Best iteration is:
[1000]	training's l2: 0.0560593	valid_1's l2: 0.123619


tune_num_leaves, val_score: 0.122319:  80%|########  | 16/20 [01:25<00:21,  5.44s/it][32m[I 2020-03-22 04:43:57,326][0m Finished trial#15 resulted in value: 0.12361913017168374. Current best value is 0.12231931277807828 with parameters: {'num_leaves': 126}.[0m
tune_num_leaves, val_score: 0.122319:  80%|########  | 16/20 [01:25<00:21,  5.44s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0377951	valid_1's l2: 0.122718
Early stopping, best iteration is:
[571]	training's l2: 0.0390682	valid_1's l2: 0.122531


tune_num_leaves, val_score: 0.122319:  85%|########5 | 17/20 [01:33<00:17,  5.98s/it][32m[I 2020-03-22 04:44:04,592][0m Finished trial#16 resulted in value: 0.12253067580942462. Current best value is 0.12231931277807828 with parameters: {'num_leaves': 126}.[0m
tune_num_leaves, val_score: 0.122319:  85%|########5 | 17/20 [01:33<00:17,  5.98s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0513608	valid_1's l2: 0.122511
Early stopping, best iteration is:
[711]	training's l2: 0.0467668	valid_1's l2: 0.12224


tune_num_leaves, val_score: 0.122240:  90%|######### | 18/20 [01:38<00:11,  5.98s/it][32m[I 2020-03-22 04:44:10,561][0m Finished trial#17 resulted in value: 0.12223978525522382. Current best value is 0.12223978525522382 with parameters: {'num_leaves': 148}.[0m
tune_num_leaves, val_score: 0.122240:  90%|######### | 18/20 [01:39<00:11,  5.98s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0522359	valid_1's l2: 0.123121
Early stopping, best iteration is:
[624]	training's l2: 0.0510456	valid_1's l2: 0.122993


tune_num_leaves, val_score: 0.122240:  95%|#########5| 19/20 [01:44<00:05,  5.78s/it][32m[I 2020-03-22 04:44:15,877][0m Finished trial#18 resulted in value: 0.12299279500969683. Current best value is 0.12223978525522382 with parameters: {'num_leaves': 148}.[0m
tune_num_leaves, val_score: 0.122240:  95%|#########5| 19/20 [01:44<00:05,  5.78s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0629126	valid_1's l2: 0.124796
Did not meet early stopping. Best iteration is:
[1000]	training's l2: 0.0488421	valid_1's l2: 0.123468


tune_num_leaves, val_score: 0.122240: 100%|##########| 20/20 [01:49<00:00,  5.75s/it][32m[I 2020-03-22 04:44:21,553][0m Finished trial#19 resulted in value: 0.12346778680499075. Current best value is 0.12223978525522382 with parameters: {'num_leaves': 148}.[0m
tune_num_leaves, val_score: 0.122240: 100%|##########| 20/20 [01:50<00:00,  5.50s/it]
tune_bagging_fraction_and_bagging_freq, val_score: 0.122240:   0%|          | 0/10 [00:00<?, ?it/s]

Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[414]	training's l2: 0.0595744	valid_1's l2: 0.124632


tune_bagging_fraction_and_bagging_freq, val_score: 0.122240:  10%|#         | 1/10 [00:04<00:43,  4.87s/it][32m[I 2020-03-22 04:44:26,528][0m Finished trial#0 resulted in value: 0.12463196731627567. Current best value is 0.12463196731627567 with parameters: {'bagging_fraction': 0.7410567275231684, 'bagging_freq': 1}.[0m
tune_bagging_fraction_and_bagging_freq, val_score: 0.122240:  10%|#         | 1/10 [00:04<00:43,  4.87s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.05189	valid_1's l2: 0.126949
Early stopping, best iteration is:
[538]	training's l2: 0.0547061	valid_1's l2: 0.126584


tune_bagging_fraction_and_bagging_freq, val_score: 0.122240:  20%|##        | 2/10 [00:13<00:46,  5.85s/it][32m[I 2020-03-22 04:44:34,673][0m Finished trial#1 resulted in value: 0.12658410530763448. Current best value is 0.12463196731627567 with parameters: {'bagging_fraction': 0.7410567275231684, 'bagging_freq': 1}.[0m
tune_bagging_fraction_and_bagging_freq, val_score: 0.122240:  20%|##        | 2/10 [00:13<00:46,  5.85s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.051453	valid_1's l2: 0.125611
Early stopping, best iteration is:
[632]	training's l2: 0.0500894	valid_1's l2: 0.125482


tune_bagging_fraction_and_bagging_freq, val_score: 0.122240:  30%|###       | 3/10 [00:22<00:47,  6.81s/it][32m[I 2020-03-22 04:44:43,719][0m Finished trial#2 resulted in value: 0.1254824516923666. Current best value is 0.12463196731627567 with parameters: {'bagging_fraction': 0.7410567275231684, 'bagging_freq': 1}.[0m
tune_bagging_fraction_and_bagging_freq, val_score: 0.122240:  30%|###       | 3/10 [00:22<00:47,  6.81s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0520416	valid_1's l2: 0.126533
Early stopping, best iteration is:
[628]	training's l2: 0.0508539	valid_1's l2: 0.126234


tune_bagging_fraction_and_bagging_freq, val_score: 0.122240:  40%|####      | 4/10 [00:31<00:45,  7.58s/it][32m[I 2020-03-22 04:44:53,110][0m Finished trial#3 resulted in value: 0.12623378102454988. Current best value is 0.12463196731627567 with parameters: {'bagging_fraction': 0.7410567275231684, 'bagging_freq': 1}.[0m
tune_bagging_fraction_and_bagging_freq, val_score: 0.122240:  40%|####      | 4/10 [00:31<00:45,  7.58s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0567864	valid_1's l2: 0.12999
Early stopping, best iteration is:
[544]	training's l2: 0.059474	valid_1's l2: 0.129324


tune_bagging_fraction_and_bagging_freq, val_score: 0.122240:  50%|#####     | 5/10 [00:40<00:39,  7.97s/it][32m[I 2020-03-22 04:45:01,972][0m Finished trial#4 resulted in value: 0.12932392723559438. Current best value is 0.12463196731627567 with parameters: {'bagging_fraction': 0.7410567275231684, 'bagging_freq': 1}.[0m
tune_bagging_fraction_and_bagging_freq, val_score: 0.122240:  50%|#####     | 5/10 [00:40<00:39,  7.97s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.051521	valid_1's l2: 0.126095
Early stopping, best iteration is:
[546]	training's l2: 0.0542942	valid_1's l2: 0.125847


tune_bagging_fraction_and_bagging_freq, val_score: 0.122240:  60%|######    | 6/10 [00:48<00:31,  7.99s/it][32m[I 2020-03-22 04:45:10,024][0m Finished trial#5 resulted in value: 0.12584687176404707. Current best value is 0.12463196731627567 with parameters: {'bagging_fraction': 0.7410567275231684, 'bagging_freq': 1}.[0m
tune_bagging_fraction_and_bagging_freq, val_score: 0.122240:  60%|######    | 6/10 [00:48<00:31,  7.99s/it]

Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[400]	training's l2: 0.0681383	valid_1's l2: 0.127986


tune_bagging_fraction_and_bagging_freq, val_score: 0.122240:  70%|#######   | 7/10 [00:55<00:23,  7.76s/it][32m[I 2020-03-22 04:45:17,239][0m Finished trial#6 resulted in value: 0.1279857848703484. Current best value is 0.12463196731627567 with parameters: {'bagging_fraction': 0.7410567275231684, 'bagging_freq': 1}.[0m
tune_bagging_fraction_and_bagging_freq, val_score: 0.122240:  70%|#######   | 7/10 [00:55<00:23,  7.76s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0495187	valid_1's l2: 0.124622
Early stopping, best iteration is:
[603]	training's l2: 0.0493856	valid_1's l2: 0.124597


tune_bagging_fraction_and_bagging_freq, val_score: 0.122240:  80%|########  | 8/10 [01:03<00:15,  7.90s/it][32m[I 2020-03-22 04:45:25,456][0m Finished trial#7 resulted in value: 0.12459740438816545. Current best value is 0.12459740438816545 with parameters: {'bagging_fraction': 0.8452132868980973, 'bagging_freq': 3}.[0m
tune_bagging_fraction_and_bagging_freq, val_score: 0.122240:  80%|########  | 8/10 [01:03<00:15,  7.90s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0494444	valid_1's l2: 0.124071
Early stopping, best iteration is:
[645]	training's l2: 0.0475412	valid_1's l2: 0.12394


tune_bagging_fraction_and_bagging_freq, val_score: 0.122240:  90%|######### | 9/10 [01:12<00:08,  8.10s/it][32m[I 2020-03-22 04:45:34,037][0m Finished trial#8 resulted in value: 0.12393952324368537. Current best value is 0.12393952324368537 with parameters: {'bagging_fraction': 0.8521500121973649, 'bagging_freq': 3}.[0m
tune_bagging_fraction_and_bagging_freq, val_score: 0.122240:  90%|######### | 9/10 [01:12<00:08,  8.10s/it]

Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[320]	training's l2: 0.073111	valid_1's l2: 0.129531


tune_bagging_fraction_and_bagging_freq, val_score: 0.122240: 100%|##########| 10/10 [01:18<00:00,  7.41s/it][32m[I 2020-03-22 04:45:39,818][0m Finished trial#9 resulted in value: 0.12953097155217658. Current best value is 0.12393952324368537 with parameters: {'bagging_fraction': 0.8521500121973649, 'bagging_freq': 3}.[0m
tune_bagging_fraction_and_bagging_freq, val_score: 0.122240: 100%|##########| 10/10 [01:18<00:00,  7.83s/it]
tune_feature_fraction, val_score: 0.122240:   0%|          | 0/3 [00:00<?, ?it/s]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0521008	valid_1's l2: 0.121552
Early stopping, best iteration is:
[656]	training's l2: 0.0496457	valid_1's l2: 0.121428


tune_feature_fraction, val_score: 0.121428:  33%|###3      | 1/3 [00:05<00:10,  5.34s/it][32m[I 2020-03-22 04:45:45,262][0m Finished trial#0 resulted in value: 0.12142788680948345. Current best value is 0.12142788680948345 with parameters: {'feature_fraction': 0.92}.[0m
tune_feature_fraction, val_score: 0.121428:  33%|###3      | 1/3 [00:05<00:10,  5.34s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0521008	valid_1's l2: 0.121552
Early stopping, best iteration is:
[656]	training's l2: 0.0496457	valid_1's l2: 0.121428


tune_feature_fraction, val_score: 0.121428:  67%|######6   | 2/3 [00:10<00:05,  5.36s/it][32m[I 2020-03-22 04:45:50,656][0m Finished trial#1 resulted in value: 0.12142788680948345. Current best value is 0.12142788680948345 with parameters: {'feature_fraction': 0.92}.[0m
tune_feature_fraction, val_score: 0.121428:  67%|######6   | 2/3 [00:10<00:05,  5.36s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0510699	valid_1's l2: 0.122689
Early stopping, best iteration is:
[690]	training's l2: 0.0473562	valid_1's l2: 0.122579


tune_feature_fraction, val_score: 0.121428: 100%|##########| 3/3 [00:16<00:00,  5.46s/it][32m[I 2020-03-22 04:45:56,366][0m Finished trial#2 resulted in value: 0.12257918887923182. Current best value is 0.12142788680948345 with parameters: {'feature_fraction': 0.92}.[0m
tune_feature_fraction, val_score: 0.121428: 100%|##########| 3/3 [00:16<00:00,  5.52s/it]
tune_lambda_l1_and_lambda_l2, val_score: 0.121428:   0%|          | 0/20 [00:00<?, ?it/s]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0525326	valid_1's l2: 0.122922
Early stopping, best iteration is:
[696]	training's l2: 0.0487429	valid_1's l2: 0.122834


tune_lambda_l1_and_lambda_l2, val_score: 0.121428:   5%|5         | 1/20 [00:05<01:45,  5.55s/it][32m[I 2020-03-22 04:46:02,022][0m Finished trial#0 resulted in value: 0.12283418097845109. Current best value is 0.12283418097845109 with parameters: {'lambda_l1': 4.428322713349182e-08, 'lambda_l2': 1.82380836857909e-07}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 0.121428:   5%|5         | 1/20 [00:05<01:45,  5.55s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0521252	valid_1's l2: 0.122254
Early stopping, best iteration is:
[693]	training's l2: 0.0484242	valid_1's l2: 0.12184


tune_lambda_l1_and_lambda_l2, val_score: 0.121428:  10%|#         | 2/20 [00:11<01:40,  5.58s/it][32m[I 2020-03-22 04:46:07,662][0m Finished trial#1 resulted in value: 0.12183996670502642. Current best value is 0.12183996670502642 with parameters: {'lambda_l1': 0.005023226046310405, 'lambda_l2': 4.762943876741629e-06}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 0.121428:  10%|#         | 2/20 [00:11<01:40,  5.58s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0525099	valid_1's l2: 0.122053
Early stopping, best iteration is:
[635]	training's l2: 0.0509126	valid_1's l2: 0.121835


tune_lambda_l1_and_lambda_l2, val_score: 0.121428:  15%|#5        | 3/20 [00:16<01:32,  5.47s/it][32m[I 2020-03-22 04:46:12,872][0m Finished trial#2 resulted in value: 0.12183483121229122. Current best value is 0.12183483121229122 with parameters: {'lambda_l1': 0.005449240340608189, 'lambda_l2': 4.073789283798795e-06}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 0.121428:  15%|#5        | 3/20 [00:16<01:32,  5.47s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0524146	valid_1's l2: 0.122447
Early stopping, best iteration is:
[790]	training's l2: 0.0453786	valid_1's l2: 0.12206


tune_lambda_l1_and_lambda_l2, val_score: 0.121428:  20%|##        | 4/20 [00:22<01:32,  5.77s/it][32m[I 2020-03-22 04:46:19,346][0m Finished trial#3 resulted in value: 0.12206009292107646. Current best value is 0.12183483121229122 with parameters: {'lambda_l1': 0.005449240340608189, 'lambda_l2': 4.073789283798795e-06}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 0.121428:  20%|##        | 4/20 [00:22<01:32,  5.77s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0527844	valid_1's l2: 0.12273
Early stopping, best iteration is:
[784]	training's l2: 0.0455691	valid_1's l2: 0.122468


tune_lambda_l1_and_lambda_l2, val_score: 0.121428:  25%|##5       | 5/20 [00:29<01:28,  5.92s/it][32m[I 2020-03-22 04:46:25,602][0m Finished trial#4 resulted in value: 0.1224676421774026. Current best value is 0.12183483121229122 with parameters: {'lambda_l1': 0.005449240340608189, 'lambda_l2': 4.073789283798795e-06}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 0.121428:  25%|##5       | 5/20 [00:29<01:28,  5.92s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0564969	valid_1's l2: 0.122258
Early stopping, best iteration is:
[814]	training's l2: 0.0479624	valid_1's l2: 0.121746


tune_lambda_l1_and_lambda_l2, val_score: 0.121428:  30%|###       | 6/20 [00:35<01:25,  6.13s/it][32m[I 2020-03-22 04:46:32,239][0m Finished trial#5 resulted in value: 0.12174576872088312. Current best value is 0.12174576872088312 with parameters: {'lambda_l1': 4.190425579201308e-06, 'lambda_l2': 6.022768032793006}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 0.121428:  30%|###       | 6/20 [00:35<01:25,  6.13s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0528471	valid_1's l2: 0.122524
Early stopping, best iteration is:
[689]	training's l2: 0.049015	valid_1's l2: 0.122098


tune_lambda_l1_and_lambda_l2, val_score: 0.121428:  35%|###5      | 7/20 [00:41<01:17,  5.99s/it][32m[I 2020-03-22 04:46:37,902][0m Finished trial#6 resulted in value: 0.12209839322572219. Current best value is 0.12174576872088312 with parameters: {'lambda_l1': 4.190425579201308e-06, 'lambda_l2': 6.022768032793006}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 0.121428:  35%|###5      | 7/20 [00:41<01:17,  5.99s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0525241	valid_1's l2: 0.122333
Early stopping, best iteration is:
[769]	training's l2: 0.0459314	valid_1's l2: 0.121982


tune_lambda_l1_and_lambda_l2, val_score: 0.121428:  40%|####      | 8/20 [00:47<01:12,  6.04s/it][32m[I 2020-03-22 04:46:44,066][0m Finished trial#7 resulted in value: 0.12198152230884983. Current best value is 0.12174576872088312 with parameters: {'lambda_l1': 4.190425579201308e-06, 'lambda_l2': 6.022768032793006}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 0.121428:  40%|####      | 8/20 [00:47<01:12,  6.04s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0523337	valid_1's l2: 0.122513
Early stopping, best iteration is:
[814]	training's l2: 0.043716	valid_1's l2: 0.12188


tune_lambda_l1_and_lambda_l2, val_score: 0.121428:  45%|####5     | 9/20 [00:54<01:07,  6.17s/it][32m[I 2020-03-22 04:46:50,544][0m Finished trial#8 resulted in value: 0.12187975282497619. Current best value is 0.12174576872088312 with parameters: {'lambda_l1': 4.190425579201308e-06, 'lambda_l2': 6.022768032793006}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 0.121428:  45%|####5     | 9/20 [00:54<01:07,  6.17s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.05201	valid_1's l2: 0.122759
Early stopping, best iteration is:
[714]	training's l2: 0.0472415	valid_1's l2: 0.122362


tune_lambda_l1_and_lambda_l2, val_score: 0.121428:  50%|#####     | 10/20 [00:59<01:00,  6.03s/it][32m[I 2020-03-22 04:46:56,250][0m Finished trial#9 resulted in value: 0.12236150649301034. Current best value is 0.12174576872088312 with parameters: {'lambda_l1': 4.190425579201308e-06, 'lambda_l2': 6.022768032793006}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 0.121428:  50%|#####     | 10/20 [00:59<01:00,  6.03s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0651812	valid_1's l2: 0.124298
Did not meet early stopping. Best iteration is:
[1000]	training's l2: 0.0550364	valid_1's l2: 0.123487


tune_lambda_l1_and_lambda_l2, val_score: 0.121428:  55%|#####5    | 11/20 [01:07<00:59,  6.58s/it][32m[I 2020-03-22 04:47:04,089][0m Finished trial#10 resulted in value: 0.12348713609356128. Current best value is 0.12174576872088312 with parameters: {'lambda_l1': 4.190425579201308e-06, 'lambda_l2': 6.022768032793006}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 0.121428:  55%|#####5    | 11/20 [01:07<00:59,  6.58s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0544398	valid_1's l2: 0.121876
Did not meet early stopping. Best iteration is:
[1000]	training's l2: 0.0419963	valid_1's l2: 0.121333


tune_lambda_l1_and_lambda_l2, val_score: 0.121333:  60%|######    | 12/20 [01:14<00:53,  6.69s/it][32m[I 2020-03-22 04:47:11,032][0m Finished trial#11 resulted in value: 0.12133250459475407. Current best value is 0.12133250459475407 with parameters: {'lambda_l1': 0.45476949522521215, 'lambda_l2': 0.00045451482766904625}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 0.121333:  60%|######    | 12/20 [01:14<00:53,  6.69s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0559736	valid_1's l2: 0.122707
Early stopping, best iteration is:
[890]	training's l2: 0.0463549	valid_1's l2: 0.122219


tune_lambda_l1_and_lambda_l2, val_score: 0.121333:  65%|######5   | 13/20 [01:21<00:47,  6.74s/it][32m[I 2020-03-22 04:47:17,903][0m Finished trial#12 resulted in value: 0.1222193433824723. Current best value is 0.12133250459475407 with parameters: {'lambda_l1': 0.45476949522521215, 'lambda_l2': 0.00045451482766904625}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 0.121333:  65%|######5   | 13/20 [01:21<00:47,  6.74s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0518408	valid_1's l2: 0.12216
Early stopping, best iteration is:
[661]	training's l2: 0.0493004	valid_1's l2: 0.122107


tune_lambda_l1_and_lambda_l2, val_score: 0.121333:  70%|#######   | 14/20 [01:26<00:38,  6.34s/it][32m[I 2020-03-22 04:47:23,319][0m Finished trial#13 resulted in value: 0.12210685214832555. Current best value is 0.12133250459475407 with parameters: {'lambda_l1': 0.45476949522521215, 'lambda_l2': 0.00045451482766904625}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 0.121333:  70%|#######   | 14/20 [01:26<00:38,  6.34s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.05731	valid_1's l2: 0.122875
Early stopping, best iteration is:
[863]	training's l2: 0.0476523	valid_1's l2: 0.122257


tune_lambda_l1_and_lambda_l2, val_score: 0.121333:  75%|#######5  | 15/20 [01:33<00:32,  6.56s/it][32m[I 2020-03-22 04:47:30,405][0m Finished trial#14 resulted in value: 0.12225681981295791. Current best value is 0.12133250459475407 with parameters: {'lambda_l1': 0.45476949522521215, 'lambda_l2': 0.00045451482766904625}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 0.121333:  75%|#######5  | 15/20 [01:34<00:32,  6.56s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0527372	valid_1's l2: 0.122586
Early stopping, best iteration is:
[875]	training's l2: 0.0431012	valid_1's l2: 0.122206


tune_lambda_l1_and_lambda_l2, val_score: 0.121333:  80%|########  | 16/20 [01:40<00:26,  6.68s/it][32m[I 2020-03-22 04:47:37,345][0m Finished trial#15 resulted in value: 0.12220645143971562. Current best value is 0.12133250459475407 with parameters: {'lambda_l1': 0.45476949522521215, 'lambda_l2': 0.00045451482766904625}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 0.121333:  80%|########  | 16/20 [01:40<00:26,  6.68s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0522922	valid_1's l2: 0.122679
Early stopping, best iteration is:
[619]	training's l2: 0.0514358	valid_1's l2: 0.122582


tune_lambda_l1_and_lambda_l2, val_score: 0.121333:  85%|########5 | 17/20 [01:46<00:18,  6.26s/it][32m[I 2020-03-22 04:47:42,614][0m Finished trial#16 resulted in value: 0.12258158732212578. Current best value is 0.12133250459475407 with parameters: {'lambda_l1': 0.45476949522521215, 'lambda_l2': 0.00045451482766904625}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 0.121333:  85%|########5 | 17/20 [01:46<00:18,  6.26s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0529632	valid_1's l2: 0.122528
Early stopping, best iteration is:
[777]	training's l2: 0.0459048	valid_1's l2: 0.122045


tune_lambda_l1_and_lambda_l2, val_score: 0.121333:  90%|######### | 18/20 [01:52<00:12,  6.27s/it][32m[I 2020-03-22 04:47:48,914][0m Finished trial#17 resulted in value: 0.12204467924103994. Current best value is 0.12133250459475407 with parameters: {'lambda_l1': 0.45476949522521215, 'lambda_l2': 0.00045451482766904625}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 0.121333:  90%|######### | 18/20 [01:52<00:12,  6.27s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0521492	valid_1's l2: 0.122834
Early stopping, best iteration is:
[654]	training's l2: 0.0498318	valid_1's l2: 0.122455


tune_lambda_l1_and_lambda_l2, val_score: 0.121333:  95%|#########5| 19/20 [01:57<00:06,  6.01s/it][32m[I 2020-03-22 04:47:54,337][0m Finished trial#18 resulted in value: 0.12245512850074665. Current best value is 0.12133250459475407 with parameters: {'lambda_l1': 0.45476949522521215, 'lambda_l2': 0.00045451482766904625}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 0.121333:  95%|#########5| 19/20 [01:57<00:06,  6.01s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0529921	valid_1's l2: 0.122312
Early stopping, best iteration is:
[741]	training's l2: 0.047597	valid_1's l2: 0.121957


tune_lambda_l1_and_lambda_l2, val_score: 0.121333: 100%|##########| 20/20 [02:03<00:00,  6.03s/it][32m[I 2020-03-22 04:48:00,393][0m Finished trial#19 resulted in value: 0.12195716603361996. Current best value is 0.12133250459475407 with parameters: {'lambda_l1': 0.45476949522521215, 'lambda_l2': 0.00045451482766904625}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 0.121333: 100%|##########| 20/20 [02:04<00:00,  6.20s/it]
tune_min_child_samples, val_score: 0.121333:   0%|          | 0/5 [00:00<?, ?it/s]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0449969	valid_1's l2: 0.120197
Early stopping, best iteration is:
[852]	training's l2: 0.0363789	valid_1's l2: 0.119332


tune_min_child_samples, val_score: 0.119332:  20%|##        | 1/5 [00:05<00:23,  5.89s/it][32m[I 2020-03-22 04:48:06,402][0m Finished trial#0 resulted in value: 0.11933227183910013. Current best value is 0.11933227183910013 with parameters: {'min_child_samples': 5}.[0m
tune_min_child_samples, val_score: 0.119332:  20%|##        | 1/5 [00:06<00:23,  5.89s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0485273	valid_1's l2: 0.121427
Early stopping, best iteration is:
[784]	training's l2: 0.0421195	valid_1's l2: 0.120836


tune_min_child_samples, val_score: 0.119332:  40%|####      | 2/5 [00:11<00:17,  5.84s/it][32m[I 2020-03-22 04:48:12,128][0m Finished trial#1 resulted in value: 0.12083562939776069. Current best value is 0.11933227183910013 with parameters: {'min_child_samples': 5}.[0m
tune_min_child_samples, val_score: 0.119332:  40%|####      | 2/5 [00:11<00:17,  5.84s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0568028	valid_1's l2: 0.122947
Early stopping, best iteration is:
[819]	training's l2: 0.0489815	valid_1's l2: 0.122268


tune_min_child_samples, val_score: 0.119332:  60%|######    | 3/5 [00:18<00:12,  6.14s/it][32m[I 2020-03-22 04:48:18,947][0m Finished trial#2 resulted in value: 0.12226819904423855. Current best value is 0.11933227183910013 with parameters: {'min_child_samples': 5}.[0m
tune_min_child_samples, val_score: 0.119332:  60%|######    | 3/5 [00:18<00:12,  6.14s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.064117	valid_1's l2: 0.12547
Early stopping, best iteration is:
[754]	training's l2: 0.058723	valid_1's l2: 0.124933


tune_min_child_samples, val_score: 0.119332:  80%|########  | 4/5 [00:26<00:06,  6.62s/it][32m[I 2020-03-22 04:48:26,703][0m Finished trial#3 resulted in value: 0.12493258656945583. Current best value is 0.11933227183910013 with parameters: {'min_child_samples': 5}.[0m
tune_min_child_samples, val_score: 0.119332:  80%|########  | 4/5 [00:26<00:06,  6.62s/it]

Training until validation scores don't improve for 100 rounds.
[600]	training's l2: 0.0729386	valid_1's l2: 0.126443
Did not meet early stopping. Best iteration is:
[1000]	training's l2: 0.0609215	valid_1's l2: 0.12554


tune_min_child_samples, val_score: 0.119332: 100%|##########| 5/5 [00:37<00:00,  8.12s/it][32m[I 2020-03-22 04:48:38,312][0m Finished trial#4 resulted in value: 0.12553981967069378. Current best value is 0.11933227183910013 with parameters: {'min_child_samples': 5}.[0m
tune_min_child_samples, val_score: 0.119332: 100%|##########| 5/5 [00:37<00:00,  7.58s/it]


In [53]:
print("Number of finished trials: {}".format(len(tuning_history)))
print("Best params:", best_params)
print("  Params: ")
for key, value in best_params.items():
    print("    {}: {}".format(key, value))


error(Y_test, prediction)

Number of finished trials: 65
Best params: {'lambda_l1': 0.45476949522521215, 'lambda_l2': 0.00045451482766904625, 'num_leaves': 148, 'feature_fraction': 0.92, 'bagging_fraction': 1.0, 'bagging_freq': 0, 'min_child_samples': 5}
  Params: 
    lambda_l1: 0.45476949522521215
    lambda_l2: 0.00045451482766904625
    num_leaves: 148
    feature_fraction: 0.92
    bagging_fraction: 1.0
    bagging_freq: 0
    min_child_samples: 5
Root Mean Square Error (RMSE)= 0.119 
Mean Absolute Error (MAE)= 0.233 
R^2 = 0.808


## Tunning of XGBRegresor

In [54]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.model_selection import StratifiedKFold
from xgboost import XGBRegressor

params = {
        'min_child_weight': [1, 5, 6, 7],
        'gamma': [5, 6, 7, 8, 9],
        'alpha':[50, 100, 150],
        'subsample': [ 0.7, 0.9, 1.0],
        'colsample_bytree': [0.75 ,0.8, 0.9],
        'max_depth': [29, 30, 32],
        'random_state' : np.random.randint(10000, size=20)
        }
xgb = XGBRegressor(learning_rate=0.9, n_estimators=30, objective='reg:squarederror')

skf = StratifiedKFold(n_splits = 3, shuffle = True, random_state = 42)

random_search_xb = RandomizedSearchCV(xgb, param_distributions=params,
                                   n_iter= 5 , n_jobs=-1,cv = 2, verbose=3)

random_search_xb.fit(X_train, Y_train)


Fitting 2 folds for each of 5 candidates, totalling 10 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:   50.8s finished


RandomizedSearchCV(cv=2, error_score=nan,
                   estimator=XGBRegressor(base_score=0.5, booster='gbtree',
                                          colsample_bylevel=1,
                                          colsample_bynode=1,
                                          colsample_bytree=1, gamma=0,
                                          importance_type='gain',
                                          learning_rate=0.9, max_delta_step=0,
                                          max_depth=3, min_child_weight=1,
                                          missing=None, n_estimators=30,
                                          n_jobs=1, nthread=None,
                                          objective='reg:squarederror',
                                          random_state=0, reg_a...
                                        'colsample_bytree': [0.75, 0.8, 0.9],
                                        'gamma': [5, 6, 7, 8, 9],
                                        'max

In [55]:
best_params_xboosts = random_search_xb.best_params_
print('\n Best hyperparameters:',best_params_xboosts)


 Best hyperparameters: {'subsample': 1.0, 'random_state': 3635, 'min_child_weight': 5, 'max_depth': 29, 'gamma': 8, 'colsample_bytree': 0.9, 'alpha': 150}


In [56]:
ypred = random_search_xb.predict(X_test)
error(Y_test, ypred)

Root Mean Square Error (RMSE)= 0.158 
Mean Absolute Error (MAE)= 0.280 
R^2 = 0.746


## Tuning ExtraTrees

In [57]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import BaggingRegressor
params = {
        'max_depth': [29, 30, 32],
        'random_state' : np.random.randint(10000, size=20),
        'max_features' : ['auto', 'sqrt', 'log2'],
        }
extr = ExtraTreeRegressor(splitter='best')

skf = StratifiedKFold(n_splits = 3, shuffle = True, random_state = 42)

random_search_et = RandomizedSearchCV(extr, param_distributions=params,
                                   n_iter= 5 , n_jobs=-1,cv = 2, verbose=3)

random_search_et.fit(X_train, Y_train)


Fitting 2 folds for each of 5 candidates, totalling 10 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   7 out of  10 | elapsed:    0.7s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:    1.0s finished


RandomizedSearchCV(cv=2, error_score=nan,
                   estimator=ExtraTreeRegressor(ccp_alpha=0.0, criterion='mse',
                                                max_depth=None,
                                                max_features='auto',
                                                max_leaf_nodes=None,
                                                min_impurity_decrease=0.0,
                                                min_impurity_split=None,
                                                min_samples_leaf=1,
                                                min_samples_split=2,
                                                min_weight_fraction_leaf=0.0,
                                                random_state=None,
                                                splitter='best'),
                   iid='deprecated', n_iter=5, n_jobs=-1,
                   param_distributions={'max_depth': [29, 30, 32],
                                        'max_features':

In [58]:
best_params_et = random_search_et.best_params_
print('\n Best hyperparameters:',best_params_et)


 Best hyperparameters: {'random_state': 9502, 'max_features': 'log2', 'max_depth': 30}


In [59]:
ypred = random_search_et.predict(X_test)
error(Y_test, ypred)

Root Mean Square Error (RMSE)= 0.232 
Mean Absolute Error (MAE)= 0.297 
R^2 = 0.627


### Tuning BaggingRegresor

In [66]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import BaggingRegressor
params = {
        'random_state' : np.random.randint(10000, size=20),
        'n_estimators' : np.random.randint(100, size=20),
        }
reg = BaggingRegressor()

skf = StratifiedKFold(n_splits = 3, shuffle = True, random_state = 42)

random_search_b = RandomizedSearchCV(reg, param_distributions=params,
                                   n_iter= 1 ,cv = 2,  n_jobs=-1, verbose=3)

random_search_b.fit(X_train, Y_train)


Fitting 2 folds for each of 1 candidates, totalling 2 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    5.6s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    5.6s finished


RandomizedSearchCV(cv=2, error_score=nan,
                   estimator=BaggingRegressor(base_estimator=None,
                                              bootstrap=True,
                                              bootstrap_features=False,
                                              max_features=1.0, max_samples=1.0,
                                              n_estimators=10, n_jobs=None,
                                              oob_score=False,
                                              random_state=None, verbose=0,
                                              warm_start=False),
                   iid='deprecated', n_iter=1, n_jobs=-1,
                   param_distributions={'n_estimators': array([24, 26, 74, 34, 46, 55, 64, 73, 60, 33, 32, 67, 72, 68, 13, 82, 72,
       27, 74, 66]),
                                        'random_state': array([ 997, 7075,   33,  709, 7537,  202, 3364, 9735, 5046, 3140, 3980,
       8397,  736, 6471, 7149, 4996, 5474, 5148, 5035, 99

In [67]:
best_params_b = random_search_b.best_params_
print('\n Best hyperparameters:',best_params_b)


 Best hyperparameters: {'random_state': 5035, 'n_estimators': 27}


In [68]:
ypred = random_search_b.predict(X_test)
error(Y_test, ypred)

Root Mean Square Error (RMSE)= 0.127 
Mean Absolute Error (MAE)= 0.234 
R^2 = 0.795


# Mixing of Best Models 

###  mixing xboost and bagging

In [60]:
bes_xgb = XGBRegressor(subsample = 0.9, random_state = 3061,
                       min_child_weight = 7, max_depth =  32,
                       gamma =  7,colsample_bytree = 0.8,
                       alpha = 150, learning_rate=0.9, 
                       n_estimators=30)#, objective='reg:squarederror')
baaggin = BaggingRegressor(base_estimator=bes_xgb, random_state = 5627, n_estimators = 10)
baaggin.fit(X_train, Y_train)



BaggingRegressor(base_estimator=XGBRegressor(alpha=150, base_score=0.5,
                                             booster='gbtree',
                                             colsample_bylevel=1,
                                             colsample_bynode=1,
                                             colsample_bytree=0.8, gamma=7,
                                             importance_type='gain',
                                             learning_rate=0.9,
                                             max_delta_step=0, max_depth=32,
                                             min_child_weight=7, missing=None,
                                             n_estimators=30, n_jobs=1,
                                             nthread=None,
                                             objective='reg:linear',
                                             random_state=3061, reg_alpha=0,
                                             reg_lambda=1, scale_pos_weight=1,
             

In [61]:
ypred = baaggin.predict(X_test)
error(Y_test, ypred)

Root Mean Square Error (RMSE)= 0.138 
Mean Absolute Error (MAE)= 0.263 
R^2 = 0.778


### mixing bagging and extra_tree 

In [64]:
best_extr = ExtraTreeRegressor(random_state = 9502, max_features = 'log2', max_depth = 30, splitter='best')
baaggin2 = BaggingRegressor(base_estimator=best_extr, random_state = 5627, n_estimators = 10)
baaggin2.fit(X_train, Y_train)

BaggingRegressor(base_estimator=ExtraTreeRegressor(ccp_alpha=0.0,
                                                   criterion='mse',
                                                   max_depth=30,
                                                   max_features='log2',
                                                   max_leaf_nodes=None,
                                                   min_impurity_decrease=0.0,
                                                   min_impurity_split=None,
                                                   min_samples_leaf=1,
                                                   min_samples_split=2,
                                                   min_weight_fraction_leaf=0.0,
                                                   random_state=9502,
                                                   splitter='best'),
                 bootstrap=True, bootstrap_features=False, max_features=1.0,
                 max_samples=1.0, n_estimators=10, n_jobs=None,

In [65]:
ypred = baaggin2.predict(X_test)
error(Y_test, ypred)

Root Mean Square Error (RMSE)= 0.133 
Mean Absolute Error (MAE)= 0.243 
R^2 = 0.786
