In [2]:
#!pip install stop-words

In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("processed_data.csv")

  exec(code_obj, self.user_global_ns, self.user_ns)


In [3]:
from ast import literal_eval

df["review_translate_sentences_lemma"] = df.apply(lambda row: " ".join(literal_eval(row["review_translate_sentences_lemma"])[0]), axis=1 )

In [4]:
df['dataset_name'].value_counts()

rozetka                            391784
tripadvisor_restaurants_ukraine    191191
tripadvisor_hotels_ukraine          79932
Name: dataset_name, dtype: int64

In [6]:
#Note: 
df["rating"].value_counts()

5.0    433247
4.0    108040
3.0     47286
1.0     43488
2.0     30846
Name: rating, dtype: int64

In [9]:
train_val_test_col = pd.read_csv("train_val_test_indices.csv")

In [10]:
train_val_test_col.head()

Unnamed: 0,index,split
0,0,train
1,1,test
2,2,train
3,3,train
4,4,train


In [11]:
train_val_test_col["split"].value_counts()

train    530324
test      66293
val       66290
Name: split, dtype: int64

In [12]:
df["train_val_test"] = train_val_test_col["split"]

In [14]:
from stop_words import get_stop_words
from string import punctuation

ua_stop_words = get_stop_words("ukrainian")

stop_words_and_punctuation = ua_stop_words + list(punctuation)

In [15]:
stop_words_and_punctuation = [w if len(w) < 2 else w.replace('\'', '') for w in stop_words_and_punctuation]

## bag-of-words + lemmatization, stopwords cleaning, no punctuation to tf idf vectorizer

In [16]:
import joblib
import os
from sklearn.metrics import confusion_matrix, f1_score
import json
import os.path


def save_model(model, model_name, data_trained):
    """
    model: pipeline instance
    model_name: string
    data_trained: string "all_data", "rozetka", "tripadvisor_hotels_ukraine" or "tripadvisor_restaurants_ukraine"
    """
    joblib.dump(model, 'models/' + model_name + '/' + data_trained + '_trained_model.pkl', compress = 1)
    


def get_metrics(model, data_trained, df, model_name="default"):
    """
    model: pipeline instance
    data_trained: string "all_data", "rozetka", "tripadvisor_hotels_ukraine" or "tripadvisor_restaurants_ukraine"
    df: full original DataFrame with all data for all websites
    model_name: default can any model but xgb cause xgb has class encoding 0,1,2,3,4 not 1,2,3,4,5
    """
    
    metrics_dict = {}
    
    if data_trained == "all_data":
        
        cur_df = df
        
        if "predictions" in cur_df.columns:
            del cur_df["predictions"]
            
        cur_df["predictions"] = model.predict(cur_df["review_translate_sentences_lemma"])
        
        if model_name == "xgb":
            cur_df["predictions"] = cur_df["predictions"] + 1
            #cur_df["rating"] = cur_df["rating"] + 1
            

        for data_src in ["all_data", "rozetka", "tripadvisor_hotels_ukraine", "tripadvisor_restaurants_ukraine"]:
            y_train_pred = None
            y_val_pred = None
            y_test_pred = None
            y_train = None
            y_val = None
            y_test = None
            
            if data_src == "all_data":
                cur_sub_df = cur_df
            else:
                cur_sub_df = cur_df[cur_df['dataset_name'] == data_src]
                
            y_train_pred = cur_sub_df[cur_sub_df['train_val_test'] == "train"]["predictions"]
            y_val_pred = cur_sub_df[cur_sub_df['train_val_test'] == "val"]["predictions"]
            y_test_pred = cur_sub_df[cur_sub_df['train_val_test'] == "test"]["predictions"]

            y_train = cur_sub_df[cur_sub_df['train_val_test'] == "train"]["rating"]
            y_val = cur_sub_df[cur_sub_df['train_val_test'] == "val"]["rating"]
            y_test = cur_sub_df[cur_sub_df['train_val_test'] == "test"]["rating"]
            
            
            f1_train = f1_score(y_train_pred, y_train, average="micro")
            f1_val = f1_score(y_val_pred, y_val, average="micro")
            f1_test = f1_score(y_test_pred, y_test, average="micro")


            conf_matrix_train = confusion_matrix(y_train, y_train_pred, labels=[1,2,3,4,5])
            conf_matrix_val = confusion_matrix(y_val, y_val_pred, labels=[1,2,3,4,5])
            conf_matrix_test = confusion_matrix(y_test, y_test_pred, labels=[1,2,3,4,5])
            
            cur_metrics_dict = {
                
                "trained_on_data": data_trained,
                "evaluated_on_data": data_src,
                
                "f1_micro_train": f1_train,
                "f1_micro_val": f1_val,
                "f1_micro_test": f1_test,

                "conf_matrix_train": conf_matrix_train.tolist(),
                "conf_matrix_val": conf_matrix_val.tolist(),
                "conf_matrix_test": conf_matrix_test.tolist()

            }
            
            metrics_dict["trained_on_" + data_trained + "__eval_on_" + data_src] = cur_metrics_dict
            
    else:
        
        cur_sub_df = df[df['dataset_name'] == data_trained]
        if "predictions" in cur_sub_df.columns:
            del cur_sub_df["predictions"]
            
        cur_sub_df["predictions"] = model.predict(cur_sub_df["review_translate_sentences_lemma"])
        
        if model_name in ["xgb", "lgbm"]:
            cur_sub_df["predictions"] = cur_sub_df["predictions"] + 1
            #cur_sub_df["rating"] = cur_sub_df["rating"] + 1
        
        y_train_pred = None
        y_val_pred = None
        y_test_pred = None
        y_train = None
        y_val = None
        y_test = None

        y_train_pred = cur_sub_df[cur_sub_df['train_val_test'] == "train"]["predictions"]
        y_val_pred = cur_sub_df[cur_sub_df['train_val_test'] == "val"]["predictions"]
        y_test_pred = cur_sub_df[cur_sub_df['train_val_test'] == "test"]["predictions"]

        y_train = cur_sub_df[cur_sub_df['train_val_test'] == "train"]["rating"]
        y_val = cur_sub_df[cur_sub_df['train_val_test'] == "val"]["rating"]
        y_test = cur_sub_df[cur_sub_df['train_val_test'] == "test"]["rating"]
        
        


        f1_train = f1_score(y_train_pred, y_train, average="micro")
        f1_val = f1_score(y_val_pred, y_val, average="micro")
        f1_test = f1_score(y_test_pred, y_test, average="micro")


        conf_matrix_train = confusion_matrix(y_train, y_train_pred, labels=[1,2,3,4,5])
        conf_matrix_val = confusion_matrix(y_val, y_val_pred, labels=[1,2,3,4,5])
        conf_matrix_test = confusion_matrix(y_test, y_test_pred, labels=[1,2,3,4,5])
        
        cur_metrics_dict = {
                
                "trained_on_data": data_trained,
                "evaluated_on_data": data_trained,
                
                "f1_micro_train": f1_train,
                "f1_micro_val": f1_val,
                "f1_micro_test": f1_test,

                "conf_matrix_train": conf_matrix_train.tolist(),
                "conf_matrix_val": conf_matrix_val.tolist(),
                "conf_matrix_test": conf_matrix_test.tolist()

            }
        
        
        metrics_dict["trained_on_" + data_trained + "__eval_on_" + data_trained] = cur_metrics_dict
        
    
    return metrics_dict
            
        
        
def save_model_and_metrics(model, model_name, data_trained, df):
    """
    model: pipeline instance
    model_name: string
    data_trained: string "all_data", "rozetka", "tripadvisor_hotels_ukraine" or "tripadvisor_restaurants_ukraine"
    df: full original DataFrame with all data for all websites
    """
    
    if not os.path.exists('models/' + model_name):
        os.mkdir('models/' + model_name)
    
    save_model(model, model_name, data_trained)
    metrics = get_metrics(model, data_trained, df, model_name)
     
    with open('models/' + model_name + '/' + model_name + "_trained_on_" + data_trained + "_metrics.json", 'w') as fp:
        json.dump(metrics, fp)
    
        
 

# XGBoost

In [None]:
"""
Important: XGB has class encoding 0,1,2,3,4 NOT 1,2,3,4,5 
"""

In [84]:
import xgboost as xgb
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import f1_score


from skopt.space import Real, Integer, Categorical
from skopt.utils import use_named_args
from skopt import forest_minimize



def train_xgb_model(data_trained, df, model_name):
    """
    data_trained: string "all_data", "rozetka", "tripadvisor_hotels_ukraine" or "tripadvisor_restaurants_ukraine"
    df: full original DataFrame with all data for all websites
    """

    df_tmp = df.copy()
    # modify rating to match 0-4 encoding pattern needed for the model
    df_tmp['rating'] = df_tmp['rating'].astype(int) - 1
    
    if data_trained == "all_data":
        cur_df = df_tmp
    else:
        cur_df = df_tmp[df_tmp['dataset_name'] == data_trained]
        
    train_df = cur_df[cur_df['train_val_test'] == "train"]
    val_df = cur_df[cur_df['train_val_test'] == "val"]
    test_df = cur_df[cur_df['train_val_test'] == "test"]


    #from sklearn.ensemble import GradientBoostingRegressor
    #from sklearn.model_selection import cross_val_score


    X_train, y_train = train_df["review_translate_sentences_lemma"], train_df["rating"]
    X_val, y_val = val_df["review_translate_sentences_lemma"], val_df["rating"]
    X_test, y_test = test_df["review_translate_sentences_lemma"], val_df["rating"]

    pipeline_clf = Pipeline(
        [
            ("vect", TfidfVectorizer(stop_words=stop_words_and_punctuation)),
            ("clf", xgb.XGBClassifier(random_state=1, objective='multi:softmax', n_jobs=3))
        ]
    )

    space  = [Real(0.7, 0.95, name="vect__max_df"),
              Integer(5, 50, name="vect__min_df"),
              Integer(1, 2, name="ngrams"),
              Integer(3, 25, name="clf__max_depth"),
              Real(1, 9, name="clf__gamma"),
              Integer(40,180, name="clf__reg_alpha"),
              Real(0, 1, name="clf__reg_lambda"),
              Real(0.5,1, name="clf__colsample_bytree"),
              Integer(0,10, name="clf__min_child_weight"),
              Integer(30,300, name="clf__n_estimators")

             ]
    
    @use_named_args(space)
    def objective(**params):
        print(params)

        params_modified = params.copy()
        params_modified["vect__ngram_range"] = (1, params['ngrams'])
        del params_modified['ngrams']

        print(params_modified)

        pipeline_clf.set_params(**params_modified)
        pipeline_clf.fit(X_train, y_train)

        y_val_pred = pipeline_clf.predict(X_val)
        f1_val = f1_score(y_val_pred, y_val, average="micro")


        # return negantive f1 because by minimizing it we will improve the model
        return -f1_val
    
    res_gp = forest_minimize(objective, space, n_calls=70, random_state=0, verbose=True)

    print(res_gp)
    
    #[0.8482111545562545, 5, 2]
    
    
    params = {"vect__max_df": float(res_gp['x'][0]) ,"vect__min_df": int(res_gp['x'][1]) ,
                               "vect__ngram_range": (1, int(res_gp['x'][2])), "clf__max_depth": int(res_gp['x'][3]), 
                               "clf__gamma": float(res_gp['x'][4]), "clf__reg_alpha": int(res_gp['x'][5]),
                               "clf__reg_lambda": float(res_gp['x'][6]), "clf__colsample_bytree": float(res_gp['x'][7]),
                               "clf__min_child_weight": int(res_gp['x'][8]), "clf__n_estimators": int(res_gp['x'][9])}
    
    with open('models/' + model_name + '/' + model_name + "_trained_on_" + data_trained + "_parameters.json", 'w') as fp:
        json.dump(params, fp)

    
    # refit
    pipeline_clf.set_params(**params)
    pipeline_clf.fit(X_train, y_train)
    
    return pipeline_clf
    
    

logistic_regression_pipeline = None
model_name = "xgb"

for data_src in ["tripadvisor_hotels_ukraine", "tripadvisor_restaurants_ukraine", "all_data", "rozetka"
                ]:
    print("Starting:")
    print(model_name + " " + data_src)
    print("\n")
    
    if not os.path.exists('models/' + model_name):
        os.mkdir('models/' + model_name)
        
    pipeline = train_xgb_model(data_src, df, model_name)
    save_model_and_metrics(pipeline, model_name, data_src, df)
    

Starting:
xgb tripadvisor_hotels_ukraine


Iteration No: 1 started. Evaluating function at random point.
{'vect__max_df': 0.8482111545562545, 'vect__min_df': 5, 'ngrams': 2, 'clf__max_depth': 6, 'clf__gamma': 7.778013910273005, 'clf__reg_alpha': 61, 'clf__reg_lambda': 0.38438170729269994, 'clf__colsample_bytree': 0.6487673032722362, 'clf__min_child_weight': 8, 'clf__n_estimators': 118}
{'vect__max_df': 0.8482111545562545, 'vect__min_df': 5, 'clf__max_depth': 6, 'clf__gamma': 7.778013910273005, 'clf__reg_alpha': 61, 'clf__reg_lambda': 0.38438170729269994, 'clf__colsample_bytree': 0.6487673032722362, 'clf__min_child_weight': 8, 'clf__n_estimators': 118, 'vect__ngram_range': (1, 2)}
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 26.6071
Function value obtained: -0.5572
Current minimum: -0.5572
Iteration No: 2 started. Evaluating function at random point.
{'vect__max_df': 0.7681640736450283, 'vect__min_df': 6, 'ngrams': 1, 'clf__max_depth': 10, 'clf__gamma': 5.23115935

Iteration No: 11 ended. Search finished for the next optimal point.
Time taken: 21.0237
Function value obtained: -0.5610
Current minimum: -0.5616
Iteration No: 12 started. Searching for the next optimal point.
{'vect__max_df': 0.814925020860342, 'vect__min_df': 33, 'ngrams': 1, 'clf__max_depth': 10, 'clf__gamma': 6.767455436935602, 'clf__reg_alpha': 66, 'clf__reg_lambda': 0.905949695527239, 'clf__colsample_bytree': 0.7413145415601301, 'clf__min_child_weight': 3, 'clf__n_estimators': 36}
{'vect__max_df': 0.814925020860342, 'vect__min_df': 33, 'clf__max_depth': 10, 'clf__gamma': 6.767455436935602, 'clf__reg_alpha': 66, 'clf__reg_lambda': 0.905949695527239, 'clf__colsample_bytree': 0.7413145415601301, 'clf__min_child_weight': 3, 'clf__n_estimators': 36, 'vect__ngram_range': (1, 1)}
Iteration No: 12 ended. Search finished for the next optimal point.
Time taken: 7.6783
Function value obtained: -0.5589
Current minimum: -0.5616
Iteration No: 13 started. Searching for the next optimal point.
{

Iteration No: 22 ended. Search finished for the next optimal point.
Time taken: 4.9152
Function value obtained: -0.5539
Current minimum: -0.5619
Iteration No: 23 started. Searching for the next optimal point.
{'vect__max_df': 0.7566557337199771, 'vect__min_df': 8, 'ngrams': 1, 'clf__max_depth': 4, 'clf__gamma': 1.635023709548884, 'clf__reg_alpha': 75, 'clf__reg_lambda': 0.8273793555385663, 'clf__colsample_bytree': 0.9781356848784108, 'clf__min_child_weight': 2, 'clf__n_estimators': 41}
{'vect__max_df': 0.7566557337199771, 'vect__min_df': 8, 'clf__max_depth': 4, 'clf__gamma': 1.635023709548884, 'clf__reg_alpha': 75, 'clf__reg_lambda': 0.8273793555385663, 'clf__colsample_bytree': 0.9781356848784108, 'clf__min_child_weight': 2, 'clf__n_estimators': 41, 'vect__ngram_range': (1, 1)}
Iteration No: 23 ended. Search finished for the next optimal point.
Time taken: 5.5389
Function value obtained: -0.5582
Current minimum: -0.5619
Iteration No: 24 started. Searching for the next optimal point.
{'

Iteration No: 33 ended. Search finished for the next optimal point.
Time taken: 11.1510
Function value obtained: -0.5592
Current minimum: -0.5619
Iteration No: 34 started. Searching for the next optimal point.
{'vect__max_df': 0.9075898579578658, 'vect__min_df': 36, 'ngrams': 1, 'clf__max_depth': 3, 'clf__gamma': 4.190367146778331, 'clf__reg_alpha': 50, 'clf__reg_lambda': 0.6062890225960446, 'clf__colsample_bytree': 0.8985338813078849, 'clf__min_child_weight': 0, 'clf__n_estimators': 59}
{'vect__max_df': 0.9075898579578658, 'vect__min_df': 36, 'clf__max_depth': 3, 'clf__gamma': 4.190367146778331, 'clf__reg_alpha': 50, 'clf__reg_lambda': 0.6062890225960446, 'clf__colsample_bytree': 0.8985338813078849, 'clf__min_child_weight': 0, 'clf__n_estimators': 59, 'vect__ngram_range': (1, 1)}
Iteration No: 34 ended. Search finished for the next optimal point.
Time taken: 5.4285
Function value obtained: -0.5584
Current minimum: -0.5619
Iteration No: 35 started. Searching for the next optimal point.

Iteration No: 44 ended. Search finished for the next optimal point.
Time taken: 10.6673
Function value obtained: -0.5614
Current minimum: -0.5619
Iteration No: 45 started. Searching for the next optimal point.
{'vect__max_df': 0.7286642788935723, 'vect__min_df': 41, 'ngrams': 1, 'clf__max_depth': 6, 'clf__gamma': 2.8881338917178687, 'clf__reg_alpha': 104, 'clf__reg_lambda': 0.653481037158875, 'clf__colsample_bytree': 0.9988397163487226, 'clf__min_child_weight': 0, 'clf__n_estimators': 92}
{'vect__max_df': 0.7286642788935723, 'vect__min_df': 41, 'clf__max_depth': 6, 'clf__gamma': 2.8881338917178687, 'clf__reg_alpha': 104, 'clf__reg_lambda': 0.653481037158875, 'clf__colsample_bytree': 0.9988397163487226, 'clf__min_child_weight': 0, 'clf__n_estimators': 92, 'vect__ngram_range': (1, 1)}
Iteration No: 45 ended. Search finished for the next optimal point.
Time taken: 15.4883
Function value obtained: -0.5592
Current minimum: -0.5619
Iteration No: 46 started. Searching for the next optimal poi

Iteration No: 55 ended. Search finished for the next optimal point.
Time taken: 7.7125
Function value obtained: -0.5594
Current minimum: -0.5619
Iteration No: 56 started. Searching for the next optimal point.
{'vect__max_df': 0.8272328180412019, 'vect__min_df': 46, 'ngrams': 1, 'clf__max_depth': 3, 'clf__gamma': 1.9771753518438617, 'clf__reg_alpha': 82, 'clf__reg_lambda': 0.34542562799218685, 'clf__colsample_bytree': 0.9068387213537451, 'clf__min_child_weight': 1, 'clf__n_estimators': 269}
{'vect__max_df': 0.8272328180412019, 'vect__min_df': 46, 'clf__max_depth': 3, 'clf__gamma': 1.9771753518438617, 'clf__reg_alpha': 82, 'clf__reg_lambda': 0.34542562799218685, 'clf__colsample_bytree': 0.9068387213537451, 'clf__min_child_weight': 1, 'clf__n_estimators': 269, 'vect__ngram_range': (1, 1)}
Iteration No: 56 ended. Search finished for the next optimal point.
Time taken: 21.5116
Function value obtained: -0.5580
Current minimum: -0.5619
Iteration No: 57 started. Searching for the next optimal 

Iteration No: 66 ended. Search finished for the next optimal point.
Time taken: 5.2716
Function value obtained: -0.5612
Current minimum: -0.5619
Iteration No: 67 started. Searching for the next optimal point.
{'vect__max_df': 0.8438356508106002, 'vect__min_df': 45, 'ngrams': 1, 'clf__max_depth': 3, 'clf__gamma': 2.348574691769236, 'clf__reg_alpha': 40, 'clf__reg_lambda': 0.9987689523037375, 'clf__colsample_bytree': 0.5125818253742627, 'clf__min_child_weight': 5, 'clf__n_estimators': 87}
{'vect__max_df': 0.8438356508106002, 'vect__min_df': 45, 'clf__max_depth': 3, 'clf__gamma': 2.348574691769236, 'clf__reg_alpha': 40, 'clf__reg_lambda': 0.9987689523037375, 'clf__colsample_bytree': 0.5125818253742627, 'clf__min_child_weight': 5, 'clf__n_estimators': 87, 'vect__ngram_range': (1, 1)}
Iteration No: 67 ended. Search finished for the next optimal point.
Time taken: 4.9974
Function value obtained: -0.5626
Current minimum: -0.5626
Iteration No: 68 started. Searching for the next optimal point.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Starting:
xgb tripadvisor_restaurants_ukraine


Iteration No: 1 started. Evaluating function at random point.
{'vect__max_df': 0.8482111545562545, 'vect__min_df': 5, 'ngrams': 2, 'clf__max_depth': 6, 'clf__gamma': 7.778013910273005, 'clf__reg_alpha': 61, 'clf__reg_lambda': 0.38438170729269994, 'clf__colsample_bytree': 0.6487673032722362, 'clf__min_child_weight': 8, 'clf__n_estimators': 118}
{'vect__max_df': 0.8482111545562545, 'vect__min_df': 5, 'clf__max_depth': 6, 'clf__gamma': 7.778013910273005, 'clf__reg_alpha': 61, 'clf__reg_lambda': 0.38438170729269994, 'clf__colsample_bytree': 0.6487673032722362, 'clf__min_child_weight': 8, 'clf__n_estimators': 118, 'vect__ngram_range': (1, 2)}


  % sorted(inconsistent)


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 42.4426
Function value obtained: -0.5897
Current minimum: -0.5897
Iteration No: 2 started. Evaluating function at random point.
{'vect__max_df': 0.7681640736450283, 'vect__min_df': 6, 'ngrams': 1, 'clf__max_depth': 10, 'clf__gamma': 5.231159358023237, 'clf__reg_alpha': 128, 'clf__reg_lambda': 0.39278479610082984, 'clf__colsample_bytree': 0.9180393817686889, 'clf__min_child_weight': 8, 'clf__n_estimators': 295}
{'vect__max_df': 0.7681640736450283, 'vect__min_df': 6, 'clf__max_depth': 10, 'clf__gamma': 5.231159358023237, 'clf__reg_alpha': 128, 'clf__reg_lambda': 0.39278479610082984, 'clf__colsample_bytree': 0.9180393817686889, 'clf__min_child_weight': 8, 'clf__n_estimators': 295, 'vect__ngram_range': (1, 1)}
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 160.1328
Function value obtained: -0.5889
Current minimum: -0.5897
Iteration No: 3 started. Evaluating function at random point.
{'vect__max_df': 0.

Iteration No: 12 ended. Search finished for the next optimal point.
Time taken: 68.2915
Function value obtained: -0.5917
Current minimum: -0.5934
Iteration No: 13 started. Searching for the next optimal point.
{'vect__max_df': 0.9222782459525762, 'vect__min_df': 48, 'ngrams': 1, 'clf__max_depth': 18, 'clf__gamma': 8.124691520308847, 'clf__reg_alpha': 44, 'clf__reg_lambda': 0.39049811287960245, 'clf__colsample_bytree': 0.8794330854058747, 'clf__min_child_weight': 9, 'clf__n_estimators': 120}
{'vect__max_df': 0.9222782459525762, 'vect__min_df': 48, 'clf__max_depth': 18, 'clf__gamma': 8.124691520308847, 'clf__reg_alpha': 44, 'clf__reg_lambda': 0.39049811287960245, 'clf__colsample_bytree': 0.8794330854058747, 'clf__min_child_weight': 9, 'clf__n_estimators': 120, 'vect__ngram_range': (1, 1)}
Iteration No: 13 ended. Search finished for the next optimal point.
Time taken: 103.9132
Function value obtained: -0.5929
Current minimum: -0.5934
Iteration No: 14 started. Searching for the next optima

Iteration No: 23 ended. Search finished for the next optimal point.
Time taken: 79.5046
Function value obtained: -0.5926
Current minimum: -0.5950
Iteration No: 24 started. Searching for the next optimal point.
{'vect__max_df': 0.9421167455707857, 'vect__min_df': 46, 'ngrams': 2, 'clf__max_depth': 4, 'clf__gamma': 1.3680143915973053, 'clf__reg_alpha': 62, 'clf__reg_lambda': 0.4816013304493978, 'clf__colsample_bytree': 0.8727158559247455, 'clf__min_child_weight': 9, 'clf__n_estimators': 165}
{'vect__max_df': 0.9421167455707857, 'vect__min_df': 46, 'clf__max_depth': 4, 'clf__gamma': 1.3680143915973053, 'clf__reg_alpha': 62, 'clf__reg_lambda': 0.4816013304493978, 'clf__colsample_bytree': 0.8727158559247455, 'clf__min_child_weight': 9, 'clf__n_estimators': 165, 'vect__ngram_range': (1, 2)}
Iteration No: 24 ended. Search finished for the next optimal point.
Time taken: 43.7865
Function value obtained: -0.5919
Current minimum: -0.5950
Iteration No: 25 started. Searching for the next optimal p

Iteration No: 34 ended. Search finished for the next optimal point.
Time taken: 111.2635
Function value obtained: -0.5997
Current minimum: -0.5997
Iteration No: 35 started. Searching for the next optimal point.
{'vect__max_df': 0.7261187254265539, 'vect__min_df': 16, 'ngrams': 1, 'clf__max_depth': 25, 'clf__gamma': 1.188761259892849, 'clf__reg_alpha': 101, 'clf__reg_lambda': 0.718250537986963, 'clf__colsample_bytree': 0.7103444702086815, 'clf__min_child_weight': 3, 'clf__n_estimators': 91}
{'vect__max_df': 0.7261187254265539, 'vect__min_df': 16, 'clf__max_depth': 25, 'clf__gamma': 1.188761259892849, 'clf__reg_alpha': 101, 'clf__reg_lambda': 0.718250537986963, 'clf__colsample_bytree': 0.7103444702086815, 'clf__min_child_weight': 3, 'clf__n_estimators': 91, 'vect__ngram_range': (1, 1)}
Iteration No: 35 ended. Search finished for the next optimal point.
Time taken: 87.7834
Function value obtained: -0.5976
Current minimum: -0.5997
Iteration No: 36 started. Searching for the next optimal po

Iteration No: 45 ended. Search finished for the next optimal point.
Time taken: 64.7614
Function value obtained: -0.5974
Current minimum: -0.5997
Iteration No: 46 started. Searching for the next optimal point.
{'vect__max_df': 0.8219698064999482, 'vect__min_df': 21, 'ngrams': 2, 'clf__max_depth': 15, 'clf__gamma': 1.2014251946064904, 'clf__reg_alpha': 42, 'clf__reg_lambda': 0.191880904842391, 'clf__colsample_bytree': 0.8497241551897272, 'clf__min_child_weight': 10, 'clf__n_estimators': 79}
{'vect__max_df': 0.8219698064999482, 'vect__min_df': 21, 'clf__max_depth': 15, 'clf__gamma': 1.2014251946064904, 'clf__reg_alpha': 42, 'clf__reg_lambda': 0.191880904842391, 'clf__colsample_bytree': 0.8497241551897272, 'clf__min_child_weight': 10, 'clf__n_estimators': 79, 'vect__ngram_range': (1, 2)}
Iteration No: 46 ended. Search finished for the next optimal point.
Time taken: 70.5331
Function value obtained: -0.5985
Current minimum: -0.5997
Iteration No: 47 started. Searching for the next optimal p

Iteration No: 56 ended. Search finished for the next optimal point.
Time taken: 46.5868
Function value obtained: -0.5946
Current minimum: -0.5997
Iteration No: 57 started. Searching for the next optimal point.
{'vect__max_df': 0.9476913238616362, 'vect__min_df': 25, 'ngrams': 1, 'clf__max_depth': 25, 'clf__gamma': 4.261962602442782, 'clf__reg_alpha': 60, 'clf__reg_lambda': 0.8787240317142794, 'clf__colsample_bytree': 0.5574300320439686, 'clf__min_child_weight': 4, 'clf__n_estimators': 185}
{'vect__max_df': 0.9476913238616362, 'vect__min_df': 25, 'clf__max_depth': 25, 'clf__gamma': 4.261962602442782, 'clf__reg_alpha': 60, 'clf__reg_lambda': 0.8787240317142794, 'clf__colsample_bytree': 0.5574300320439686, 'clf__min_child_weight': 4, 'clf__n_estimators': 185, 'vect__ngram_range': (1, 1)}
Iteration No: 57 ended. Search finished for the next optimal point.
Time taken: 165.2195
Function value obtained: -0.5946
Current minimum: -0.5997
Iteration No: 58 started. Searching for the next optimal 

Iteration No: 67 ended. Search finished for the next optimal point.
Time taken: 313.8682
Function value obtained: -0.5978
Current minimum: -0.5997
Iteration No: 68 started. Searching for the next optimal point.
{'vect__max_df': 0.829783349493806, 'vect__min_df': 5, 'ngrams': 2, 'clf__max_depth': 8, 'clf__gamma': 1.1935312046519906, 'clf__reg_alpha': 51, 'clf__reg_lambda': 0.8334325657379361, 'clf__colsample_bytree': 0.7594046206079421, 'clf__min_child_weight': 10, 'clf__n_estimators': 62}
{'vect__max_df': 0.829783349493806, 'vect__min_df': 5, 'clf__max_depth': 8, 'clf__gamma': 1.1935312046519906, 'clf__reg_alpha': 51, 'clf__reg_lambda': 0.8334325657379361, 'clf__colsample_bytree': 0.7594046206079421, 'clf__min_child_weight': 10, 'clf__n_estimators': 62, 'vect__ngram_range': (1, 2)}
Iteration No: 68 ended. Search finished for the next optimal point.
Time taken: 33.2077
Function value obtained: -0.5942
Current minimum: -0.5997
Iteration No: 69 started. Searching for the next optimal poin

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Starting:
xgb all_data


Iteration No: 1 started. Evaluating function at random point.
{'vect__max_df': 0.8482111545562545, 'vect__min_df': 5, 'ngrams': 2, 'clf__max_depth': 6, 'clf__gamma': 7.778013910273005, 'clf__reg_alpha': 61, 'clf__reg_lambda': 0.38438170729269994, 'clf__colsample_bytree': 0.6487673032722362, 'clf__min_child_weight': 8, 'clf__n_estimators': 118}
{'vect__max_df': 0.8482111545562545, 'vect__min_df': 5, 'clf__max_depth': 6, 'clf__gamma': 7.778013910273005, 'clf__reg_alpha': 61, 'clf__reg_lambda': 0.38438170729269994, 'clf__colsample_bytree': 0.6487673032722362, 'clf__min_child_weight': 8, 'clf__n_estimators': 118, 'vect__ngram_range': (1, 2)}


  % sorted(inconsistent)


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 142.1214
Function value obtained: -0.6626
Current minimum: -0.6626
Iteration No: 2 started. Evaluating function at random point.
{'vect__max_df': 0.7681640736450283, 'vect__min_df': 6, 'ngrams': 1, 'clf__max_depth': 10, 'clf__gamma': 5.231159358023237, 'clf__reg_alpha': 128, 'clf__reg_lambda': 0.39278479610082984, 'clf__colsample_bytree': 0.9180393817686889, 'clf__min_child_weight': 8, 'clf__n_estimators': 295}
{'vect__max_df': 0.7681640736450283, 'vect__min_df': 6, 'clf__max_depth': 10, 'clf__gamma': 5.231159358023237, 'clf__reg_alpha': 128, 'clf__reg_lambda': 0.39278479610082984, 'clf__colsample_bytree': 0.9180393817686889, 'clf__min_child_weight': 8, 'clf__n_estimators': 295, 'vect__ngram_range': (1, 1)}
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 569.0946
Function value obtained: -0.6610
Current minimum: -0.6626
Iteration No: 3 started. Evaluating function at random point.
{'vect__max_df': 0

Iteration No: 12 ended. Search finished for the next optimal point.
Time taken: 66.5493
Function value obtained: -0.6595
Current minimum: -0.6650
Iteration No: 13 started. Searching for the next optimal point.
{'vect__max_df': 0.9106907110454832, 'vect__min_df': 45, 'ngrams': 1, 'clf__max_depth': 7, 'clf__gamma': 1.3251216622299138, 'clf__reg_alpha': 52, 'clf__reg_lambda': 0.9911141692408407, 'clf__colsample_bytree': 0.8482814870900379, 'clf__min_child_weight': 1, 'clf__n_estimators': 80}
{'vect__max_df': 0.9106907110454832, 'vect__min_df': 45, 'clf__max_depth': 7, 'clf__gamma': 1.3251216622299138, 'clf__reg_alpha': 52, 'clf__reg_lambda': 0.9911141692408407, 'clf__colsample_bytree': 0.8482814870900379, 'clf__min_child_weight': 1, 'clf__n_estimators': 80, 'vect__ngram_range': (1, 1)}
Iteration No: 13 ended. Search finished for the next optimal point.
Time taken: 107.2587
Function value obtained: -0.6635
Current minimum: -0.6650
Iteration No: 14 started. Searching for the next optimal po

Iteration No: 23 ended. Search finished for the next optimal point.
Time taken: 152.2080
Function value obtained: -0.6637
Current minimum: -0.6650
Iteration No: 24 started. Searching for the next optimal point.
{'vect__max_df': 0.8206470871935969, 'vect__min_df': 18, 'ngrams': 1, 'clf__max_depth': 25, 'clf__gamma': 5.132707396612013, 'clf__reg_alpha': 98, 'clf__reg_lambda': 0.039290848363026216, 'clf__colsample_bytree': 0.8515942052327836, 'clf__min_child_weight': 9, 'clf__n_estimators': 199}
{'vect__max_df': 0.8206470871935969, 'vect__min_df': 18, 'clf__max_depth': 25, 'clf__gamma': 5.132707396612013, 'clf__reg_alpha': 98, 'clf__reg_lambda': 0.039290848363026216, 'clf__colsample_bytree': 0.8515942052327836, 'clf__min_child_weight': 9, 'clf__n_estimators': 199, 'vect__ngram_range': (1, 1)}
Iteration No: 24 ended. Search finished for the next optimal point.
Time taken: 868.8927
Function value obtained: -0.6643
Current minimum: -0.6650
Iteration No: 25 started. Searching for the next opt

Iteration No: 34 ended. Search finished for the next optimal point.
Time taken: 713.7370
Function value obtained: -0.6655
Current minimum: -0.6695
Iteration No: 35 started. Searching for the next optimal point.
{'vect__max_df': 0.932980058238462, 'vect__min_df': 36, 'ngrams': 2, 'clf__max_depth': 24, 'clf__gamma': 2.4331836355598133, 'clf__reg_alpha': 180, 'clf__reg_lambda': 0.2194098058406228, 'clf__colsample_bytree': 0.578529820676607, 'clf__min_child_weight': 2, 'clf__n_estimators': 223}
{'vect__max_df': 0.932980058238462, 'vect__min_df': 36, 'clf__max_depth': 24, 'clf__gamma': 2.4331836355598133, 'clf__reg_alpha': 180, 'clf__reg_lambda': 0.2194098058406228, 'clf__colsample_bytree': 0.578529820676607, 'clf__min_child_weight': 2, 'clf__n_estimators': 223, 'vect__ngram_range': (1, 2)}
Iteration No: 35 ended. Search finished for the next optimal point.
Time taken: 695.7389
Function value obtained: -0.6645
Current minimum: -0.6695
Iteration No: 36 started. Searching for the next optimal

Iteration No: 45 ended. Search finished for the next optimal point.
Time taken: 283.9174
Function value obtained: -0.6676
Current minimum: -0.6695
Iteration No: 46 started. Searching for the next optimal point.
{'vect__max_df': 0.7281971528807554, 'vect__min_df': 18, 'ngrams': 2, 'clf__max_depth': 13, 'clf__gamma': 1.851513264194445, 'clf__reg_alpha': 41, 'clf__reg_lambda': 0.3036383988722106, 'clf__colsample_bytree': 0.7894018227026649, 'clf__min_child_weight': 0, 'clf__n_estimators': 297}
{'vect__max_df': 0.7281971528807554, 'vect__min_df': 18, 'clf__max_depth': 13, 'clf__gamma': 1.851513264194445, 'clf__reg_alpha': 41, 'clf__reg_lambda': 0.3036383988722106, 'clf__colsample_bytree': 0.7894018227026649, 'clf__min_child_weight': 0, 'clf__n_estimators': 297, 'vect__ngram_range': (1, 2)}
Iteration No: 46 ended. Search finished for the next optimal point.
Time taken: 895.8397
Function value obtained: -0.6688
Current minimum: -0.6695
Iteration No: 47 started. Searching for the next optimal

Iteration No: 56 ended. Search finished for the next optimal point.
Time taken: 419.5507
Function value obtained: -0.6664
Current minimum: -0.6695
Iteration No: 57 started. Searching for the next optimal point.
{'vect__max_df': 0.8437035279555534, 'vect__min_df': 18, 'ngrams': 1, 'clf__max_depth': 23, 'clf__gamma': 1.0397234109625586, 'clf__reg_alpha': 40, 'clf__reg_lambda': 0.7975286545913046, 'clf__colsample_bytree': 0.9051021554151271, 'clf__min_child_weight': 6, 'clf__n_estimators': 50}
{'vect__max_df': 0.8437035279555534, 'vect__min_df': 18, 'clf__max_depth': 23, 'clf__gamma': 1.0397234109625586, 'clf__reg_alpha': 40, 'clf__reg_lambda': 0.7975286545913046, 'clf__colsample_bytree': 0.9051021554151271, 'clf__min_child_weight': 6, 'clf__n_estimators': 50, 'vect__ngram_range': (1, 1)}
Iteration No: 57 ended. Search finished for the next optimal point.
Time taken: 222.8991
Function value obtained: -0.6674
Current minimum: -0.6695
Iteration No: 58 started. Searching for the next optimal

Iteration No: 67 ended. Search finished for the next optimal point.
Time taken: 442.9033
Function value obtained: -0.6659
Current minimum: -0.6695
Iteration No: 68 started. Searching for the next optimal point.
{'vect__max_df': 0.7131473165148869, 'vect__min_df': 32, 'ngrams': 2, 'clf__max_depth': 9, 'clf__gamma': 1.4821550599008182, 'clf__reg_alpha': 59, 'clf__reg_lambda': 0.4952717188129123, 'clf__colsample_bytree': 0.7618023225753111, 'clf__min_child_weight': 0, 'clf__n_estimators': 182}
{'vect__max_df': 0.7131473165148869, 'vect__min_df': 32, 'clf__max_depth': 9, 'clf__gamma': 1.4821550599008182, 'clf__reg_alpha': 59, 'clf__reg_lambda': 0.4952717188129123, 'clf__colsample_bytree': 0.7618023225753111, 'clf__min_child_weight': 0, 'clf__n_estimators': 182, 'vect__ngram_range': (1, 2)}
Iteration No: 68 ended. Search finished for the next optimal point.
Time taken: 341.7247
Function value obtained: -0.6661
Current minimum: -0.6695
Iteration No: 69 started. Searching for the next optimal

Starting:
xgb rozetka


Iteration No: 1 started. Evaluating function at random point.
{'vect__max_df': 0.8482111545562545, 'vect__min_df': 5, 'ngrams': 2, 'clf__max_depth': 6, 'clf__gamma': 7.778013910273005, 'clf__reg_alpha': 61, 'clf__reg_lambda': 0.38438170729269994, 'clf__colsample_bytree': 0.6487673032722362, 'clf__min_child_weight': 8, 'clf__n_estimators': 118}
{'vect__max_df': 0.8482111545562545, 'vect__min_df': 5, 'clf__max_depth': 6, 'clf__gamma': 7.778013910273005, 'clf__reg_alpha': 61, 'clf__reg_lambda': 0.38438170729269994, 'clf__colsample_bytree': 0.6487673032722362, 'clf__min_child_weight': 8, 'clf__n_estimators': 118, 'vect__ngram_range': (1, 2)}


  % sorted(inconsistent)


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 66.8401
Function value obtained: -0.7190
Current minimum: -0.7190
Iteration No: 2 started. Evaluating function at random point.
{'vect__max_df': 0.7681640736450283, 'vect__min_df': 6, 'ngrams': 1, 'clf__max_depth': 10, 'clf__gamma': 5.231159358023237, 'clf__reg_alpha': 128, 'clf__reg_lambda': 0.39278479610082984, 'clf__colsample_bytree': 0.9180393817686889, 'clf__min_child_weight': 8, 'clf__n_estimators': 295}
{'vect__max_df': 0.7681640736450283, 'vect__min_df': 6, 'clf__max_depth': 10, 'clf__gamma': 5.231159358023237, 'clf__reg_alpha': 128, 'clf__reg_lambda': 0.39278479610082984, 'clf__colsample_bytree': 0.9180393817686889, 'clf__min_child_weight': 8, 'clf__n_estimators': 295, 'vect__ngram_range': (1, 1)}
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 262.3438
Function value obtained: -0.7181
Current minimum: -0.7190
Iteration No: 3 started. Evaluating function at random point.
{'vect__max_df': 0.

Iteration No: 12 ended. Search finished for the next optimal point.
Time taken: 86.3727
Function value obtained: -0.7184
Current minimum: -0.7203
Iteration No: 13 started. Searching for the next optimal point.
{'vect__max_df': 0.8625198955398626, 'vect__min_df': 20, 'ngrams': 2, 'clf__max_depth': 8, 'clf__gamma': 3.3749523132974386, 'clf__reg_alpha': 50, 'clf__reg_lambda': 0.6234872888982677, 'clf__colsample_bytree': 0.8106775979980763, 'clf__min_child_weight': 1, 'clf__n_estimators': 243}
{'vect__max_df': 0.8625198955398626, 'vect__min_df': 20, 'clf__max_depth': 8, 'clf__gamma': 3.3749523132974386, 'clf__reg_alpha': 50, 'clf__reg_lambda': 0.6234872888982677, 'clf__colsample_bytree': 0.8106775979980763, 'clf__min_child_weight': 1, 'clf__n_estimators': 243, 'vect__ngram_range': (1, 2)}
Iteration No: 13 ended. Search finished for the next optimal point.
Time taken: 202.9822
Function value obtained: -0.7202
Current minimum: -0.7203
Iteration No: 14 started. Searching for the next optimal 

Iteration No: 23 ended. Search finished for the next optimal point.
Time taken: 146.4732
Function value obtained: -0.7190
Current minimum: -0.7204
Iteration No: 24 started. Searching for the next optimal point.
{'vect__max_df': 0.8657395840922868, 'vect__min_df': 30, 'ngrams': 2, 'clf__max_depth': 15, 'clf__gamma': 1.3233122433062174, 'clf__reg_alpha': 100, 'clf__reg_lambda': 0.5949075879467026, 'clf__colsample_bytree': 0.5080706841686089, 'clf__min_child_weight': 0, 'clf__n_estimators': 118}
{'vect__max_df': 0.8657395840922868, 'vect__min_df': 30, 'clf__max_depth': 15, 'clf__gamma': 1.3233122433062174, 'clf__reg_alpha': 100, 'clf__reg_lambda': 0.5949075879467026, 'clf__colsample_bytree': 0.5080706841686089, 'clf__min_child_weight': 0, 'clf__n_estimators': 118, 'vect__ngram_range': (1, 2)}
Iteration No: 24 ended. Search finished for the next optimal point.
Time taken: 112.3301
Function value obtained: -0.7194
Current minimum: -0.7204
Iteration No: 25 started. Searching for the next opt

Iteration No: 34 ended. Search finished for the next optimal point.
Time taken: 186.3960
Function value obtained: -0.7194
Current minimum: -0.7205
Iteration No: 35 started. Searching for the next optimal point.
{'vect__max_df': 0.8974910416959262, 'vect__min_df': 48, 'ngrams': 2, 'clf__max_depth': 18, 'clf__gamma': 8.039590902017368, 'clf__reg_alpha': 101, 'clf__reg_lambda': 0.9839021417880869, 'clf__colsample_bytree': 0.5907622437991593, 'clf__min_child_weight': 0, 'clf__n_estimators': 106}
{'vect__max_df': 0.8974910416959262, 'vect__min_df': 48, 'clf__max_depth': 18, 'clf__gamma': 8.039590902017368, 'clf__reg_alpha': 101, 'clf__reg_lambda': 0.9839021417880869, 'clf__colsample_bytree': 0.5907622437991593, 'clf__min_child_weight': 0, 'clf__n_estimators': 106, 'vect__ngram_range': (1, 2)}
Iteration No: 35 ended. Search finished for the next optimal point.
Time taken: 139.6562
Function value obtained: -0.7183
Current minimum: -0.7205
Iteration No: 36 started. Searching for the next optim

Iteration No: 45 ended. Search finished for the next optimal point.
Time taken: 323.1193
Function value obtained: -0.7192
Current minimum: -0.7216
Iteration No: 46 started. Searching for the next optimal point.
{'vect__max_df': 0.7728532904416603, 'vect__min_df': 49, 'ngrams': 2, 'clf__max_depth': 22, 'clf__gamma': 3.2319125407319373, 'clf__reg_alpha': 159, 'clf__reg_lambda': 0.6774795478141792, 'clf__colsample_bytree': 0.7996129994988088, 'clf__min_child_weight': 7, 'clf__n_estimators': 282}
{'vect__max_df': 0.7728532904416603, 'vect__min_df': 49, 'clf__max_depth': 22, 'clf__gamma': 3.2319125407319373, 'clf__reg_alpha': 159, 'clf__reg_lambda': 0.6774795478141792, 'clf__colsample_bytree': 0.7996129994988088, 'clf__min_child_weight': 7, 'clf__n_estimators': 282, 'vect__ngram_range': (1, 2)}
Iteration No: 46 ended. Search finished for the next optimal point.
Time taken: 389.8802
Function value obtained: -0.7206
Current minimum: -0.7216
Iteration No: 47 started. Searching for the next opt

Iteration No: 56 ended. Search finished for the next optimal point.
Time taken: 411.0289
Function value obtained: -0.7201
Current minimum: -0.7216
Iteration No: 57 started. Searching for the next optimal point.
{'vect__max_df': 0.7258111452024905, 'vect__min_df': 14, 'ngrams': 2, 'clf__max_depth': 6, 'clf__gamma': 2.81731555204806, 'clf__reg_alpha': 120, 'clf__reg_lambda': 0.22680364056783336, 'clf__colsample_bytree': 0.9539441368382089, 'clf__min_child_weight': 10, 'clf__n_estimators': 249}
{'vect__max_df': 0.7258111452024905, 'vect__min_df': 14, 'clf__max_depth': 6, 'clf__gamma': 2.81731555204806, 'clf__reg_alpha': 120, 'clf__reg_lambda': 0.22680364056783336, 'clf__colsample_bytree': 0.9539441368382089, 'clf__min_child_weight': 10, 'clf__n_estimators': 249, 'vect__ngram_range': (1, 2)}
Iteration No: 57 ended. Search finished for the next optimal point.
Time taken: 161.2448
Function value obtained: -0.7183
Current minimum: -0.7216
Iteration No: 58 started. Searching for the next optim

Iteration No: 67 ended. Search finished for the next optimal point.
Time taken: 98.7803
Function value obtained: -0.7201
Current minimum: -0.7216
Iteration No: 68 started. Searching for the next optimal point.
{'vect__max_df': 0.7082256223126163, 'vect__min_df': 26, 'ngrams': 2, 'clf__max_depth': 18, 'clf__gamma': 5.203028175515531, 'clf__reg_alpha': 146, 'clf__reg_lambda': 0.4172728943094559, 'clf__colsample_bytree': 0.5464515375954189, 'clf__min_child_weight': 7, 'clf__n_estimators': 265}
{'vect__max_df': 0.7082256223126163, 'vect__min_df': 26, 'clf__max_depth': 18, 'clf__gamma': 5.203028175515531, 'clf__reg_alpha': 146, 'clf__reg_lambda': 0.4172728943094559, 'clf__colsample_bytree': 0.5464515375954189, 'clf__min_child_weight': 7, 'clf__n_estimators': 265, 'vect__ngram_range': (1, 2)}
Iteration No: 68 ended. Search finished for the next optimal point.
Time taken: 282.2062
Function value obtained: -0.7182
Current minimum: -0.7216
Iteration No: 69 started. Searching for the next optima

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
