In [65]:
import pandas as pd
import numpy as np
import re
import sklearn.metrics as metrics
import pymorphy2
import nltk
from bs4 import BeautifulSoup

In [45]:
from scipy.spatial.distance import cdist
from scipy import sparse
from functools import reduce
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.model_selection import train_test_split, cross_validate, KFold, cross_val_score, GroupKFold
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from catboost import CatBoostClassifier
from sklearn.linear_model import SGDClassifier
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from gensim.summarization.bm25 import get_bm25_weights, iter_bm25_bow
from sklearn.cluster import DBSCAN
from sklearn.ensemble import IsolationForest 
from sklearn.cluster import AgglomerativeClustering

In [3]:
def get_df():
    
    titles_df = pd.read_csv('./data/docs_titles.tsv/docs_titles.tsv', sep='\t')
    docs_id_test = pd.read_csv('./data/test_groups.csv', sep=',')
    docs_id_train = pd.read_csv('./data/train_groups.csv', sep=',')

    info = pd.concat([docs_id_train, docs_id_test])
    info.reset_index(drop=True)

    titles = pd.merge(titles_df, info[['group_id', 'doc_id', 'target']], on='doc_id', how='inner')
    titles['title'] += ' '
    titles['title'].fillna(' ', inplace=True)
    
    return titles

In [4]:
def titles_extraction():
    
    titles = get_df();

    titles = titles[['title', 'group_id']].groupby('group_id').sum()['title']

    titles = titles.apply(lambda x: cleaner(x))
    titles = titles.values

    return titles

In [5]:
def cleaning(titles, group_num, bad_words):
    titles = titles.lower()
    titles = re.sub(r'\W', '  ', titles)
    
    for i in bad_words[group_num]:
            titles = titles.replace(i, ' ')
    titles = re.sub(r'\s+', ' ', titles)
    return titles

In [6]:
# стеммниг для русского языка (взял в интернете)
class Porter:
    PERFECTIVEGROUND =  re.compile(u"((ив|ивши|ившись|ыв|ывши|ывшись)|((?<=[ая])(в|вши|вшись)))$")
    REFLEXIVE = re.compile(u"(с[яь])$")
    ADJECTIVE = re.compile(u"(ее|ие|ые|ое|ими|ыми|ей|ий|ый|ой|ем|им|ым|ом|его|ого|ему|ому|их|ых|ую|юю|ая|яя|ою|ею)$")
    PARTICIPLE = re.compile(u"((ивш|ывш|ующ)|((?<=[ая])(ем|нн|вш|ющ|щ)))$")
    VERB = re.compile(u"((ила|ыла|ена|ейте|уйте|ите|или|ыли|ей|уй|ил|ыл|им|ым|ен|ило|ыло|ено|ят|ует|уют|ит|ыт|ены|ить|ыть|ишь|ую|ю)|((?<=[ая])(ла|на|ете|йте|ли|й|л|ем|н|ло|но|ет|ют|ны|ть|ешь|нно)))$")
    NOUN = re.compile(u"(а|ев|ов|ие|ье|е|иями|ями|ами|еи|ии|и|ией|ей|ой|ий|й|иям|ям|ием|ем|ам|ом|о|у|ах|иях|ях|ы|ь|ию|ью|ю|ия|ья|я)$")
    RVRE = re.compile(u"^(.*?[аеиоуыэюя])(.*)$")
    DERIVATIONAL = re.compile(u".*[^аеиоуыэюя]+[аеиоуыэюя].*ость?$")
    DER = re.compile(u"ость?$")
    SUPERLATIVE = re.compile(u"(ейше|ейш)$")
    I = re.compile(u"и$")
    P = re.compile(u"ь$")
    NN = re.compile(u"нн$")

    def stem(string):
        
        changed = ''
        
        string = string.lower()

        for word in string.split():
            if not word.isdigit():
                word = word.replace(u'ё', u'е')
                m = re.match(Porter.RVRE, word)

                if m and m.groups():
                    pre = m.group(1)
                    rv = m.group(2)
                    temp = Porter.PERFECTIVEGROUND.sub('', rv, 1)
                    if temp == rv:
                        rv = Porter.REFLEXIVE.sub('', rv, 1)
                        temp = Porter.ADJECTIVE.sub('', rv, 1)
                        if temp != rv:
                            rv = temp
                            rv = Porter.PARTICIPLE.sub('', rv, 1)
                        else:
                            temp = Porter.VERB.sub('', rv, 1)
                            if temp == rv:
                                rv = Porter.NOUN.sub('', rv, 1)
                            else:
                                rv = temp
                    else:
                        rv = temp

                    rv = Porter.I.sub('', rv, 1)

                    if re.match(Porter.DERIVATIONAL, rv):
                        rv = Porter.DER.sub('', rv, 1)

                    temp = Porter.P.sub('', rv, 1)
                    if temp == rv:
                        rv = Porter.SUPERLATIVE.sub('', rv, 1)
                        rv = Porter.NN.sub(u'н', rv, 1)
                    else:
                        rv = temp
                    word = pre+rv

            changed += word + ' '

        return changed

In [7]:
# эта и 3 следующие функции используются для подсчета расстояний без тф идф и косинусной метрики

def features_create(mode, groups_titledata):
    
    if mode == 'train':
        y = []      
    X = []
    groups_train = []
    
    for new_group in groups_titledata:
        docs = groups_titledata[new_group]
        
        for k, info in enumerate(docs):
            
            doc_id = info[0]
            title = info[1]
            
            if mode == 'train':
                target_id = info[2]
                y.append(target_id)
                
            groups_train.append(new_group)
            all_dist = []
            words = set(title.strip().split())
            
            for j in range(0, len(docs)):
                if k == j:
                    continue
                info = docs[j]
                doc_id_j = info[0]
                title_j = info[1]

                words_j = set(title_j.strip().split())
                all_dist.append(len(words.intersection(words_j)))
                
            X.append(sorted(all_dist, reverse=True)[0:25])
            
    X = np.array(X)
    
    if mode == 'train':
        y = np.array(y)
    
    groups_train = np.array(groups_train)

    if mode == 'train':
        print(X.shape, y.shape, groups_train.shape)
        return X, y, groups_train
    else:
        print(X.shape, groups_train.shape)
        return X, groups_train

In [8]:
def tuple_x(a1, a2):
    return a1, a2

In [9]:
def title_info_dict(mode, doc_to_title):
    
    data = pd.read_csv('./data/{}_groups.csv'.format(mode))

    titledata = {}
    
    for i in range(len(data)):
        
        new_doc = data.iloc[i]
        doc_group = new_doc['group_id']
        doc_id = new_doc['doc_id']
            
        title = doc_to_title[doc_id]
        
        if doc_group not in titledata:
            titledata[doc_group] = []
            
        if mode == 'train':
            titledata[doc_group].append((doc_id, title, new_doc['target']))
        else:
            titledata[doc_group].append((doc_id, title))
        
    return titledata

In [10]:
def easy_launch():
    
    doc_to_title = {}
    with open('./data/unversal_table.csv', encoding = 'utf-8') as f:
        for num_line, line in enumerate(f):
            if num_line == 0:
                continue

            line = line.replace('\t', ',')
            data = line.strip().split(',')

            doc_id = int(data[0])
            if len(data) == 1:
                title = ''
            else:
                title = data[1]
            doc_to_title[doc_id] = title
            
    print('doc titles dict len = {}'.format(len(doc_to_title)))
    
    train_titledata = title_info_dict('train', doc_to_title)
    test_titledata = title_info_dict('test', doc_to_title)
    
    X_train, y_train, groups_train = features_create('train', train_titledata)
    X_test, groups_test = features_create('test', test_titledata)
    
    return X_train, y_train, X_test, groups_train, groups_test

In [11]:
# 3  функции для перебора комбинаций параметров
def flatten(x):
    
    result = []
    for elem in x:
        if hasattr(elem, "__iter__") and not isinstance(elem, str):
            result.extend(flatten(elem))
        else:
            result.append(elem)
            
    return result

In [12]:
def list_concat(list1, list2):
    
    len1 = len(list1)
    len2 = len(list2)
    
    return [[list1[i], list2[j]] for i in range(len1) for j in range(len2)]

In [13]:
def combinations(params):
    
    list_ = []
    for value in params.values():
        list_.append(value)

    tmp = reduce(lambda x, y: list_concat(x, y), list_)

    res = []
    for elem in tmp:
        param_list = flatten(elem)
        param_dict = dict(zip(params.keys(), param_list))
        res.append(param_dict)
        
    return res

In [14]:
def frange(start, stop, step):
    i = start
    while i < stop:
        yield i
        i += step

In [15]:
def validation(X_train, train_target, model, params, folds_gen_func, folds_num=5, thresholds=[0.32], **kwargs):
    
    scaler = StandardScaler()
    scaler.fit(X_train) 
    
    main_res = []
    for param_set in combinations(params):
        
        print(param_set)        
        exact_model = model(**param_set) 
        
        fold_generator = folds_gen_func(folds_num)
        
        for th in thresholds:
#           th = 0.27
            print('th = ', th)
            res = []
            for train_index, test_index in fold_generator.split(X_train, train_target, **kwargs):

                exact_model.fit(scaler.transform(X_train[train_index]), train_target[train_index])

                y_pred = [0 if val < th else 1 for val in exact_model.predict_proba(scaler.transform(X_train[test_index]))[:,1]]

                score = metrics.f1_score(train_target[test_index],\
                                                y_pred)
    #               print('threshold = {}, score = {}'.format(th, score))

                res.append(score)
        #                                       exact_model.predict(scaler.transform(X_train[test_index]))))


            mean = sum(res)/len(res)
            print(mean)
#             print('threshold = {}, score = {}'.format(th, mean))
            main_res.append((mean, param_set, th))
    
    best = main_res[np.argmax([res[0] for res in main_res])]
    print('--------max-------')
    print(best)
    
    return best

In [16]:
# Сохраняет решение

def save_submission(y_pred):

    data = pd.read_csv('data/test_groups.csv')
    print('len data = ', len(data))
    data['target'] = y_pred
    
    data = data.drop(['group_id', 'doc_id'], axis=1)

    data.to_csv("submission.csv", index=False)
    
    info = np.unique(data['target'], return_counts=True)
    
    if info[0].shape[0] > 1:
        
        print('0: {}, 1: {}'.format(info[1][0], info[1][1]))
        if info[1][1] > 6000 or info[1][1] < 2500:
            print('Your submisson is shit')
#         elif info[1][1] > 4500:
#             print('Your submisson is probably shit')
    else:
        print('There are only {} in submission'.format(info[0][0]))
        
    return data

In [17]:
def predict(X_train, X_test, train_target, model, scaler=None, th=0.30, **kwargs):
    
    curr_model = model(**kwargs)
    
    if scaler is not None:
        
        your_scaler = scaler()
        your_scaler.fit(X_train)
        X_train = your_scaler.transform(X_train)
        X_test = your_scaler.transform(X_test)
        
    curr_model.fit(X_train, train_target)
#     y_pred = curr_model.predict(X_test)
#     th = 0.35
    print('Threshold = ', th )#,'Params = ', *kwargs.values)
    y_pred = [0 if val < th else 1 for val in curr_model.predict_proba(X_test)[:,1]]
    return y_pred

In [18]:
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /Users/misha/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [19]:
morph = pymorphy2.MorphAnalyzer()
lemmatizer = WordNetLemmatizer()

In [20]:
# Лемматизация 

def str_parser(words_): 
    global j 
    new_string = '' 

    for i in re.findall(r'\b[а-я]{1,20}\b', words_): 
        new_string += (morph.parse(i)[0].normal_form) + ' ' 

    for i in re.findall(r'\b[a-z]{1,20}\b', words_): 
        new_string += (lemmatizer.lemmatize(i)) + ' ' 

    j += 1 
    if(j % 1000 == 0): 
        print(j, '/28317 loaded') 

    return new_string

In [21]:
def lemmatization(df, col_name='title'):
    
    return df[col_name].apply(lambda x: str_parser(x))

In [22]:
def stemming_titles(df, col_name='title'):
    stemming = Porter
    return df[col_name].apply(lambda x: stemming.stem(x))

In [23]:
def get_train_test_():

    df_train = pd.read_csv('data/core_train.csv')
    df_test = pd.read_csv('data/core_test.csv')
    df_train.fillna(' ', inplace=True)
    df_test.fillna(' ', inplace=True)
    titles_df = pd.concat([df_train, df_test],ignore_index = True)
    titles_df.drop(columns = {'Unnamed: 0'}, inplace = True)

    return titles_df

In [24]:
# Рассчитывает расстояния между документами берет 25 наименьших и сохраняет numpy ndarray в файл

def features_save_new(vec_type=1, way='tfidf', **kwargs):
    
    df = pd.read_csv('./data/unversal_table.csv')
    df.fillna(' ', inplace=True)
    corpus = df['title'].values
#     corpus = df[df.group_id == group_num]['title'].values
#     print(corpus)
    if way == 'countw':
        
        if vec_type == 1:
            vectorizer = CountVectorizer(max_features=max_f)
            X = vectorizer.fit_transform(corpus)

        elif vec_type ==2:
            vectorizer2 = CountVectorizer(max_features=max_f)
            X = vectorizer2.fit_transform(corpus)
            
    elif way == 'tfidf': 
        
#         vectorizer = TfidfVectorizer(**kwargs)
#         X = vectorizer.fit_transform(corpus)
        X = get_bm25_weights(corpus, n_jobs=-1)
    X = X.toarray()
    
    length = 0
    
    for group_num in np.unique(df['group_id']):
        
        group_length = len(df[df.group_id==group_num])
        group_titles = X[length:length + group_length]
        length += group_length
        
        features = count_distances(group_titles)

        np.save('group_features/{}'.format(group_num), features)
        if group_num % 50 == 0:
            print('Скачалась группа:', group_num)
        
    return features

In [56]:
# Рассчитывает расстояния между документами берет 25 наименьших и сохраняет numpy ndarray в файл

def features_save_new(vec_type=1, way='tfidf', **kwargs):
    
    df = pd.read_csv('./data/unversal_table.csv')
    df.fillna(' ', inplace=True)
    
    corpus = df['title'].values
    lens = df['len'].values
#     s = df['_num'].values
#     hrefs = df['href_num'].values
    
#     corpus = df[df.group_id == group_num]['title'].values
#     print(corpus)
    if way == 'countw':
        
        if vec_type == 1:
            vectorizer = CountVectorizer(max_features=max_f)
            X = vectorizer.fit_transform(corpus)

        elif vec_type ==2:
            vectorizer2 = CountVectorizer(max_features=max_f)
            X = vectorizer2.fit_transform(corpus)
            
    elif way == 'tfidf': 
        
        vectorizer = TfidfVectorizer(**kwargs)
        X = vectorizer.fit_transform(corpus)
#         X = get_bm25_weights(corpus, n_jobs=-1)
    X = X.toarray()
    print(X.shape, lens.shape)
    length = 0
    
    for group_num in np.unique(df['group_id']):
        
        group_length = len(df[df.group_id==group_num])
        
        group_titles = X[length:length + group_length]
        body_len = lens[length:length + group_length]
#         s_num = s[length:length + group_length]
#         hrefs_num = hrefs[length:length + group_length]
        
        length += group_length
        
        features = count_distances(group_titles)

        features = np.hstack((features, body_len[:, np.newaxis]))
        
        np.save('group_features/{}'.format(group_num), features)
        if group_num % 50 == 0:
            print('Скачалась группа:', group_num)
        
    return features

In [26]:
dbscan = DBSCAN(eps = 0.65, metric = 'cosine')
clf = IsolationForest()

In [27]:
# Рассчет расстояний по матрице встречаемости (косинусная метрика)

def count_distances(docs):
#     print(docs)cosine
# correlation
    distance = cdist(docs, docs, 'correlation')
#     distance = cdist(docs, docs, 'jaccard')
#     distance = cdist(docs, docs, 'cosine')
#     print(distance,'----------')
    res = np.asarray([np.concatenate((vec[:num],vec[num+1:])) for num, vec in enumerate(distance)])
    
    stats = np.hstack((np.mean(res, axis=1)[:, np.newaxis], np.std(res, axis=1)[:, np.newaxis], \
                       np.median(res, axis=1)[:, np.newaxis]))
    
    res = np.sort(res)[:, :25]
#     stats = np.hstack((np.mean(res, axis=1)[:, np.newaxis], np.std(res, axis=1)[:, np.newaxis], \
#                        np.median(res, axis=1)[:, np.newaxis]))
    res = np.hstack((res, stats))
    
#     res = np.flip(res, axis=1)[:, :25]
#     info = 15 - res.shape[1]
#     if info > 0:
#         z = np.zeros(shape=(res.shape[0], info))
#         res = np.hstack((z, res))
    
    return res

In [28]:
# def count_distances(docs):
# # DBSCAN ADDED new_feature: DBSCAN -> -1 ==> 0
# #                           DBSCAN ->  1 ==> 1
#     f = dbscan.fit_predict(docs)
    
#     distance = cdist(docs, docs, 'correlation')
#     res = np.asarray([np.concatenate((vec[:num],vec[num+1:])) for num, vec in enumerate(distance)])
#     res = np.nan_to_num(res, nan=0, posinf=1, neginf=0)
    
    
#     stats = np.hstack((np.mean(res,axis = 1)[:,np.newaxis], np.std(res, axis=1)[:,np.newaxis], \
#                      np.median(res,axis = 1)[:, np.newaxis]))
    
#     f2 = clf.fit_predict(res)
        
#     for i in range(len(f)):
        
#         f[i] = i

#     for i in range(len(f2)):
        
#         f2[i] = i

#     res = np.sort(res)[:, :25]
#     res = np.hstack((res, stats))
#     res = np.hstack((f[:, np.newaxis], res))
#     res = np.hstack((f2[:, np.newaxis], res))
    
#     return res

In [57]:
# Проходит по заданным группам и сохраняет признаки(расстояния)

def main_parser_and_saver(**kwargs):
    
#     for group_num in range(start_group, finish_group + 1):

#         features_save(group_num, vec_type=vec_type)
#         print('Скачалась группа:', group_num)
    features_save_new(**kwargs)
    return True

In [30]:
# Создает X_train, X_test, train_target

def prepare_data(**kwargs):
    
    X_train = all_group_feature_list(1, 129)
    X_test = all_group_feature_list(130, 309)
    
    d = pd.read_csv('./data/train_groups.csv')
    train_target = d['target']
    
    X_train = np.nan_to_num(X_train, nan=0, posinf=1, neginf=0)
    X_test = np.nan_to_num(X_test, nan=0, posinf=1, neginf=0)
    
    return X_train, train_target, X_test

In [31]:
# Загружает файлы с признаками документов по группам

def all_group_feature_list(start_group, finish_group):
    
    res = np.load('group_features/{}.npy'.format(start_group))
  
    for group_num in range(start_group + 1, finish_group + 1):
#         res += ndarray_to_list(np.load('group_features/{}.npy'.format(group_num)))
        
        t = np.load('group_features/{}.npy'.format(group_num))
#         print(res.shape, t.shape, group_num)
        res = np.vstack((res, t))
        
    return res

In [59]:
X_train_good, _ , X_test_good =  prepare_data()

In [None]:
X_train_good.shape

In [81]:
X_train_53, X_test_53 = X_train, X_test

In [258]:
X_train = np.hstack((X_train_53, np.mean(X_train_old, axis=1)[:, np.newaxis],\
                   np.std(X_train_old, axis=1)[:, np.newaxis], np.median(X_train_old, axis=1)[:, np.newaxis]))
X_test = np.hstack((X_test_53, np.mean(X_test_old, axis=1)[:, np.newaxis],\
                   np.std(X_test_old, axis=1)[:, np.newaxis], np.median(X_test_old, axis=1)[:, np.newaxis]))

In [58]:
%%time
# main_parser_and_saver(1, 129)
# ngram_range=(1,2)
# min_df=2, 
main_parser_and_saver(stop_words='english')

(28317, 33091) (28317,)
Скачалась группа: 50
Скачалась группа: 100
Скачалась группа: 150
Скачалась группа: 200
Скачалась группа: 250
Скачалась группа: 300
CPU times: user 2min 47s, sys: 6.29 s, total: 2min 53s
Wall time: 3min 28s


True

In [300]:
X_train_body, y_train, X_test_body = prepare_data()

In [61]:
X_train_old, y_train_old, X_test_old, groups_train_old, groups_test_old = easy_launch()
groups_train = pd.read_csv('data/train_groups.csv')['group_id']

doc titles dict len = 28026
(11690, 25) (11690,) (11690,)
(16627, 25) (16627,)


In [60]:
X_train = np.hstack((X_train_good, X_train_old))
X_test = np.hstack((X_test_good, X_test_old))

NameError: name 'X_train_old' is not defined

In [405]:
X_train.shape

(11690, 54)

In [407]:
%%time
best = validation(X_train, y_train, GradientBoostingClassifier, params, GroupKFold, groups=groups_train, thresholds=th)

{'learning_rate': 0.06, 'n_estimators': 150}
th =  0.4
0.7445936854101338
--------max-------
(0.7445936854101338, {'learning_rate': 0.06, 'n_estimators': 150}, 0.4)
CPU times: user 54 s, sys: 68.8 ms, total: 54.1 s
Wall time: 54.1 s


In [408]:
y_pred1 = predict(X_train, X_test, y_train, GradientBoostingClassifier, StandardScaler, best[2], **best[1])
data = save_submission(y_pred1)

Threshold =  0.4
len data =  16627
0: 11729, 1: 4898


In [436]:
best = validation(X_train, y_train, CatBoostClassifier, params, GroupKFold, groups=groups_train, thresholds=th)

{'learning_rate': 0.06, 'n_estimators': 150}
th =  0.34
0:	learn: 0.6468683	total: 74.5ms	remaining: 11.1s
1:	learn: 0.6055122	total: 81.5ms	remaining: 6.03s
2:	learn: 0.5704531	total: 88.3ms	remaining: 4.33s
3:	learn: 0.5392777	total: 95.3ms	remaining: 3.48s
4:	learn: 0.5060802	total: 103ms	remaining: 2.99s
5:	learn: 0.4831252	total: 110ms	remaining: 2.64s
6:	learn: 0.4605093	total: 117ms	remaining: 2.39s
7:	learn: 0.4460273	total: 124ms	remaining: 2.19s
8:	learn: 0.4295115	total: 131ms	remaining: 2.06s
9:	learn: 0.4176897	total: 138ms	remaining: 1.94s
10:	learn: 0.4064333	total: 146ms	remaining: 1.84s
11:	learn: 0.3968633	total: 153ms	remaining: 1.76s
12:	learn: 0.3888477	total: 161ms	remaining: 1.69s
13:	learn: 0.3826969	total: 168ms	remaining: 1.63s
14:	learn: 0.3771506	total: 175ms	remaining: 1.57s
15:	learn: 0.3727197	total: 182ms	remaining: 1.52s
16:	learn: 0.3678261	total: 189ms	remaining: 1.48s
17:	learn: 0.3637114	total: 196ms	remaining: 1.44s
18:	learn: 0.3591429	total: 203m

22:	learn: 0.3349122	total: 219ms	remaining: 1.21s
23:	learn: 0.3322883	total: 231ms	remaining: 1.21s
24:	learn: 0.3301780	total: 240ms	remaining: 1.2s
25:	learn: 0.3276415	total: 250ms	remaining: 1.19s
26:	learn: 0.3258595	total: 258ms	remaining: 1.18s
27:	learn: 0.3243666	total: 269ms	remaining: 1.17s
28:	learn: 0.3221467	total: 279ms	remaining: 1.17s
29:	learn: 0.3206705	total: 288ms	remaining: 1.15s
30:	learn: 0.3195920	total: 297ms	remaining: 1.14s
31:	learn: 0.3179188	total: 305ms	remaining: 1.13s
32:	learn: 0.3170773	total: 313ms	remaining: 1.11s
33:	learn: 0.3155846	total: 322ms	remaining: 1.1s
34:	learn: 0.3141604	total: 331ms	remaining: 1.09s
35:	learn: 0.3132116	total: 340ms	remaining: 1.08s
36:	learn: 0.3123653	total: 350ms	remaining: 1.07s
37:	learn: 0.3115486	total: 360ms	remaining: 1.06s
38:	learn: 0.3105977	total: 369ms	remaining: 1.05s
39:	learn: 0.3098810	total: 378ms	remaining: 1.04s
40:	learn: 0.3091373	total: 386ms	remaining: 1.03s
41:	learn: 0.3081933	total: 394ms

33:	learn: 0.3065418	total: 292ms	remaining: 998ms
34:	learn: 0.3052314	total: 302ms	remaining: 994ms
35:	learn: 0.3044627	total: 310ms	remaining: 980ms
36:	learn: 0.3032693	total: 319ms	remaining: 973ms
37:	learn: 0.3023669	total: 326ms	remaining: 960ms
38:	learn: 0.3012199	total: 335ms	remaining: 955ms
39:	learn: 0.3001265	total: 343ms	remaining: 942ms
40:	learn: 0.2994686	total: 352ms	remaining: 935ms
41:	learn: 0.2987241	total: 358ms	remaining: 922ms
42:	learn: 0.2980930	total: 368ms	remaining: 915ms
43:	learn: 0.2975187	total: 375ms	remaining: 903ms
44:	learn: 0.2966415	total: 384ms	remaining: 896ms
45:	learn: 0.2956300	total: 391ms	remaining: 884ms
46:	learn: 0.2952195	total: 400ms	remaining: 878ms
47:	learn: 0.2946629	total: 408ms	remaining: 867ms
48:	learn: 0.2941075	total: 415ms	remaining: 856ms
49:	learn: 0.2938473	total: 422ms	remaining: 843ms
50:	learn: 0.2929700	total: 430ms	remaining: 835ms
51:	learn: 0.2923109	total: 437ms	remaining: 824ms
52:	learn: 0.2920328	total: 446

54:	learn: 0.3116398	total: 413ms	remaining: 714ms
55:	learn: 0.3110140	total: 422ms	remaining: 708ms
56:	learn: 0.3106428	total: 430ms	remaining: 701ms
57:	learn: 0.3099832	total: 437ms	remaining: 694ms
58:	learn: 0.3092605	total: 446ms	remaining: 689ms
59:	learn: 0.3084539	total: 454ms	remaining: 681ms
60:	learn: 0.3079113	total: 463ms	remaining: 675ms
61:	learn: 0.3074251	total: 470ms	remaining: 667ms
62:	learn: 0.3070431	total: 477ms	remaining: 659ms
63:	learn: 0.3067084	total: 484ms	remaining: 651ms
64:	learn: 0.3064222	total: 492ms	remaining: 643ms
65:	learn: 0.3063232	total: 499ms	remaining: 635ms
66:	learn: 0.3058885	total: 506ms	remaining: 626ms
67:	learn: 0.3056319	total: 513ms	remaining: 618ms
68:	learn: 0.3052525	total: 520ms	remaining: 610ms
69:	learn: 0.3049580	total: 529ms	remaining: 604ms
70:	learn: 0.3045303	total: 536ms	remaining: 596ms
71:	learn: 0.3038648	total: 544ms	remaining: 590ms
72:	learn: 0.3034800	total: 551ms	remaining: 581ms
73:	learn: 0.3031356	total: 560

84:	learn: 0.2798223	total: 608ms	remaining: 465ms
85:	learn: 0.2794531	total: 618ms	remaining: 460ms
86:	learn: 0.2791271	total: 625ms	remaining: 453ms
87:	learn: 0.2787077	total: 633ms	remaining: 446ms
88:	learn: 0.2784991	total: 641ms	remaining: 439ms
89:	learn: 0.2782682	total: 647ms	remaining: 432ms
90:	learn: 0.2779564	total: 654ms	remaining: 424ms
91:	learn: 0.2775302	total: 662ms	remaining: 417ms
92:	learn: 0.2771445	total: 669ms	remaining: 410ms
93:	learn: 0.2768750	total: 676ms	remaining: 403ms
94:	learn: 0.2765687	total: 683ms	remaining: 396ms
95:	learn: 0.2764049	total: 691ms	remaining: 388ms
96:	learn: 0.2762798	total: 697ms	remaining: 381ms
97:	learn: 0.2760536	total: 704ms	remaining: 373ms
98:	learn: 0.2757895	total: 711ms	remaining: 366ms
99:	learn: 0.2754875	total: 717ms	remaining: 359ms
100:	learn: 0.2751746	total: 724ms	remaining: 351ms
101:	learn: 0.2750561	total: 731ms	remaining: 344ms
102:	learn: 0.2746040	total: 738ms	remaining: 337ms
103:	learn: 0.2743646	total:

95:	learn: 0.2974468	total: 821ms	remaining: 462ms
96:	learn: 0.2970059	total: 830ms	remaining: 453ms
97:	learn: 0.2964960	total: 840ms	remaining: 446ms
98:	learn: 0.2963299	total: 848ms	remaining: 437ms
99:	learn: 0.2959726	total: 857ms	remaining: 428ms
100:	learn: 0.2957971	total: 864ms	remaining: 419ms
101:	learn: 0.2953936	total: 873ms	remaining: 411ms
102:	learn: 0.2950767	total: 880ms	remaining: 402ms
103:	learn: 0.2947475	total: 889ms	remaining: 393ms
104:	learn: 0.2945797	total: 896ms	remaining: 384ms
105:	learn: 0.2941265	total: 904ms	remaining: 375ms
106:	learn: 0.2937533	total: 911ms	remaining: 366ms
107:	learn: 0.2934011	total: 921ms	remaining: 358ms
108:	learn: 0.2931211	total: 928ms	remaining: 349ms
109:	learn: 0.2927729	total: 936ms	remaining: 340ms
110:	learn: 0.2926353	total: 944ms	remaining: 332ms
111:	learn: 0.2925195	total: 952ms	remaining: 323ms
112:	learn: 0.2922895	total: 960ms	remaining: 314ms
113:	learn: 0.2920784	total: 970ms	remaining: 306ms
114:	learn: 0.291

111:	learn: 0.2773901	total: 1.03s	remaining: 349ms
112:	learn: 0.2770108	total: 1.04s	remaining: 340ms
113:	learn: 0.2766875	total: 1.05s	remaining: 331ms
114:	learn: 0.2763975	total: 1.06s	remaining: 322ms
115:	learn: 0.2760988	total: 1.06s	remaining: 312ms
116:	learn: 0.2759559	total: 1.07s	remaining: 303ms
117:	learn: 0.2756396	total: 1.08s	remaining: 293ms
118:	learn: 0.2754568	total: 1.09s	remaining: 284ms
119:	learn: 0.2753388	total: 1.1s	remaining: 275ms
120:	learn: 0.2751048	total: 1.11s	remaining: 265ms
121:	learn: 0.2747180	total: 1.11s	remaining: 256ms
122:	learn: 0.2745595	total: 1.12s	remaining: 246ms
123:	learn: 0.2742978	total: 1.13s	remaining: 237ms
124:	learn: 0.2740970	total: 1.14s	remaining: 228ms
125:	learn: 0.2738619	total: 1.15s	remaining: 219ms
126:	learn: 0.2737749	total: 1.16s	remaining: 209ms
127:	learn: 0.2733865	total: 1.16s	remaining: 200ms
128:	learn: 0.2728765	total: 1.17s	remaining: 191ms
129:	learn: 0.2727170	total: 1.18s	remaining: 182ms
130:	learn: 0

136:	learn: 0.2613991	total: 1.22s	remaining: 116ms
137:	learn: 0.2611149	total: 1.23s	remaining: 107ms
138:	learn: 0.2607114	total: 1.24s	remaining: 98.2ms
139:	learn: 0.2604574	total: 1.25s	remaining: 89.2ms
140:	learn: 0.2601871	total: 1.25s	remaining: 80.1ms
141:	learn: 0.2597187	total: 1.26s	remaining: 71.2ms
142:	learn: 0.2593489	total: 1.27s	remaining: 62.2ms
143:	learn: 0.2589083	total: 1.28s	remaining: 53.3ms
144:	learn: 0.2585858	total: 1.29s	remaining: 44.4ms
145:	learn: 0.2582026	total: 1.3s	remaining: 35.5ms
146:	learn: 0.2580299	total: 1.3s	remaining: 26.6ms
147:	learn: 0.2577364	total: 1.31s	remaining: 17.8ms
148:	learn: 0.2572171	total: 1.32s	remaining: 8.88ms
149:	learn: 0.2568855	total: 1.33s	remaining: 0us
0:	learn: 0.6418487	total: 13ms	remaining: 1.93s
1:	learn: 0.6020025	total: 22ms	remaining: 1.63s
2:	learn: 0.5678893	total: 30.4ms	remaining: 1.49s
3:	learn: 0.5319342	total: 39.8ms	remaining: 1.45s
4:	learn: 0.5004985	total: 47.5ms	remaining: 1.38s
5:	learn: 0.47

0:	learn: 0.6405204	total: 11.5ms	remaining: 1.72s
1:	learn: 0.5969502	total: 21.1ms	remaining: 1.56s
2:	learn: 0.5565319	total: 28.7ms	remaining: 1.41s
3:	learn: 0.5219895	total: 36.9ms	remaining: 1.34s
4:	learn: 0.4898463	total: 45.8ms	remaining: 1.33s
5:	learn: 0.4654321	total: 53.7ms	remaining: 1.29s
6:	learn: 0.4425472	total: 62.8ms	remaining: 1.28s
7:	learn: 0.4274495	total: 70.5ms	remaining: 1.25s
8:	learn: 0.4091716	total: 79.4ms	remaining: 1.24s
9:	learn: 0.3968522	total: 87ms	remaining: 1.22s
10:	learn: 0.3849980	total: 94.9ms	remaining: 1.2s
11:	learn: 0.3744906	total: 103ms	remaining: 1.18s
12:	learn: 0.3666724	total: 112ms	remaining: 1.18s
13:	learn: 0.3606028	total: 119ms	remaining: 1.16s
14:	learn: 0.3554669	total: 129ms	remaining: 1.16s
15:	learn: 0.3505178	total: 137ms	remaining: 1.15s
16:	learn: 0.3460535	total: 146ms	remaining: 1.15s
17:	learn: 0.3425644	total: 154ms	remaining: 1.13s
18:	learn: 0.3374267	total: 161ms	remaining: 1.11s
19:	learn: 0.3344264	total: 168ms

28:	learn: 0.3376678	total: 242ms	remaining: 1.01s
29:	learn: 0.3358765	total: 251ms	remaining: 1s
30:	learn: 0.3347515	total: 260ms	remaining: 998ms
31:	learn: 0.3332814	total: 268ms	remaining: 989ms
32:	learn: 0.3320626	total: 278ms	remaining: 987ms
33:	learn: 0.3306906	total: 286ms	remaining: 975ms
34:	learn: 0.3295059	total: 295ms	remaining: 970ms
35:	learn: 0.3287304	total: 303ms	remaining: 959ms
36:	learn: 0.3279004	total: 312ms	remaining: 953ms
37:	learn: 0.3268574	total: 320ms	remaining: 942ms
38:	learn: 0.3260072	total: 327ms	remaining: 932ms
39:	learn: 0.3250507	total: 335ms	remaining: 921ms
40:	learn: 0.3245140	total: 344ms	remaining: 914ms
41:	learn: 0.3240346	total: 351ms	remaining: 904ms
42:	learn: 0.3231989	total: 360ms	remaining: 896ms
43:	learn: 0.3223125	total: 368ms	remaining: 886ms
44:	learn: 0.3216100	total: 376ms	remaining: 877ms
45:	learn: 0.3204830	total: 383ms	remaining: 866ms
46:	learn: 0.3196656	total: 391ms	remaining: 856ms
47:	learn: 0.3190994	total: 399ms	

58:	learn: 0.2976358	total: 480ms	remaining: 740ms
59:	learn: 0.2969134	total: 491ms	remaining: 736ms
60:	learn: 0.2963575	total: 500ms	remaining: 729ms
61:	learn: 0.2959075	total: 508ms	remaining: 720ms
62:	learn: 0.2955682	total: 517ms	remaining: 714ms
63:	learn: 0.2951376	total: 525ms	remaining: 705ms
64:	learn: 0.2949008	total: 534ms	remaining: 698ms
65:	learn: 0.2945073	total: 541ms	remaining: 689ms
66:	learn: 0.2940475	total: 551ms	remaining: 682ms
67:	learn: 0.2935190	total: 558ms	remaining: 673ms
68:	learn: 0.2933414	total: 568ms	remaining: 667ms
69:	learn: 0.2930863	total: 575ms	remaining: 658ms
70:	learn: 0.2927735	total: 585ms	remaining: 651ms
71:	learn: 0.2920484	total: 592ms	remaining: 642ms
72:	learn: 0.2915507	total: 601ms	remaining: 634ms
73:	learn: 0.2910174	total: 609ms	remaining: 625ms
74:	learn: 0.2904554	total: 618ms	remaining: 618ms
75:	learn: 0.2901094	total: 625ms	remaining: 609ms
76:	learn: 0.2894560	total: 635ms	remaining: 602ms
77:	learn: 0.2892202	total: 642

75:	learn: 0.2812146	total: 616ms	remaining: 600ms
76:	learn: 0.2807364	total: 627ms	remaining: 595ms
77:	learn: 0.2803631	total: 635ms	remaining: 586ms
78:	learn: 0.2797340	total: 642ms	remaining: 577ms
79:	learn: 0.2790885	total: 651ms	remaining: 569ms
80:	learn: 0.2788191	total: 657ms	remaining: 560ms
81:	learn: 0.2784606	total: 665ms	remaining: 552ms
82:	learn: 0.2781928	total: 674ms	remaining: 544ms
83:	learn: 0.2775489	total: 682ms	remaining: 536ms
84:	learn: 0.2773697	total: 690ms	remaining: 528ms
85:	learn: 0.2769755	total: 698ms	remaining: 519ms
86:	learn: 0.2765255	total: 706ms	remaining: 511ms
87:	learn: 0.2761843	total: 713ms	remaining: 502ms
88:	learn: 0.2759243	total: 720ms	remaining: 494ms
89:	learn: 0.2756516	total: 728ms	remaining: 485ms
90:	learn: 0.2753207	total: 735ms	remaining: 477ms
91:	learn: 0.2747705	total: 743ms	remaining: 468ms
92:	learn: 0.2743689	total: 751ms	remaining: 460ms
93:	learn: 0.2740662	total: 758ms	remaining: 452ms
94:	learn: 0.2738317	total: 766

89:	learn: 0.2969415	total: 684ms	remaining: 456ms
90:	learn: 0.2967295	total: 693ms	remaining: 449ms
91:	learn: 0.2966421	total: 700ms	remaining: 441ms
92:	learn: 0.2962774	total: 709ms	remaining: 435ms
93:	learn: 0.2959102	total: 717ms	remaining: 427ms
94:	learn: 0.2953444	total: 724ms	remaining: 419ms
95:	learn: 0.2949784	total: 731ms	remaining: 411ms
96:	learn: 0.2948260	total: 738ms	remaining: 403ms
97:	learn: 0.2944630	total: 746ms	remaining: 396ms
98:	learn: 0.2939936	total: 753ms	remaining: 388ms
99:	learn: 0.2936642	total: 760ms	remaining: 380ms
100:	learn: 0.2934643	total: 767ms	remaining: 372ms
101:	learn: 0.2932554	total: 774ms	remaining: 364ms
102:	learn: 0.2929292	total: 781ms	remaining: 357ms
103:	learn: 0.2925772	total: 789ms	remaining: 349ms
104:	learn: 0.2922150	total: 795ms	remaining: 341ms
105:	learn: 0.2919617	total: 803ms	remaining: 333ms
106:	learn: 0.2915400	total: 810ms	remaining: 326ms
107:	learn: 0.2911502	total: 818ms	remaining: 318ms
108:	learn: 0.2906722	t

113:	learn: 0.2713280	total: 846ms	remaining: 267ms
114:	learn: 0.2709699	total: 856ms	remaining: 260ms
115:	learn: 0.2704897	total: 865ms	remaining: 254ms
116:	learn: 0.2702989	total: 873ms	remaining: 246ms
117:	learn: 0.2701001	total: 879ms	remaining: 238ms
118:	learn: 0.2699887	total: 886ms	remaining: 231ms
119:	learn: 0.2699523	total: 894ms	remaining: 224ms
120:	learn: 0.2695567	total: 902ms	remaining: 216ms
121:	learn: 0.2693385	total: 910ms	remaining: 209ms
122:	learn: 0.2689440	total: 918ms	remaining: 202ms
123:	learn: 0.2685818	total: 930ms	remaining: 195ms
124:	learn: 0.2685185	total: 937ms	remaining: 187ms
125:	learn: 0.2681350	total: 949ms	remaining: 181ms
126:	learn: 0.2678624	total: 958ms	remaining: 173ms
127:	learn: 0.2674752	total: 968ms	remaining: 166ms
128:	learn: 0.2672594	total: 977ms	remaining: 159ms
129:	learn: 0.2669626	total: 985ms	remaining: 151ms
130:	learn: 0.2665286	total: 994ms	remaining: 144ms
131:	learn: 0.2662834	total: 1s	remaining: 137ms
132:	learn: 0.2

126:	learn: 0.2880581	total: 1s	remaining: 182ms
127:	learn: 0.2877715	total: 1.01s	remaining: 174ms
128:	learn: 0.2873167	total: 1.02s	remaining: 167ms
129:	learn: 0.2871918	total: 1.03s	remaining: 159ms
130:	learn: 0.2868522	total: 1.04s	remaining: 151ms
131:	learn: 0.2864358	total: 1.05s	remaining: 143ms
132:	learn: 0.2861528	total: 1.06s	remaining: 135ms
133:	learn: 0.2857962	total: 1.06s	remaining: 127ms
134:	learn: 0.2854521	total: 1.07s	remaining: 119ms
135:	learn: 0.2850909	total: 1.08s	remaining: 111ms
136:	learn: 0.2848252	total: 1.09s	remaining: 104ms
137:	learn: 0.2843682	total: 1.1s	remaining: 95.7ms
138:	learn: 0.2840565	total: 1.11s	remaining: 87.8ms
139:	learn: 0.2836988	total: 1.12s	remaining: 79.9ms
140:	learn: 0.2834269	total: 1.13s	remaining: 71.9ms
141:	learn: 0.2831084	total: 1.14s	remaining: 64ms
142:	learn: 0.2826779	total: 1.15s	remaining: 56.1ms
143:	learn: 0.2824299	total: 1.15s	remaining: 48.1ms
144:	learn: 0.2822720	total: 1.16s	remaining: 40.1ms
145:	learn

146:	learn: 0.2666081	total: 1.21s	remaining: 24.8ms
147:	learn: 0.2662473	total: 1.22s	remaining: 16.6ms
148:	learn: 0.2659188	total: 1.23s	remaining: 8.27ms
149:	learn: 0.2654545	total: 1.24s	remaining: 0us
0:	learn: 0.6406933	total: 10.1ms	remaining: 1.51s
1:	learn: 0.5973543	total: 18.8ms	remaining: 1.39s
2:	learn: 0.5588684	total: 27ms	remaining: 1.32s
3:	learn: 0.5243670	total: 35.8ms	remaining: 1.3s
4:	learn: 0.4925494	total: 43.9ms	remaining: 1.27s
5:	learn: 0.4679750	total: 53.1ms	remaining: 1.27s
6:	learn: 0.4435945	total: 62ms	remaining: 1.27s
7:	learn: 0.4284941	total: 71.7ms	remaining: 1.27s
8:	learn: 0.4121397	total: 79.6ms	remaining: 1.25s
9:	learn: 0.3964024	total: 88ms	remaining: 1.23s
10:	learn: 0.3850110	total: 96.1ms	remaining: 1.21s
11:	learn: 0.3757437	total: 104ms	remaining: 1.2s
12:	learn: 0.3678498	total: 112ms	remaining: 1.18s
13:	learn: 0.3597016	total: 120ms	remaining: 1.17s
14:	learn: 0.3544360	total: 128ms	remaining: 1.15s
15:	learn: 0.3499357	total: 136ms

8:	learn: 0.4267648	total: 76ms	remaining: 1.19s
9:	learn: 0.4148323	total: 85.2ms	remaining: 1.19s
10:	learn: 0.4028916	total: 93.7ms	remaining: 1.18s
11:	learn: 0.3927212	total: 102ms	remaining: 1.18s
12:	learn: 0.3843390	total: 110ms	remaining: 1.16s
13:	learn: 0.3783014	total: 117ms	remaining: 1.14s
14:	learn: 0.3717995	total: 126ms	remaining: 1.13s
15:	learn: 0.3686115	total: 133ms	remaining: 1.12s
16:	learn: 0.3640709	total: 141ms	remaining: 1.1s
17:	learn: 0.3602041	total: 151ms	remaining: 1.11s
18:	learn: 0.3556482	total: 158ms	remaining: 1.09s
19:	learn: 0.3524949	total: 167ms	remaining: 1.08s
20:	learn: 0.3498797	total: 175ms	remaining: 1.07s
21:	learn: 0.3466369	total: 184ms	remaining: 1.07s
22:	learn: 0.3445876	total: 192ms	remaining: 1.06s
23:	learn: 0.3420417	total: 201ms	remaining: 1.05s
24:	learn: 0.3400316	total: 208ms	remaining: 1.04s
25:	learn: 0.3379399	total: 217ms	remaining: 1.03s
26:	learn: 0.3368671	total: 225ms	remaining: 1.02s
27:	learn: 0.3354714	total: 234ms

25:	learn: 0.3193125	total: 216ms	remaining: 1.03s
26:	learn: 0.3178824	total: 225ms	remaining: 1.03s
27:	learn: 0.3157164	total: 233ms	remaining: 1.01s
28:	learn: 0.3136575	total: 240ms	remaining: 1s
29:	learn: 0.3124248	total: 248ms	remaining: 990ms
30:	learn: 0.3108035	total: 256ms	remaining: 983ms
31:	learn: 0.3095156	total: 264ms	remaining: 974ms
32:	learn: 0.3083842	total: 272ms	remaining: 965ms
33:	learn: 0.3072427	total: 279ms	remaining: 953ms
34:	learn: 0.3059660	total: 288ms	remaining: 947ms
35:	learn: 0.3051611	total: 295ms	remaining: 935ms
36:	learn: 0.3043900	total: 304ms	remaining: 928ms
37:	learn: 0.3035204	total: 311ms	remaining: 918ms
38:	learn: 0.3028279	total: 322ms	remaining: 915ms
39:	learn: 0.3020930	total: 328ms	remaining: 903ms
40:	learn: 0.3013068	total: 336ms	remaining: 894ms
41:	learn: 0.3005650	total: 343ms	remaining: 883ms
42:	learn: 0.3000214	total: 351ms	remaining: 874ms
43:	learn: 0.2991174	total: 359ms	remaining: 864ms
44:	learn: 0.2984406	total: 367ms	

55:	learn: 0.3151142	total: 469ms	remaining: 788ms
56:	learn: 0.3144696	total: 478ms	remaining: 779ms
57:	learn: 0.3137121	total: 487ms	remaining: 772ms
58:	learn: 0.3134432	total: 494ms	remaining: 762ms
59:	learn: 0.3128569	total: 503ms	remaining: 754ms
60:	learn: 0.3121632	total: 512ms	remaining: 747ms
61:	learn: 0.3115791	total: 520ms	remaining: 739ms
62:	learn: 0.3110778	total: 529ms	remaining: 730ms
63:	learn: 0.3104900	total: 537ms	remaining: 721ms
64:	learn: 0.3101883	total: 546ms	remaining: 714ms
65:	learn: 0.3097764	total: 554ms	remaining: 705ms
66:	learn: 0.3093639	total: 562ms	remaining: 697ms
67:	learn: 0.3090174	total: 571ms	remaining: 688ms
68:	learn: 0.3086700	total: 579ms	remaining: 680ms
69:	learn: 0.3084447	total: 587ms	remaining: 671ms
70:	learn: 0.3078646	total: 597ms	remaining: 664ms
71:	learn: 0.3074321	total: 604ms	remaining: 655ms
72:	learn: 0.3068363	total: 613ms	remaining: 647ms
73:	learn: 0.3062831	total: 621ms	remaining: 637ms
74:	learn: 0.3059406	total: 629

69:	learn: 0.2930863	total: 613ms	remaining: 700ms
70:	learn: 0.2927735	total: 622ms	remaining: 693ms
71:	learn: 0.2920484	total: 631ms	remaining: 683ms
72:	learn: 0.2915507	total: 640ms	remaining: 675ms
73:	learn: 0.2910174	total: 649ms	remaining: 667ms
74:	learn: 0.2904554	total: 658ms	remaining: 658ms
75:	learn: 0.2901094	total: 666ms	remaining: 648ms
76:	learn: 0.2894560	total: 675ms	remaining: 640ms
77:	learn: 0.2892202	total: 683ms	remaining: 631ms
78:	learn: 0.2888274	total: 692ms	remaining: 622ms
79:	learn: 0.2885227	total: 701ms	remaining: 613ms
80:	learn: 0.2880861	total: 709ms	remaining: 604ms
81:	learn: 0.2875263	total: 718ms	remaining: 595ms
82:	learn: 0.2873340	total: 726ms	remaining: 586ms
83:	learn: 0.2867041	total: 734ms	remaining: 577ms
84:	learn: 0.2863190	total: 742ms	remaining: 568ms
85:	learn: 0.2860103	total: 751ms	remaining: 559ms
86:	learn: 0.2855341	total: 759ms	remaining: 549ms
87:	learn: 0.2850534	total: 767ms	remaining: 540ms
88:	learn: 0.2848841	total: 774

95:	learn: 0.2734198	total: 830ms	remaining: 467ms
96:	learn: 0.2731670	total: 840ms	remaining: 459ms
97:	learn: 0.2730394	total: 848ms	remaining: 450ms
98:	learn: 0.2725469	total: 857ms	remaining: 441ms
99:	learn: 0.2721563	total: 865ms	remaining: 432ms
100:	learn: 0.2718488	total: 874ms	remaining: 424ms
101:	learn: 0.2716469	total: 882ms	remaining: 415ms
102:	learn: 0.2713009	total: 890ms	remaining: 406ms
103:	learn: 0.2710635	total: 898ms	remaining: 397ms
104:	learn: 0.2707693	total: 906ms	remaining: 388ms
105:	learn: 0.2704363	total: 913ms	remaining: 379ms
106:	learn: 0.2698608	total: 921ms	remaining: 370ms
107:	learn: 0.2694098	total: 930ms	remaining: 362ms
108:	learn: 0.2690316	total: 940ms	remaining: 354ms
109:	learn: 0.2687215	total: 949ms	remaining: 345ms
110:	learn: 0.2684745	total: 959ms	remaining: 337ms
111:	learn: 0.2682056	total: 968ms	remaining: 328ms
112:	learn: 0.2680891	total: 977ms	remaining: 320ms
113:	learn: 0.2677007	total: 986ms	remaining: 311ms
114:	learn: 0.267

118:	learn: 0.2875166	total: 1.03s	remaining: 269ms
119:	learn: 0.2874572	total: 1.04s	remaining: 260ms
120:	learn: 0.2870628	total: 1.05s	remaining: 251ms
121:	learn: 0.2867691	total: 1.06s	remaining: 243ms
122:	learn: 0.2865018	total: 1.07s	remaining: 235ms
123:	learn: 0.2861028	total: 1.08s	remaining: 227ms
124:	learn: 0.2859442	total: 1.09s	remaining: 218ms
125:	learn: 0.2855841	total: 1.1s	remaining: 209ms
126:	learn: 0.2853284	total: 1.11s	remaining: 201ms
127:	learn: 0.2850064	total: 1.12s	remaining: 192ms
128:	learn: 0.2847666	total: 1.13s	remaining: 183ms
129:	learn: 0.2843654	total: 1.13s	remaining: 174ms
130:	learn: 0.2839876	total: 1.14s	remaining: 165ms
131:	learn: 0.2837530	total: 1.15s	remaining: 157ms
132:	learn: 0.2835633	total: 1.16s	remaining: 148ms
133:	learn: 0.2830814	total: 1.17s	remaining: 139ms
134:	learn: 0.2825859	total: 1.17s	remaining: 130ms
135:	learn: 0.2823249	total: 1.18s	remaining: 122ms
136:	learn: 0.2820856	total: 1.19s	remaining: 113ms
137:	learn: 0

0.7390614770251345
th =  0.39
0:	learn: 0.6468683	total: 9.22ms	remaining: 1.37s
1:	learn: 0.6055122	total: 17ms	remaining: 1.26s
2:	learn: 0.5704531	total: 24.5ms	remaining: 1.2s
3:	learn: 0.5392777	total: 32.5ms	remaining: 1.19s
4:	learn: 0.5060802	total: 39.7ms	remaining: 1.15s
5:	learn: 0.4831252	total: 48.1ms	remaining: 1.16s
6:	learn: 0.4605093	total: 55.3ms	remaining: 1.13s
7:	learn: 0.4460273	total: 64ms	remaining: 1.14s
8:	learn: 0.4295115	total: 71.6ms	remaining: 1.12s
9:	learn: 0.4176897	total: 80ms	remaining: 1.12s
10:	learn: 0.4064333	total: 88.3ms	remaining: 1.11s
11:	learn: 0.3968633	total: 96.9ms	remaining: 1.11s
12:	learn: 0.3888477	total: 105ms	remaining: 1.1s
13:	learn: 0.3826969	total: 113ms	remaining: 1.1s
14:	learn: 0.3771506	total: 123ms	remaining: 1.11s
15:	learn: 0.3727197	total: 133ms	remaining: 1.12s
16:	learn: 0.3678261	total: 143ms	remaining: 1.12s
17:	learn: 0.3637114	total: 150ms	remaining: 1.1s
18:	learn: 0.3591429	total: 157ms	remaining: 1.08s
19:	learn

28:	learn: 0.3221467	total: 206ms	remaining: 861ms
29:	learn: 0.3206705	total: 215ms	remaining: 862ms
30:	learn: 0.3195920	total: 224ms	remaining: 860ms
31:	learn: 0.3179188	total: 232ms	remaining: 855ms
32:	learn: 0.3170773	total: 239ms	remaining: 849ms
33:	learn: 0.3155846	total: 246ms	remaining: 839ms
34:	learn: 0.3141604	total: 253ms	remaining: 832ms
35:	learn: 0.3132116	total: 260ms	remaining: 824ms
36:	learn: 0.3123653	total: 267ms	remaining: 816ms
37:	learn: 0.3115486	total: 274ms	remaining: 808ms
38:	learn: 0.3105977	total: 282ms	remaining: 802ms
39:	learn: 0.3098810	total: 289ms	remaining: 794ms
40:	learn: 0.3091373	total: 295ms	remaining: 785ms
41:	learn: 0.3081933	total: 302ms	remaining: 777ms
42:	learn: 0.3077375	total: 309ms	remaining: 769ms
43:	learn: 0.3068101	total: 316ms	remaining: 761ms
44:	learn: 0.3062083	total: 322ms	remaining: 752ms
45:	learn: 0.3052735	total: 329ms	remaining: 743ms
46:	learn: 0.3048003	total: 336ms	remaining: 735ms
47:	learn: 0.3038439	total: 343

57:	learn: 0.2889820	total: 430ms	remaining: 681ms
58:	learn: 0.2885963	total: 437ms	remaining: 674ms
59:	learn: 0.2881033	total: 445ms	remaining: 668ms
60:	learn: 0.2875505	total: 452ms	remaining: 660ms
61:	learn: 0.2869288	total: 459ms	remaining: 652ms
62:	learn: 0.2864249	total: 466ms	remaining: 644ms
63:	learn: 0.2861084	total: 473ms	remaining: 636ms
64:	learn: 0.2857665	total: 481ms	remaining: 629ms
65:	learn: 0.2854876	total: 488ms	remaining: 622ms
66:	learn: 0.2852173	total: 496ms	remaining: 614ms
67:	learn: 0.2847502	total: 503ms	remaining: 607ms
68:	learn: 0.2843764	total: 511ms	remaining: 600ms
69:	learn: 0.2839872	total: 518ms	remaining: 592ms
70:	learn: 0.2836474	total: 525ms	remaining: 584ms
71:	learn: 0.2830473	total: 532ms	remaining: 577ms
72:	learn: 0.2826230	total: 540ms	remaining: 569ms
73:	learn: 0.2822872	total: 547ms	remaining: 562ms
74:	learn: 0.2817791	total: 555ms	remaining: 555ms
75:	learn: 0.2812146	total: 562ms	remaining: 547ms
76:	learn: 0.2807364	total: 570

71:	learn: 0.3038648	total: 607ms	remaining: 658ms
72:	learn: 0.3034800	total: 617ms	remaining: 651ms
73:	learn: 0.3031356	total: 626ms	remaining: 642ms
74:	learn: 0.3025530	total: 635ms	remaining: 635ms
75:	learn: 0.3022806	total: 644ms	remaining: 627ms
76:	learn: 0.3019481	total: 654ms	remaining: 620ms
77:	learn: 0.3015888	total: 661ms	remaining: 610ms
78:	learn: 0.3014325	total: 670ms	remaining: 602ms
79:	learn: 0.3008923	total: 678ms	remaining: 593ms
80:	learn: 0.3005745	total: 686ms	remaining: 584ms
81:	learn: 0.3001694	total: 693ms	remaining: 575ms
82:	learn: 0.2997926	total: 702ms	remaining: 567ms
83:	learn: 0.2992175	total: 710ms	remaining: 558ms
84:	learn: 0.2989152	total: 720ms	remaining: 550ms
85:	learn: 0.2984671	total: 726ms	remaining: 540ms
86:	learn: 0.2978964	total: 735ms	remaining: 532ms
87:	learn: 0.2973528	total: 743ms	remaining: 523ms
88:	learn: 0.2971252	total: 752ms	remaining: 516ms
89:	learn: 0.2969415	total: 759ms	remaining: 506ms
90:	learn: 0.2967295	total: 769

97:	learn: 0.2760536	total: 819ms	remaining: 435ms
98:	learn: 0.2757895	total: 830ms	remaining: 428ms
99:	learn: 0.2754875	total: 838ms	remaining: 419ms
100:	learn: 0.2751746	total: 846ms	remaining: 411ms
101:	learn: 0.2750561	total: 854ms	remaining: 402ms
102:	learn: 0.2746040	total: 862ms	remaining: 393ms
103:	learn: 0.2743646	total: 870ms	remaining: 385ms
104:	learn: 0.2741955	total: 879ms	remaining: 377ms
105:	learn: 0.2739077	total: 887ms	remaining: 368ms
106:	learn: 0.2736346	total: 896ms	remaining: 360ms
107:	learn: 0.2731683	total: 904ms	remaining: 352ms
108:	learn: 0.2727446	total: 914ms	remaining: 344ms
109:	learn: 0.2724391	total: 923ms	remaining: 336ms
110:	learn: 0.2722336	total: 931ms	remaining: 327ms
111:	learn: 0.2719140	total: 941ms	remaining: 319ms
112:	learn: 0.2716138	total: 950ms	remaining: 311ms
113:	learn: 0.2713280	total: 960ms	remaining: 303ms
114:	learn: 0.2709699	total: 967ms	remaining: 294ms
115:	learn: 0.2704897	total: 976ms	remaining: 286ms
116:	learn: 0.2

124:	learn: 0.2887239	total: 1.05s	remaining: 210ms
125:	learn: 0.2883732	total: 1.06s	remaining: 202ms
126:	learn: 0.2880581	total: 1.07s	remaining: 193ms
127:	learn: 0.2877715	total: 1.07s	remaining: 185ms
128:	learn: 0.2873167	total: 1.08s	remaining: 177ms
129:	learn: 0.2871918	total: 1.09s	remaining: 168ms
130:	learn: 0.2868522	total: 1.1s	remaining: 160ms
131:	learn: 0.2864358	total: 1.11s	remaining: 151ms
132:	learn: 0.2861528	total: 1.12s	remaining: 143ms
133:	learn: 0.2857962	total: 1.13s	remaining: 134ms
134:	learn: 0.2854521	total: 1.14s	remaining: 126ms
135:	learn: 0.2850909	total: 1.14s	remaining: 118ms
136:	learn: 0.2848252	total: 1.15s	remaining: 109ms
137:	learn: 0.2843682	total: 1.16s	remaining: 101ms
138:	learn: 0.2840565	total: 1.17s	remaining: 92.4ms
139:	learn: 0.2836988	total: 1.17s	remaining: 83.9ms
140:	learn: 0.2834269	total: 1.18s	remaining: 75.6ms
141:	learn: 0.2831084	total: 1.19s	remaining: 67.1ms
142:	learn: 0.2826779	total: 1.2s	remaining: 58.8ms
143:	lear

0:	learn: 0.6406933	total: 9.42ms	remaining: 1.4s
1:	learn: 0.5973543	total: 17.2ms	remaining: 1.27s
2:	learn: 0.5588684	total: 25.1ms	remaining: 1.23s
3:	learn: 0.5243670	total: 33ms	remaining: 1.2s
4:	learn: 0.4925494	total: 40.4ms	remaining: 1.17s
5:	learn: 0.4679750	total: 49.2ms	remaining: 1.18s
6:	learn: 0.4435945	total: 56.6ms	remaining: 1.16s
7:	learn: 0.4284941	total: 65.8ms	remaining: 1.17s
8:	learn: 0.4121397	total: 73.8ms	remaining: 1.16s
9:	learn: 0.3964024	total: 81.7ms	remaining: 1.14s
10:	learn: 0.3850110	total: 90.3ms	remaining: 1.14s
11:	learn: 0.3757437	total: 98.8ms	remaining: 1.14s
12:	learn: 0.3678498	total: 106ms	remaining: 1.12s
13:	learn: 0.3597016	total: 116ms	remaining: 1.12s
14:	learn: 0.3544360	total: 124ms	remaining: 1.11s
15:	learn: 0.3499357	total: 133ms	remaining: 1.11s
16:	learn: 0.3452655	total: 142ms	remaining: 1.11s
17:	learn: 0.3411178	total: 149ms	remaining: 1.09s
18:	learn: 0.3368378	total: 157ms	remaining: 1.08s
19:	learn: 0.3339277	total: 165ms

29:	learn: 0.3322441	total: 239ms	remaining: 956ms
30:	learn: 0.3315433	total: 250ms	remaining: 960ms
31:	learn: 0.3298786	total: 259ms	remaining: 954ms
32:	learn: 0.3288919	total: 267ms	remaining: 947ms
33:	learn: 0.3276050	total: 277ms	remaining: 943ms
34:	learn: 0.3262382	total: 285ms	remaining: 938ms
35:	learn: 0.3252084	total: 295ms	remaining: 934ms
36:	learn: 0.3241475	total: 302ms	remaining: 922ms
37:	learn: 0.3233605	total: 311ms	remaining: 918ms
38:	learn: 0.3225648	total: 319ms	remaining: 907ms
39:	learn: 0.3217714	total: 327ms	remaining: 900ms
40:	learn: 0.3209216	total: 335ms	remaining: 890ms
41:	learn: 0.3203762	total: 342ms	remaining: 879ms
42:	learn: 0.3198610	total: 350ms	remaining: 870ms
43:	learn: 0.3190340	total: 358ms	remaining: 864ms
44:	learn: 0.3181790	total: 366ms	remaining: 854ms
45:	learn: 0.3170837	total: 375ms	remaining: 847ms
46:	learn: 0.3165836	total: 382ms	remaining: 838ms
47:	learn: 0.3161037	total: 392ms	remaining: 832ms
48:	learn: 0.3153002	total: 399

48:	learn: 0.2957639	total: 421ms	remaining: 867ms
49:	learn: 0.2952144	total: 429ms	remaining: 858ms
50:	learn: 0.2947171	total: 439ms	remaining: 852ms
51:	learn: 0.2942075	total: 447ms	remaining: 843ms
52:	learn: 0.2936724	total: 455ms	remaining: 833ms
53:	learn: 0.2928554	total: 463ms	remaining: 824ms
54:	learn: 0.2922784	total: 471ms	remaining: 814ms
55:	learn: 0.2919343	total: 480ms	remaining: 806ms
56:	learn: 0.2914207	total: 488ms	remaining: 796ms
57:	learn: 0.2909085	total: 497ms	remaining: 788ms
58:	learn: 0.2902261	total: 505ms	remaining: 778ms
59:	learn: 0.2896472	total: 513ms	remaining: 769ms
60:	learn: 0.2890027	total: 522ms	remaining: 761ms
61:	learn: 0.2885459	total: 531ms	remaining: 753ms
62:	learn: 0.2880981	total: 539ms	remaining: 744ms
63:	learn: 0.2876602	total: 547ms	remaining: 735ms
64:	learn: 0.2871473	total: 555ms	remaining: 726ms
65:	learn: 0.2869425	total: 564ms	remaining: 718ms
66:	learn: 0.2864453	total: 573ms	remaining: 709ms
67:	learn: 0.2858856	total: 582

74:	learn: 0.3059406	total: 637ms	remaining: 637ms
75:	learn: 0.3054454	total: 647ms	remaining: 630ms
76:	learn: 0.3048968	total: 655ms	remaining: 621ms
77:	learn: 0.3043889	total: 664ms	remaining: 613ms
78:	learn: 0.3039659	total: 672ms	remaining: 604ms
79:	learn: 0.3037502	total: 682ms	remaining: 596ms
80:	learn: 0.3033084	total: 689ms	remaining: 587ms
81:	learn: 0.3027362	total: 699ms	remaining: 580ms
82:	learn: 0.3023406	total: 707ms	remaining: 571ms
83:	learn: 0.3018957	total: 716ms	remaining: 563ms
84:	learn: 0.3016399	total: 724ms	remaining: 554ms
85:	learn: 0.3011858	total: 733ms	remaining: 545ms
86:	learn: 0.3008598	total: 742ms	remaining: 537ms
87:	learn: 0.3004065	total: 750ms	remaining: 529ms
88:	learn: 0.3001097	total: 759ms	remaining: 520ms
89:	learn: 0.2996789	total: 768ms	remaining: 512ms
90:	learn: 0.2991978	total: 777ms	remaining: 504ms
91:	learn: 0.2988588	total: 785ms	remaining: 495ms
92:	learn: 0.2983626	total: 794ms	remaining: 487ms
93:	learn: 0.2980544	total: 803

91:	learn: 0.2837351	total: 854ms	remaining: 538ms
92:	learn: 0.2834376	total: 865ms	remaining: 530ms
93:	learn: 0.2831165	total: 877ms	remaining: 522ms
94:	learn: 0.2828628	total: 889ms	remaining: 515ms
95:	learn: 0.2826089	total: 898ms	remaining: 505ms
96:	learn: 0.2821383	total: 905ms	remaining: 495ms
97:	learn: 0.2818128	total: 915ms	remaining: 486ms
98:	learn: 0.2814229	total: 925ms	remaining: 477ms
99:	learn: 0.2810479	total: 935ms	remaining: 467ms
100:	learn: 0.2807621	total: 943ms	remaining: 457ms
101:	learn: 0.2804922	total: 950ms	remaining: 447ms
102:	learn: 0.2799805	total: 958ms	remaining: 437ms
103:	learn: 0.2797748	total: 965ms	remaining: 427ms
104:	learn: 0.2793761	total: 973ms	remaining: 417ms
105:	learn: 0.2790569	total: 982ms	remaining: 407ms
106:	learn: 0.2788742	total: 989ms	remaining: 397ms
107:	learn: 0.2784444	total: 997ms	remaining: 388ms
108:	learn: 0.2780947	total: 1s	remaining: 378ms
109:	learn: 0.2778421	total: 1.01s	remaining: 369ms
110:	learn: 0.2776680	to

124:	learn: 0.2647567	total: 1.02s	remaining: 203ms
125:	learn: 0.2644114	total: 1.02s	remaining: 195ms
126:	learn: 0.2642660	total: 1.03s	remaining: 187ms
127:	learn: 0.2640922	total: 1.04s	remaining: 179ms
128:	learn: 0.2638404	total: 1.05s	remaining: 171ms
129:	learn: 0.2634911	total: 1.06s	remaining: 163ms
130:	learn: 0.2632194	total: 1.06s	remaining: 155ms
131:	learn: 0.2628911	total: 1.07s	remaining: 146ms
132:	learn: 0.2623506	total: 1.08s	remaining: 138ms
133:	learn: 0.2620468	total: 1.09s	remaining: 130ms
134:	learn: 0.2618174	total: 1.1s	remaining: 122ms
135:	learn: 0.2615236	total: 1.1s	remaining: 114ms
136:	learn: 0.2613991	total: 1.11s	remaining: 105ms
137:	learn: 0.2611149	total: 1.12s	remaining: 97.3ms
138:	learn: 0.2607114	total: 1.13s	remaining: 89.2ms
139:	learn: 0.2604574	total: 1.13s	remaining: 81ms
140:	learn: 0.2601871	total: 1.14s	remaining: 72.8ms
141:	learn: 0.2597187	total: 1.15s	remaining: 64.7ms
142:	learn: 0.2593489	total: 1.16s	remaining: 56.6ms
143:	learn

142:	learn: 0.2802188	total: 1.21s	remaining: 59.4ms
143:	learn: 0.2799118	total: 1.22s	remaining: 51ms
144:	learn: 0.2796887	total: 1.24s	remaining: 42.6ms
145:	learn: 0.2792600	total: 1.24s	remaining: 34.1ms
146:	learn: 0.2788620	total: 1.25s	remaining: 25.6ms
147:	learn: 0.2784754	total: 1.26s	remaining: 17.1ms
148:	learn: 0.2782392	total: 1.27s	remaining: 8.53ms
149:	learn: 0.2778508	total: 1.28s	remaining: 0us
0:	learn: 0.6405204	total: 10.3ms	remaining: 1.53s
1:	learn: 0.5969502	total: 19.5ms	remaining: 1.45s
2:	learn: 0.5565319	total: 28.9ms	remaining: 1.41s
3:	learn: 0.5219895	total: 38.6ms	remaining: 1.41s
4:	learn: 0.4898463	total: 46.6ms	remaining: 1.35s
5:	learn: 0.4654321	total: 55.4ms	remaining: 1.33s
6:	learn: 0.4425472	total: 63.4ms	remaining: 1.29s
7:	learn: 0.4274495	total: 72.1ms	remaining: 1.28s
8:	learn: 0.4091716	total: 79.5ms	remaining: 1.25s
9:	learn: 0.3968522	total: 87ms	remaining: 1.22s
10:	learn: 0.3849980	total: 94.1ms	remaining: 1.19s
11:	learn: 0.3744906	

4:	learn: 0.5060802	total: 45.4ms	remaining: 1.32s
5:	learn: 0.4831252	total: 55.1ms	remaining: 1.32s
6:	learn: 0.4605093	total: 62.8ms	remaining: 1.28s
7:	learn: 0.4460273	total: 70.2ms	remaining: 1.25s
8:	learn: 0.4295115	total: 77.5ms	remaining: 1.21s
9:	learn: 0.4176897	total: 86.1ms	remaining: 1.21s
10:	learn: 0.4064333	total: 93.5ms	remaining: 1.18s
11:	learn: 0.3968633	total: 102ms	remaining: 1.17s
12:	learn: 0.3888477	total: 110ms	remaining: 1.16s
13:	learn: 0.3826969	total: 118ms	remaining: 1.14s
14:	learn: 0.3771506	total: 125ms	remaining: 1.12s
15:	learn: 0.3727197	total: 134ms	remaining: 1.12s
16:	learn: 0.3678261	total: 141ms	remaining: 1.1s
17:	learn: 0.3637114	total: 150ms	remaining: 1.1s
18:	learn: 0.3591429	total: 157ms	remaining: 1.08s
19:	learn: 0.3563613	total: 166ms	remaining: 1.08s
20:	learn: 0.3539734	total: 173ms	remaining: 1.06s
21:	learn: 0.3519335	total: 182ms	remaining: 1.06s
22:	learn: 0.3499149	total: 189ms	remaining: 1.04s
23:	learn: 0.3468423	total: 198m

25:	learn: 0.3276415	total: 213ms	remaining: 1.02s
26:	learn: 0.3258595	total: 221ms	remaining: 1.01s
27:	learn: 0.3243666	total: 230ms	remaining: 1s
28:	learn: 0.3221467	total: 238ms	remaining: 993ms
29:	learn: 0.3206705	total: 246ms	remaining: 982ms
30:	learn: 0.3195920	total: 255ms	remaining: 979ms
31:	learn: 0.3179188	total: 262ms	remaining: 967ms
32:	learn: 0.3170773	total: 271ms	remaining: 961ms
33:	learn: 0.3155846	total: 279ms	remaining: 952ms
34:	learn: 0.3141604	total: 288ms	remaining: 945ms
35:	learn: 0.3132116	total: 295ms	remaining: 935ms
36:	learn: 0.3123653	total: 304ms	remaining: 929ms
37:	learn: 0.3115486	total: 312ms	remaining: 919ms
38:	learn: 0.3105977	total: 321ms	remaining: 912ms
39:	learn: 0.3098810	total: 328ms	remaining: 902ms
40:	learn: 0.3091373	total: 336ms	remaining: 894ms
41:	learn: 0.3081933	total: 344ms	remaining: 885ms
42:	learn: 0.3077375	total: 353ms	remaining: 878ms
43:	learn: 0.3068101	total: 360ms	remaining: 868ms
44:	learn: 0.3062083	total: 368ms	

54:	learn: 0.2904242	total: 452ms	remaining: 781ms
55:	learn: 0.2899397	total: 462ms	remaining: 775ms
56:	learn: 0.2895620	total: 470ms	remaining: 767ms
57:	learn: 0.2889820	total: 480ms	remaining: 761ms
58:	learn: 0.2885963	total: 487ms	remaining: 751ms
59:	learn: 0.2881033	total: 496ms	remaining: 744ms
60:	learn: 0.2875505	total: 503ms	remaining: 734ms
61:	learn: 0.2869288	total: 513ms	remaining: 727ms
62:	learn: 0.2864249	total: 520ms	remaining: 718ms
63:	learn: 0.2861084	total: 530ms	remaining: 712ms
64:	learn: 0.2857665	total: 538ms	remaining: 703ms
65:	learn: 0.2854876	total: 548ms	remaining: 697ms
66:	learn: 0.2852173	total: 557ms	remaining: 690ms
67:	learn: 0.2847502	total: 566ms	remaining: 682ms
68:	learn: 0.2843764	total: 575ms	remaining: 674ms
69:	learn: 0.2839872	total: 583ms	remaining: 666ms
70:	learn: 0.2836474	total: 594ms	remaining: 661ms
71:	learn: 0.2830473	total: 604ms	remaining: 654ms
72:	learn: 0.2826230	total: 612ms	remaining: 646ms
73:	learn: 0.2822872	total: 621

73:	learn: 0.3031356	total: 705ms	remaining: 724ms
74:	learn: 0.3025530	total: 712ms	remaining: 712ms
75:	learn: 0.3022806	total: 722ms	remaining: 703ms
76:	learn: 0.3019481	total: 730ms	remaining: 692ms
77:	learn: 0.3015888	total: 740ms	remaining: 683ms
78:	learn: 0.3014325	total: 748ms	remaining: 672ms
79:	learn: 0.3008923	total: 756ms	remaining: 662ms
80:	learn: 0.3005745	total: 765ms	remaining: 651ms
81:	learn: 0.3001694	total: 773ms	remaining: 641ms
82:	learn: 0.2997926	total: 782ms	remaining: 631ms
83:	learn: 0.2992175	total: 791ms	remaining: 621ms
84:	learn: 0.2989152	total: 799ms	remaining: 611ms
85:	learn: 0.2984671	total: 807ms	remaining: 601ms
86:	learn: 0.2978964	total: 816ms	remaining: 591ms
87:	learn: 0.2973528	total: 824ms	remaining: 580ms
88:	learn: 0.2971252	total: 832ms	remaining: 570ms
89:	learn: 0.2969415	total: 839ms	remaining: 560ms
90:	learn: 0.2967295	total: 849ms	remaining: 550ms
91:	learn: 0.2966421	total: 856ms	remaining: 540ms
92:	learn: 0.2962774	total: 865

95:	learn: 0.2764049	total: 852ms	remaining: 479ms
96:	learn: 0.2762798	total: 863ms	remaining: 471ms
97:	learn: 0.2760536	total: 872ms	remaining: 462ms
98:	learn: 0.2757895	total: 880ms	remaining: 453ms
99:	learn: 0.2754875	total: 888ms	remaining: 444ms
100:	learn: 0.2751746	total: 896ms	remaining: 435ms
101:	learn: 0.2750561	total: 903ms	remaining: 425ms
102:	learn: 0.2746040	total: 912ms	remaining: 416ms
103:	learn: 0.2743646	total: 919ms	remaining: 406ms
104:	learn: 0.2741955	total: 928ms	remaining: 398ms
105:	learn: 0.2739077	total: 936ms	remaining: 389ms
106:	learn: 0.2736346	total: 945ms	remaining: 380ms
107:	learn: 0.2731683	total: 953ms	remaining: 371ms
108:	learn: 0.2727446	total: 962ms	remaining: 362ms
109:	learn: 0.2724391	total: 971ms	remaining: 353ms
110:	learn: 0.2722336	total: 979ms	remaining: 344ms
111:	learn: 0.2719140	total: 988ms	remaining: 335ms
112:	learn: 0.2716138	total: 996ms	remaining: 326ms
113:	learn: 0.2713280	total: 1s	remaining: 317ms
114:	learn: 0.270969

In [297]:
params = {'learning_rate': [0.06], 
'n_estimators': [73]} 
th = [0.34]

Threshold =  0.3
len data =  16627
0: 10767, 1: 5860


In [287]:
params = {'learning_rate': [0.06], 
#           'n_estimators': [120, 130, 140, 150]} 
#           'n_estimators': [50, 55, 60, 65, 70, 75,80,85, 90, 95, 100, 105, 110, 115, 120]}
          'n_estimators': [100, 150, 200, 250, 300]} 
th = [0.39 , 0.4] 

In [239]:
params = {'learning_rate': [0.05],
#           'n_estimators': [372, 374]}
          'n_estimators': [250]}
th = [0.3]

In [435]:
params = {'learning_rate': [0.06],
          'n_estimators' : [150]}
# th = [0.4]
# th = [0.26, 0.27, 0.28, 0.29, 0.3, 0.31,0.32,0.33]  
th = [0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42]

In [243]:
params = {'learning_rate': [0.06],
#           'n_estimators': [372, 374]}
          'n_estimators': [80]}
th = [0.38]

In [429]:
params = {'algorithm': ['ball_tree', 'brute', 'kd_tree'],
# params = {'algorithm': ['ball_tree'],
# params = {'algorithm': ['auto'],
#           'leaf_size': [5, 10, 20, 40],
#           'n_neighbors': [30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80],
          'n_neighbors': [10, 15, 20],
#           'weights': [smart_weights],
          'p': [1],
          'n_jobs': [-1]}
th = [0.35] 
# th = [0.26, 0.27, 0.28, 0.29, 0.3, 0.31,0.32,0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42]

In [433]:
# params = {'algorithm': ['brute'],
params = {'algorithm': ['ball_tree'],
# params = {'algorithm': ['auto'],
#           'leaf_size': [10],
          'n_neighbors': [75],
#           'weights': [smart_weights],
          'p': [1],
          'n_jobs': [-1]}
th = [0.27, 0.28, 0.29, 0.3,0.31,0.32,0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42]

In [421]:
th = [0.41] 
params = {'n_estimators': [100, 200, 300, 400, 800], 
#           'criterion': ['gini', 'entropy'], 
          'criterion': ['gini'], 
          'max_depth': [9], 
#           'max_depth': [10, 15, None], 
          'n_jobs': [-1]}

In [443]:
params = {'n_estimators': [100], 
          'criterion': ['entropy'], 
          'max_depth': [10], 
          'n_jobs': [-1]}
# th=[0.41]
th = [0.3,0.31,0.32,0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42, 0.43, 0.44, 0.45]

In [445]:
params = {'n_estimators': [800], 
          'criterion': ['gini'], 
#           'criterion': ['gini'], 
          'max_depth': [9], 
#           'max_depth': [10, 15, None], 
          'n_jobs': [-1]}
th=[0.36]
# th = [0.26, 0.27, 0.28, 0.29, 0.3, 0.29, 0.3,0.31,0.32,0.33, 0.34, 0.35, 0.36, 0.37, 0.38]# 0.39, 0.4, 0.41, 0.42, 0.43, 0.44, 0.45]

In [446]:
# for i in range(5):
best = validation(main_res_train, y_train, RandomForestClassifier, params, GroupKFold, groups=groups_train, thresholds=th)

{'n_estimators': 800, 'criterion': 'gini', 'max_depth': 9, 'n_jobs': -1}
th =  0.36
0.7415311086141798
--------max-------
(0.7415311086141798, {'n_estimators': 800, 'criterion': 'gini', 'max_depth': 9, 'n_jobs': -1}, 0.36)


In [73]:
y_pred3 = predict(X_train, X_test, y_train, RandomForestClassifier, StandardScaler, best[2], **best[1],)
data = save_submission(y_pred3)

Threshold =  0.39
len data =  16627
0: 11699, 1: 4928


In [434]:
best = validation(X_train, y_train, KNeighborsClassifier, params, GroupKFold, groups=groups_train, thresholds=th)

{'algorithm': 'ball_tree', 'n_neighbors': 75, 'p': 1, 'n_jobs': -1}
th =  0.27
0.7140179087677511
th =  0.28
0.7140179087677511
th =  0.29
0.7151617727398918
th =  0.3
0.7168017085958605
th =  0.31
0.7158697322613526
th =  0.32
0.7158697322613526
th =  0.33
0.7178909249189525
th =  0.34
0.7187334838034716
th =  0.35
0.7217532555870074
th =  0.36
0.7217532555870074
th =  0.37
0.7201643427182952
th =  0.38
0.7198592669838952
th =  0.39
0.7191443931375207
th =  0.4
0.7191443931375207
th =  0.41
0.7145801957876023
th =  0.42
0.7156183745610593
--------max-------
(0.7217532555870074, {'algorithm': 'ball_tree', 'n_neighbors': 75, 'p': 1, 'n_jobs': -1}, 0.35)


In [494]:
best = validation(main_res_train, y_train, SGDClassifier, params, GroupKFold, folds_num=15, groups=groups_train, thresholds=th)

{'loss': 'log', 'alpha': 0.01}
th =  0.26
0.7496873281062211
{'loss': 'log', 'alpha': 0.005}
th =  0.26
0.7511826264581546
{'loss': 'log', 'alpha': 0.001}
th =  0.26
0.7529907621753301
{'loss': 'log', 'alpha': 0.0005}
th =  0.26
0.7521764349044318
{'loss': 'log', 'alpha': 0.0001}
th =  0.26
0.7556957621124691
--------max-------
(0.7556957621124691, {'loss': 'log', 'alpha': 0.0001}, 0.26)


In [495]:
y_pred3 = predict(main_res_train, main_res_test, y_train, SGDClassifier, StandardScaler, best[2], **best[1],)
data = save_submission(y_pred3)

Threshold =  0.26
len data =  16627
0: 11479, 1: 5148


In [456]:
params = {'loss': ['log'],
          'alpha': [0.0001]}
#           'alpha': [0.01, 0.005, 0.001, 0.0005, 0.0001]}
th = [0.26]
# th = [0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3,0.31,0.32,0.33, 0.34]

In [249]:
word, num = np.unique(body[10024].split(), return_counts=True)

In [244]:
lambda b: list(zip(word, num))

<function __main__.<lambda>(x)>

In [253]:
j=0

In [395]:
def body_change(text):
    global j
    word, num = np.unique(text.split(), return_counts=True)
    words = sorted(list(zip(word, num)), key=lambda pair: pair[1], reverse=True)[:40]
    text = ' '.join(list(map(lambda x: x[0], words)))
    j+=1 
    print(j)
    return text

In [69]:
# titles_df = pd.DataFrame()
# titles_df['body'] = main_df['body'].apply(lambda x: body_change(x))

In [398]:
titles = pd.DataFrame({'doc_id': main_df['doc_id'] ,
                       'title': titles_df['body'],
#                        'title': titles_df['title'] + titles_df['h1'],
#                        'title': titles_df['h2'] + titles_df['h3'] + titles_df['a'],
                       'group_id': main_df['group_id']})
titles.to_csv('./data/unversal_table.csv', index=False)

In [35]:
%%time
titles_df = pd.read_csv('./data/unversal_table.csv')
print('OK1')
main_df = pd.read_csv('data/CORE.csv')
print('OK2')
main_df['body'] = titles_df['title']
print('OK3')
main_df.to_csv('data/CORE.csv')
print('OK4')

OK1
OK2
OK3
OK4
CPU times: user 2min 1s, sys: 35.9 s, total: 2min 37s
Wall time: 3min 33s


In [115]:
titles_df = main_df[['doc_id', 'title', 'group_id', 'h1', 'strong']]

In [None]:
titles_df['strong'] = titles_df['strong'].apply(lambda x: ' '.join(np.unique(x.lower().split())))

In [109]:
titles_df.to_csv('./data/unversal_table.csv', index=False)

In [51]:
df_train = pd.read_csv('data/core_train.csv')
df_test = pd.read_csv('data/core_test.csv')

df_train.fillna(' ', inplace=True)
df_test.fillna(' ', inplace=True)

df_train_strong = pd.read_csv('data/TRAIN_NUMBERS_TITLES.csv')
df_test_strong = pd.read_csv('data/TEST_NUMBERS_TITLES.csv')

df_train_strong.fillna(' ', inplace=True)
df_test_strong.fillna(' ', inplace=True)

In [53]:
titles_all = pd.concat([df_train, df_test])
titles_strong = pd.concat([df_train_strong, df_test_strong])

In [90]:
titles_df = pd.merge(titles_all, titles_strong[['doc_id', 'strong']], on='doc_id', how='inner')

In [87]:
titles_df['strong'] = titles_df['strong'].apply(lambda x: ' '.join(np.unique(x.lower().split())))

In [370]:
# titles_h2h3a = pd.read_csv('./data/no_bad_words_table.csv')

In [383]:
# titles_df = pd.merge(titles_all, titles_h2h3a[['doc_id', 'title']], on='doc_id', how='inner')

In [385]:
titles_df2 = pd.DataFrame({'doc_id': titles_df['doc_id'] ,
                       'title': titles_df['title_x'] + titles_df['h1'],
#                        'title': titles_df['title'] + titles_df['h1'],
#                        'title': titles_df['h2'] + titles_df['h3'] + titles_df['a'],
                       'group_id': titles_df['group_id']})

titles_df2['title'] = stemming_titles(titles_df2)

In [70]:
titles = pd.DataFrame({'doc_id': titles_df['doc_id'] ,
                       'title': titles_df['title'],
#                        'title': titles_df['title'] + titles_df['h1'],
#                        'title': titles_df['h2'] + titles_df['h3'] + titles_df['a'],
                       'group_id': titles_df['group_id']})

titles.fillna(' ', inplace=True)
j = 0

# titles['title'] = stemming_titles(titles)
# titles['title'] = lemmatization(titles)

# titles['title']

NameError: name 'titles_df' is not defined

In [100]:
titles['title'][0]

'ваза зам подшипник ступица нива '

In [101]:
titles.to_csv('./data/unversal_table.csv', index=False)

In [298]:
titles.fillna(' ', inplace=True)

In [52]:
corpus = [
     'This is the first document.',
     'This document is the second document.',
     'And this is the third one.',
     'Is this the first document?',
     ]

# vectorizer = TfidfVectorizer()
# X = vectorizer.fit_transform(corpus)
# print(vectorizer.get_feature_names())
# ['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', 'this']


In [53]:
corpus

['This is the first document.',
 'This document is the second document.',
 'And this is the third one.',
 'Is this the first document?']

In [54]:
iterat = iter_bm25_bow(corpus, n_jobs=-1)

In [183]:
titles_df= pd.read_csv('data/no_bad_words_table.csv')

In [38]:
type(X)

list

In [169]:
df['title_unique'][5]

'аккумулятор багажник балк бензонасос блок ваз вентилятор виде где генератор главн грант грм давлен дальш датчик двигател для жидкост задн зажиган зам запчаст инжектор как калин клапан колодок контакт коробк крышк лад ламп ларгус магазин масл машин мост опор охлажда охлажден панел передн печк подшипник предохранител прибор приор проверк пружин радиатор разва регулировк рел ремн ремонт рук рулев сайлентблок сайт салон сальник свеч сво систем снят спидометр стартер стеклоподъемник ступиц ступичн схем сцеплен топливн тормоз тормозн тюнинг фильтр цилиндр чита '

In [184]:
titles = pd.DataFrame({'doc_id': titles_df['doc_id'] ,
                       'title': titles_df['title_unique'],
#                        'title': titles_df['title'] + titles_df['h1'],
#                        'title': titles_df['h2'] + titles_df['h3'] + titles_df['a'],
                       'group_id': titles_df['group_id']})

In [180]:
new_df['title_unique'].name = 'title'

In [189]:
titles.fillna(' ', inplace=True)

In [154]:
# Стекинг !!!
def stacking(X_train, X_test, train_target, models, params, folds_gen_func, groups_num=10, thresholds=[0.27], **kwargs):
    
    scaler = StandardScaler()
        
    main_res_train = np.zeros(shape=(train_target.shape[0], 1))
    main_res_test = np.zeros(shape=(X_test.shape[0], 1))
    
    for model in models:
        
        print(params[model])        
        exact_model = model(**params[model]) 
        
        fold_generator = folds_gen_func(groups_num)
        
        res_train = np.zeros(train_target.shape[0])
        res_test = np.zeros(X_test.shape[0])
        
        for train_index, test_index in fold_generator.split(X_train, train_target, **kwargs):
            
            scaler.fit(X_train)
            print(scaler.transform(X_train[train_index]).shape, train_target[train_index].shape)
            exact_model.fit(scaler.transform(X_train[train_index]), train_target[train_index])

            y_pred_train = exact_model.predict_proba(scaler.transform(X_train[test_index]))
            res_train[test_index] = y_pred_train[:,1] 
            
            scaler.fit(X_test)
            y_pred_test = exact_model.predict_proba(scaler.transform(X_test))

            res_test += y_pred_test[:,1]
            
        res_test /= groups_num
        main_res_train = np.hstack((main_res_train, res_train[:, np.newaxis]))
        main_res_test = np.hstack((main_res_test, res_test[:, np.newaxis]))   

    return main_res_train, main_res_test

In [438]:
models = [GradientBoostingClassifier, RandomForestClassifier, KNeighborsClassifier]

In [439]:
params = {}
params[GradientBoostingClassifier] = {'learning_rate': 0.06,
                                      'n_estimators' : 150}
params[RandomForestClassifier] = {'n_estimators': 800, 
                                  'criterion': 'gini', 
                                  'max_depth': 9, 
                                  'n_jobs': -1}
params[KNeighborsClassifier] = {'algorithm': 'ball_tree',
                                'leaf_size': 10,
                                'n_neighbors': 80,
                                'p': 1,
                                'n_jobs': -1}
# params[SGDClassifier] = {'loss': 'log',
#                          'alpha': 0.005}

In [440]:
main_res_train, main_res_test = stacking(X_train, X_test, y_train, models, params, GroupKFold, groups=groups_train)

{'learning_rate': 0.06, 'n_estimators': 150}
(10523, 54) (10523,)
(10513, 54) (10513,)
(10513, 54) (10513,)
(10513, 54) (10513,)
(10524, 54) (10524,)
(10514, 54) (10514,)
(10526, 54) (10526,)
(10516, 54) (10516,)
(10549, 54) (10549,)
(10519, 54) (10519,)
{'n_estimators': 800, 'criterion': 'gini', 'max_depth': 9, 'n_jobs': -1}
(10523, 54) (10523,)
(10513, 54) (10513,)
(10513, 54) (10513,)
(10513, 54) (10513,)
(10524, 54) (10524,)
(10514, 54) (10514,)
(10526, 54) (10526,)
(10516, 54) (10516,)
(10549, 54) (10549,)
(10519, 54) (10519,)
{'algorithm': 'ball_tree', 'leaf_size': 10, 'n_neighbors': 80, 'p': 1, 'n_jobs': -1}
(10523, 54) (10523,)
(10513, 54) (10513,)
(10513, 54) (10513,)
(10513, 54) (10513,)
(10524, 54) (10524,)
(10514, 54) (10514,)
(10526, 54) (10526,)
(10516, 54) (10516,)
(10549, 54) (10549,)
(10519, 54) (10519,)


In [67]:
steming = Porter
morph = pymorphy2.MorphAnalyzer()
lemmatizer = WordNetLemmatizer()
#___________________________________________
def meta_tag(doc_id):
    
    result = str()
    file_ = open('./content/' + str(doc_id) + '.dat', 'r', encoding = 'utf-8')
    text_ = file_.read()
    soup = BeautifulSoup(text_, 'html')
    
    for i in soup.find_all('meta', attrs = {'name' : 'description'}):
        if i.find_all('content'):
            result += i.attrs['content'] + ' '

    result = result.lower()
    new_string = str()
    
    result = steming.stem(result)
    
    for i in re.findall(r'\b[а-я]{1,20}\b', result): 
        new_string += (morph.parse(i)[0].normal_form) + ' ' 

    for i in re.findall(r'\b[a-z]{1,20}\b', result):
        new_string += (lemmatizer.lemmatize(i)) + ' ' 
    
    
    return new_string

In [63]:
df = pd.read_csv('./data/unversal_table.csv')