In [11]:
import os 
import numpy as np
import pandas as pd 
import xgboost as xgb 
from collections import Counter
from nltk.corpus import stopwords

In [10]:
stops = set(stopwords.words("english"))

def word_match_share(row):
    q1words = {}
    q2words = {}
    for word in str(row['text']).lower().split():
        if word not in stops:
            q1words[word] = 1
    for word in str(row['reason']).lower().split():
        if word not in stops:
            q2words[word] = 1
    if len(q1words) == 0 or len(q2words) == 0:
        return 0
    shared_words_in_q1 = [w for w in q1words.keys() if w in q2words]
    shared_words_in_q2 = [w for w in q2words.keys() if w in q1words]
    R = (len(shared_words_in_q1) + len(shared_words_in_q2))/(len(q1words) + len(q2words))
    return R

In [44]:
def tfidf_word_match_share(row):
    q1words = {}
    q2words = {}
    for word in str(row['text']).lower().split():
        if word not in stops:
            q1words[word] = 1
    for word in str(row['reason']).lower().split():
        if word not in stops:
            q2words[word] = 1
    if len(q1words) == 0 or len(q2words) == 0:
        return 0
    
    shared_weights = [
        weights.get(w, 0) for w in q1words.keys() if w in q2words
    ] + [weights.get(w, 0) for w in q2words.keys() if w in q1words]
    
    total_weights = [
        weights.get(w, 0) for w in q1words
    ] + [weights.get(w, 0) for w in q2words]
    
    R = np.sum(shared_weights) / np.sum(total_weights)
    return R

In [12]:
def get_weight(count, eps=10000, min_count=2):
    if count < min_count:
        return 0
    else:
        return 1 / (count + eps)

In [14]:
def get_train_params_for_xgboost(df):
    word_match = df.apply(word_match_share, axis=1)
    tfidf_word_match = df.apply(tfidf_word_match_share, axis=1)
    return word_match, tfidf_word_match

In [48]:
def prepare_data(df, p = 0.165):
    data = pd.DataFrame()
    word_match, tfidf_word_match = get_train_params_for_xgboost(df)
    data['word_match'] = word_match
    data['tfidf_word_match'] = tfidf_word_match
    targets = df['label'].values
    
    pos = data[targets == 1]
    neg = data[targets == 0]
    scale = ((len(pos) / (len(pos) + len(neg))) / p) - 1
    
    while scale > 1:
        neg = pd.concat([neg, neg])
        scale -=1
    neg = pd.concat([neg, neg[:int(scale * len(neg))]])
    print(len(pos) / (len(pos) + len(neg)))
    
    data = pd.concat([pos, neg])
    targets = (np.zeros(len(pos)) + 1).tolist() + np.zeros(len(neg)).tolist()
    del pos, neg 
    return data, np.array(targets)

In [17]:
def fit(x_train, y_train, x_valid, y_valid, epochs=2000):
    params = {}
    params['objective'] = 'binary:logistic'
    params['eval_metric'] = 'logloss'
    params['eta'] = 0.02
    params['max_depth'] = 4 
    
    d_train = xgb.DMatrix(x_train, label=y_train)
    d_test = xgb.DMatrix(x_valid, label=y_valid)
    
    watchlist = [(d_test, 'train'), (d_test, 'test')]
    bst = xgb.train(params, d_train, epochs, watchlist, verbose_eval=10)
    return bst

In [18]:
def test_accuracy(bst, df_test):
    x_test = pd.DataFrame()
    y_test = np.array(df_test['label'].values)
    x_test['word_match'] = df_test.apply(word_match_share, axis=1)
    x_test['tfidf_word_match'] = df_test.apply(tfidf_word_match_share, axis=1)
    d_test = xgb.DMatrix(x_test)
    
    predictions = bst.predict(d_test)
    preds = []
    for pred in predictions:
        preds.append(
            0 if pred < 0.5 else 1 
        )
    return (np.array(preds) == y_test).sum() / len(y_test)

### Loading All the data we have got so far

In [19]:
!gio open .

In [37]:
sample1 = pd.read_csv('samples/sample1.csv').drop(['Unnamed: 0'], axis=1)
sample2 = pd.read_csv('samples/sample2.csv').drop(['Unnamed: 0'], axis=1)
only_pos = pd.read_csv('../Data/train.csv')
position_changed = pd.read_csv(
    '../Data/train_dataset_random_negative_sample_from_iteself.csv'
).drop(['Unnamed: 0'], axis=1)
test_data = pd.read_csv('../Data/evaluation.csv')

In [38]:
sample2.columns = ['text', 'reason', 'label']
sample2.dropna(axis=0, inplace=True)

In [39]:
position_changed['label'].value_counts()

1    1237
0     824
Name: label, dtype: int64

#### Doing with only position changed data

In [53]:
position_changed = position_changed.sample(frac=1, random_state=1234)

es = pd.Series(
    position_changed['text'].to_list() + position_changed['reason'].to_list()).astype(str)

words = (" ".join(es)).lower().split()
counts = Counter(words)
weights = {word: get_weight(count) for word, count in counts.items()}

x_train, y_train = prepare_data(position_changed)

0.18646367199276453


In [57]:
from sklearn.model_selection import train_test_split

x_train, x_valid, y_train, y_valid = train_test_split(
    x_train, y_train, test_size=0.2, random_state=4242)

In [61]:
xgb_app1 = fit(x_train, y_train, x_valid, y_valid)

[0]	train-logloss:0.67665	test-logloss:0.67665
[10]	train-logloss:0.54139	test-logloss:0.54139
[20]	train-logloss:0.44480	test-logloss:0.44480
[30]	train-logloss:0.37357	test-logloss:0.37357
[40]	train-logloss:0.31992	test-logloss:0.31992
[50]	train-logloss:0.27910	test-logloss:0.27910
[60]	train-logloss:0.24762	test-logloss:0.24762
[70]	train-logloss:0.22325	test-logloss:0.22325




[80]	train-logloss:0.20411	test-logloss:0.20411
[90]	train-logloss:0.18895	test-logloss:0.18895
[100]	train-logloss:0.17699	test-logloss:0.17699
[110]	train-logloss:0.16757	test-logloss:0.16757
[120]	train-logloss:0.16012	test-logloss:0.16012
[130]	train-logloss:0.15420	test-logloss:0.15420
[140]	train-logloss:0.14917	test-logloss:0.14917
[150]	train-logloss:0.14498	test-logloss:0.14498
[160]	train-logloss:0.14155	test-logloss:0.14155
[170]	train-logloss:0.13867	test-logloss:0.13867
[180]	train-logloss:0.13628	test-logloss:0.13628
[190]	train-logloss:0.13440	test-logloss:0.13440
[200]	train-logloss:0.13280	test-logloss:0.13280
[210]	train-logloss:0.13143	test-logloss:0.13143
[220]	train-logloss:0.13026	test-logloss:0.13026
[230]	train-logloss:0.12937	test-logloss:0.12937
[240]	train-logloss:0.12851	test-logloss:0.12851
[250]	train-logloss:0.12766	test-logloss:0.12766
[260]	train-logloss:0.12690	test-logloss:0.12690
[270]	train-logloss:0.12616	test-logloss:0.12616
[280]	train-logloss:0.

[1740]	train-logloss:0.09847	test-logloss:0.09847
[1750]	train-logloss:0.09841	test-logloss:0.09841
[1760]	train-logloss:0.09836	test-logloss:0.09836
[1770]	train-logloss:0.09832	test-logloss:0.09832
[1780]	train-logloss:0.09827	test-logloss:0.09827
[1790]	train-logloss:0.09821	test-logloss:0.09821
[1800]	train-logloss:0.09821	test-logloss:0.09821
[1810]	train-logloss:0.09816	test-logloss:0.09816
[1820]	train-logloss:0.09816	test-logloss:0.09816
[1830]	train-logloss:0.09815	test-logloss:0.09815
[1840]	train-logloss:0.09815	test-logloss:0.09815
[1850]	train-logloss:0.09815	test-logloss:0.09815
[1860]	train-logloss:0.09815	test-logloss:0.09815
[1870]	train-logloss:0.09814	test-logloss:0.09814
[1880]	train-logloss:0.09812	test-logloss:0.09812
[1890]	train-logloss:0.09811	test-logloss:0.09811
[1900]	train-logloss:0.09807	test-logloss:0.09807
[1910]	train-logloss:0.09804	test-logloss:0.09804
[1920]	train-logloss:0.09802	test-logloss:0.09802
[1930]	train-logloss:0.09798	test-logloss:0.09798


In [62]:
ts = pd.Series(
    test_data['text'].to_list() + test_data['reason'].to_list()).astype(str)

words = (" ".join(ts)).lower().split()
counts = Counter(words)
weights = {word: get_weight(count) for word, count in counts.items()}

In [63]:
test_accuracy(xgb_app1, df_test=test_data)

0.6613333333333333

### Approach 2

In [64]:
def perform_exp(df_train, df_test, epochs=2000):
    # train
    
    train = pd.Series(
    df_train['text'].to_list() + df_train['reason'].to_list()).astype(str)
    words = (" ".join(train)).lower().split()
    counts = Counter(words)
    weights = {word: get_weight(count) for word, count in counts.items()}

    x_train, y_train = prepare_data(df_train)
    x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.2, random_state=4242)
    bst = fit(x_train, y_train, x_valid, y_valid)
    
    # testing
    
    test = pd.Series(
    df_test['text'].to_list() + df_test['reason'].to_list()).astype(str)

    words = (" ".join(test)).lower().split()
    counts = Counter(words)
    weights = {word: get_weight(count) for word, count in counts.items()}
    test_acc = test_accuracy(bst, df_test=df_test)
    print(f"Accuracy: {test_acc}")
    return bst

In [65]:
bst_approach1 = perform_exp(position_changed, test_data)

0.18646367199276453
[0]	train-logloss:0.67662	test-logloss:0.67662
[10]	train-logloss:0.54168	test-logloss:0.54168
[20]	train-logloss:0.44597	test-logloss:0.44597
[30]	train-logloss:0.37536	test-logloss:0.37536
[40]	train-logloss:0.32202	test-logloss:0.32202
[50]	train-logloss:0.28143	test-logloss:0.28143
[60]	train-logloss:0.24977	test-logloss:0.24977
[70]	train-logloss:0.22478	test-logloss:0.22478
[80]	train-logloss:0.20535	test-logloss:0.20535




[90]	train-logloss:0.18990	test-logloss:0.18990
[100]	train-logloss:0.17771	test-logloss:0.17771
[110]	train-logloss:0.16801	test-logloss:0.16801
[120]	train-logloss:0.16044	test-logloss:0.16044
[130]	train-logloss:0.15448	test-logloss:0.15448
[140]	train-logloss:0.14956	test-logloss:0.14956
[150]	train-logloss:0.14558	test-logloss:0.14558
[160]	train-logloss:0.14231	test-logloss:0.14231
[170]	train-logloss:0.13976	test-logloss:0.13976
[180]	train-logloss:0.13768	test-logloss:0.13768
[190]	train-logloss:0.13591	test-logloss:0.13591
[200]	train-logloss:0.13447	test-logloss:0.13447
[210]	train-logloss:0.13342	test-logloss:0.13342
[220]	train-logloss:0.13260	test-logloss:0.13260
[230]	train-logloss:0.13173	test-logloss:0.13173
[240]	train-logloss:0.13110	test-logloss:0.13110
[250]	train-logloss:0.13054	test-logloss:0.13054
[260]	train-logloss:0.13007	test-logloss:0.13007
[270]	train-logloss:0.12958	test-logloss:0.12958
[280]	train-logloss:0.12914	test-logloss:0.12914
[290]	train-logloss:0

[1750]	train-logloss:0.10482	test-logloss:0.10482
[1760]	train-logloss:0.10480	test-logloss:0.10480
[1770]	train-logloss:0.10473	test-logloss:0.10473
[1780]	train-logloss:0.10466	test-logloss:0.10466
[1790]	train-logloss:0.10458	test-logloss:0.10458
[1800]	train-logloss:0.10456	test-logloss:0.10456
[1810]	train-logloss:0.10452	test-logloss:0.10452
[1820]	train-logloss:0.10451	test-logloss:0.10451
[1830]	train-logloss:0.10450	test-logloss:0.10450
[1840]	train-logloss:0.10450	test-logloss:0.10450
[1850]	train-logloss:0.10446	test-logloss:0.10446
[1860]	train-logloss:0.10444	test-logloss:0.10444
[1870]	train-logloss:0.10442	test-logloss:0.10442
[1880]	train-logloss:0.10444	test-logloss:0.10444
[1890]	train-logloss:0.10440	test-logloss:0.10440
[1900]	train-logloss:0.10436	test-logloss:0.10436
[1910]	train-logloss:0.10431	test-logloss:0.10431
[1920]	train-logloss:0.10435	test-logloss:0.10435
[1930]	train-logloss:0.10432	test-logloss:0.10432
[1940]	train-logloss:0.10427	test-logloss:0.10427


**Approch 2 Concating some samples of Sample2 with position wise**

In [72]:
sample1_sample = sample1.sample(250, random_state=1200).reset_index(drop=True)
sample2_sample = sample2.sample(400, random_state=1200).reset_index(drop=True)

In [75]:
approach2_train_df = pd.concat([
    position_changed, sample1_sample, sample2_sample], axis=0).reset_index(drop=True)

In [77]:
approach2_train_df = approach2_train_df.sample(frac=1)

In [78]:
bst_approach2 = perform_exp(approach2_train_df, test_data)

0.192050923769601
[0]	train-logloss:0.67910	test-logloss:0.67910
[10]	train-logloss:0.56467	test-logloss:0.56467
[20]	train-logloss:0.48371	test-logloss:0.48371
[30]	train-logloss:0.42454	test-logloss:0.42454
[40]	train-logloss:0.38042	test-logloss:0.38042
[50]	train-logloss:0.34707	test-logloss:0.34707


  R = np.sum(shared_weights) / np.sum(total_weights)


[60]	train-logloss:0.32181	test-logloss:0.32181
[70]	train-logloss:0.30245	test-logloss:0.30245
[80]	train-logloss:0.28688	test-logloss:0.28688
[90]	train-logloss:0.27478	test-logloss:0.27478
[100]	train-logloss:0.26542	test-logloss:0.26542
[110]	train-logloss:0.25822	test-logloss:0.25822
[120]	train-logloss:0.25249	test-logloss:0.25249
[130]	train-logloss:0.24812	test-logloss:0.24812
[140]	train-logloss:0.24482	test-logloss:0.24482
[150]	train-logloss:0.24254	test-logloss:0.24254
[160]	train-logloss:0.24068	test-logloss:0.24068
[170]	train-logloss:0.23913	test-logloss:0.23913
[180]	train-logloss:0.23797	test-logloss:0.23797
[190]	train-logloss:0.23720	test-logloss:0.23720
[200]	train-logloss:0.23649	test-logloss:0.23649
[210]	train-logloss:0.23567	test-logloss:0.23567
[220]	train-logloss:0.23483	test-logloss:0.23483
[230]	train-logloss:0.23385	test-logloss:0.23385
[240]	train-logloss:0.23283	test-logloss:0.23283
[250]	train-logloss:0.23181	test-logloss:0.23181
[260]	train-logloss:0.23

[1720]	train-logloss:0.19100	test-logloss:0.19100
[1730]	train-logloss:0.19094	test-logloss:0.19094
[1740]	train-logloss:0.19087	test-logloss:0.19087
[1750]	train-logloss:0.19077	test-logloss:0.19077
[1760]	train-logloss:0.19068	test-logloss:0.19068
[1770]	train-logloss:0.19061	test-logloss:0.19061
[1780]	train-logloss:0.19055	test-logloss:0.19055
[1790]	train-logloss:0.19053	test-logloss:0.19053
[1800]	train-logloss:0.19048	test-logloss:0.19048
[1810]	train-logloss:0.19042	test-logloss:0.19042
[1820]	train-logloss:0.19039	test-logloss:0.19039
[1830]	train-logloss:0.19036	test-logloss:0.19036
[1840]	train-logloss:0.19029	test-logloss:0.19029
[1850]	train-logloss:0.19024	test-logloss:0.19024
[1860]	train-logloss:0.19007	test-logloss:0.19007
[1870]	train-logloss:0.18984	test-logloss:0.18984
[1880]	train-logloss:0.18967	test-logloss:0.18967
[1890]	train-logloss:0.18959	test-logloss:0.18959
[1900]	train-logloss:0.18948	test-logloss:0.18948
[1910]	train-logloss:0.18941	test-logloss:0.18941


**Approch 3**

In [80]:
sample1_sample = sample1.sample(500, random_state=1200).reset_index(drop=True)
sample2_sample = sample2.sample(500, random_state=1200).reset_index(drop=True)

approach3_train_df = pd.concat([
    position_changed, sample1_sample, sample2_sample], axis=0).reset_index(drop=True)

approach3_train_df = approach3_train_df.sample(frac=1)
bst_approach3 = perform_exp(approach3_train_df, test_data, 3000)

0.1896366702437529
[0]	train-logloss:0.68008	test-logloss:0.68008
[10]	train-logloss:0.57289	test-logloss:0.57289
[20]	train-logloss:0.49706	test-logloss:0.49706
[30]	train-logloss:0.44144	test-logloss:0.44144
[40]	train-logloss:0.39995	test-logloss:0.39995


  R = np.sum(shared_weights) / np.sum(total_weights)


[50]	train-logloss:0.36848	test-logloss:0.36848
[60]	train-logloss:0.34430	test-logloss:0.34430
[70]	train-logloss:0.32578	test-logloss:0.32578
[80]	train-logloss:0.31120	test-logloss:0.31120
[90]	train-logloss:0.29983	test-logloss:0.29983
[100]	train-logloss:0.29091	test-logloss:0.29091
[110]	train-logloss:0.28385	test-logloss:0.28385
[120]	train-logloss:0.27819	test-logloss:0.27819
[130]	train-logloss:0.27378	test-logloss:0.27378
[140]	train-logloss:0.27043	test-logloss:0.27043
[150]	train-logloss:0.26767	test-logloss:0.26767
[160]	train-logloss:0.26551	test-logloss:0.26551
[170]	train-logloss:0.26378	test-logloss:0.26378
[180]	train-logloss:0.26231	test-logloss:0.26231
[190]	train-logloss:0.26110	test-logloss:0.26110
[200]	train-logloss:0.26001	test-logloss:0.26001
[210]	train-logloss:0.25907	test-logloss:0.25907
[220]	train-logloss:0.25839	test-logloss:0.25839
[230]	train-logloss:0.25785	test-logloss:0.25785
[240]	train-logloss:0.25706	test-logloss:0.25706
[250]	train-logloss:0.256

[1710]	train-logloss:0.22171	test-logloss:0.22171
[1720]	train-logloss:0.22153	test-logloss:0.22153
[1730]	train-logloss:0.22138	test-logloss:0.22138
[1740]	train-logloss:0.22128	test-logloss:0.22128
[1750]	train-logloss:0.22113	test-logloss:0.22113
[1760]	train-logloss:0.22103	test-logloss:0.22103
[1770]	train-logloss:0.22095	test-logloss:0.22095
[1780]	train-logloss:0.22090	test-logloss:0.22090
[1790]	train-logloss:0.22076	test-logloss:0.22076
[1800]	train-logloss:0.22068	test-logloss:0.22068
[1810]	train-logloss:0.22054	test-logloss:0.22054
[1820]	train-logloss:0.22043	test-logloss:0.22043
[1830]	train-logloss:0.22035	test-logloss:0.22035
[1840]	train-logloss:0.22024	test-logloss:0.22024
[1850]	train-logloss:0.22014	test-logloss:0.22014
[1860]	train-logloss:0.22001	test-logloss:0.22001
[1870]	train-logloss:0.21994	test-logloss:0.21994
[1880]	train-logloss:0.21985	test-logloss:0.21985
[1890]	train-logloss:0.21977	test-logloss:0.21977
[1900]	train-logloss:0.21967	test-logloss:0.21967


**Final approach concatinating only +ves and sample1, sample2**

In [82]:
approach4_train_df = pd.concat([
    only_pos, sample1, sample2], axis=0).reset_index(drop=True)

approach4_train_df = approach4_train_df.sample(frac=1).reset_index(drop=True)
bst_approach4 = perform_exp(approach4_train_df, test_data, 3000)

0.19392171622130222
[0]	train-logloss:0.68178	test-logloss:0.68178
[10]	train-logloss:0.58915	test-logloss:0.58915
[20]	train-logloss:0.52411	test-logloss:0.52411
[30]	train-logloss:0.47704	test-logloss:0.47704


  R = np.sum(shared_weights) / np.sum(total_weights)
  R = np.sum(shared_weights) / np.sum(total_weights)


[40]	train-logloss:0.44262	test-logloss:0.44262
[50]	train-logloss:0.41678	test-logloss:0.41678
[60]	train-logloss:0.39748	test-logloss:0.39748
[70]	train-logloss:0.38281	test-logloss:0.38281
[80]	train-logloss:0.37163	test-logloss:0.37163
[90]	train-logloss:0.36313	test-logloss:0.36313
[100]	train-logloss:0.35667	test-logloss:0.35667
[110]	train-logloss:0.35179	test-logloss:0.35179
[120]	train-logloss:0.34787	test-logloss:0.34787
[130]	train-logloss:0.34466	test-logloss:0.34466
[140]	train-logloss:0.34232	test-logloss:0.34232
[150]	train-logloss:0.34058	test-logloss:0.34058
[160]	train-logloss:0.33911	test-logloss:0.33911
[170]	train-logloss:0.33793	test-logloss:0.33793
[180]	train-logloss:0.33692	test-logloss:0.33692
[190]	train-logloss:0.33620	test-logloss:0.33620
[200]	train-logloss:0.33551	test-logloss:0.33551
[210]	train-logloss:0.33494	test-logloss:0.33494
[220]	train-logloss:0.33450	test-logloss:0.33450
[230]	train-logloss:0.33404	test-logloss:0.33404
[240]	train-logloss:0.3332

[1700]	train-logloss:0.30229	test-logloss:0.30229
[1710]	train-logloss:0.30218	test-logloss:0.30218
[1720]	train-logloss:0.30209	test-logloss:0.30209
[1730]	train-logloss:0.30202	test-logloss:0.30202
[1740]	train-logloss:0.30190	test-logloss:0.30190
[1750]	train-logloss:0.30180	test-logloss:0.30180
[1760]	train-logloss:0.30170	test-logloss:0.30170
[1770]	train-logloss:0.30165	test-logloss:0.30165
[1780]	train-logloss:0.30153	test-logloss:0.30153
[1790]	train-logloss:0.30138	test-logloss:0.30138
[1800]	train-logloss:0.30128	test-logloss:0.30128
[1810]	train-logloss:0.30118	test-logloss:0.30118
[1820]	train-logloss:0.30108	test-logloss:0.30108
[1830]	train-logloss:0.30098	test-logloss:0.30098
[1840]	train-logloss:0.30097	test-logloss:0.30097
[1850]	train-logloss:0.30088	test-logloss:0.30088
[1860]	train-logloss:0.30082	test-logloss:0.30082
[1870]	train-logloss:0.30079	test-logloss:0.30079
[1880]	train-logloss:0.30070	test-logloss:0.30070
[1890]	train-logloss:0.30061	test-logloss:0.30061


**Putting everything**

In [84]:
final_train = pd.concat([
    only_pos, sample1, sample2, position_changed], axis=0).reset_index(drop=True)

final_train = final_train.sample(frac=1).reset_index(drop=True)
bst_approach_final = perform_exp(final_train, test_data, 3000)

0.18777043953541334
[0]	train-logloss:0.68101	test-logloss:0.68101
[10]	train-logloss:0.58184	test-logloss:0.58184
[20]	train-logloss:0.51187	test-logloss:0.51187


  R = np.sum(shared_weights) / np.sum(total_weights)
  R = np.sum(shared_weights) / np.sum(total_weights)


[30]	train-logloss:0.46117	test-logloss:0.46117
[40]	train-logloss:0.42346	test-logloss:0.42346
[50]	train-logloss:0.39505	test-logloss:0.39505
[60]	train-logloss:0.37326	test-logloss:0.37326
[70]	train-logloss:0.35656	test-logloss:0.35656
[80]	train-logloss:0.34386	test-logloss:0.34386
[90]	train-logloss:0.33407	test-logloss:0.33407
[100]	train-logloss:0.32650	test-logloss:0.32650
[110]	train-logloss:0.32073	test-logloss:0.32073
[120]	train-logloss:0.31611	test-logloss:0.31611
[130]	train-logloss:0.31243	test-logloss:0.31243
[140]	train-logloss:0.30966	test-logloss:0.30966
[150]	train-logloss:0.30748	test-logloss:0.30748
[160]	train-logloss:0.30571	test-logloss:0.30571
[170]	train-logloss:0.30433	test-logloss:0.30433
[180]	train-logloss:0.30318	test-logloss:0.30318
[190]	train-logloss:0.30244	test-logloss:0.30244
[200]	train-logloss:0.30174	test-logloss:0.30174
[210]	train-logloss:0.30117	test-logloss:0.30117
[220]	train-logloss:0.30049	test-logloss:0.30049
[230]	train-logloss:0.29973

[1690]	train-logloss:0.25701	test-logloss:0.25701
[1700]	train-logloss:0.25687	test-logloss:0.25687
[1710]	train-logloss:0.25671	test-logloss:0.25671
[1720]	train-logloss:0.25655	test-logloss:0.25655
[1730]	train-logloss:0.25638	test-logloss:0.25638
[1740]	train-logloss:0.25626	test-logloss:0.25626
[1750]	train-logloss:0.25609	test-logloss:0.25609
[1760]	train-logloss:0.25598	test-logloss:0.25598
[1770]	train-logloss:0.25582	test-logloss:0.25582
[1780]	train-logloss:0.25552	test-logloss:0.25552
[1790]	train-logloss:0.25541	test-logloss:0.25541
[1800]	train-logloss:0.25525	test-logloss:0.25525
[1810]	train-logloss:0.25513	test-logloss:0.25513
[1820]	train-logloss:0.25488	test-logloss:0.25488
[1830]	train-logloss:0.25469	test-logloss:0.25469
[1840]	train-logloss:0.25453	test-logloss:0.25453
[1850]	train-logloss:0.25437	test-logloss:0.25437
[1860]	train-logloss:0.25407	test-logloss:0.25407
[1870]	train-logloss:0.25388	test-logloss:0.25388
[1880]	train-logloss:0.25371	test-logloss:0.25371
