In [73]:
import numpy as np
import pandas as pd
import jieba
import textdistance as td
from tqdm import tqdm
from gensim import corpora,models,similarities
from gensim.test.utils import common_texts
from gensim.models import Word2Vec,TfidfModel
from gensim import corpora

In [82]:
#文本处理，有些可能用不到
import re
import string
import jieba
with open("baidu_stopwords.txt",encoding="utf-8") as f:
    stopword_list=f.readlines()

def tokenize_text(text):
    tokens=jieba.cut(text)
    tokens=[token.strip() for token in tokens]
    return tokens

def remove_special_characters(text):
    tokens=tokenize_text(text)
    pattern=re.compile('[{}]'.format(re.escape(string.punctuation)))
    filtered_tokens=filter(None,[pattern.sub('',token) for token in tokens])
    filtered_text=''.join(filtered_tokens)
    return filtered_text

#去除停用词
def remove_stopwords(text):
    tokens=tokenize_text(text)
    filtered_tokens=[token for token in tokens if token not in stopword_list]
    filtered_text=''.join(filtered_tokens)
    return filtered_text

def normalize_corpus(corpus,tokenize=False):
    normalize_corpus=[]
    for text in corpus:
        text=remove_special_characters(text)
        text=remove_stopwords(text)
        if tokenize:
            normalize_corpus.append(tokenize_text(text))
        else:
            normalize_corpus.append(text)
    return normalize_corpus

In [89]:
#用于提取tfidf内积作为对比相似度
from sklearn.feature_extraction.text import TfidfVectorizer

def tfidf_extractor(corpus,ngram_range=(1,1)):
    vectorizer=TfidfVectorizer(min_df=1,
                              norm='l2',
                              smooth_idf=True,
                              use_idf=True,
                              ngram_range=ngram_range)
    features=vectorizer.fit_transform(corpus)
    return vectorizer,features


def participle_text(text):
    words_list=jieba.lcut(text)
    return ' '.join(words_list)


#得到tfidf特征向量的内积距离
def get_tfidfvec_dis(tfidf_vectorizer,text1,text2):
    fit_text1=tfidf_vectorizer.transform([participle_text(text1)])
    fit_text2=tfidf_vectorizer.transform([participle_text(text2)])
    vec1=fit_text1.toarray()[0]
    vec2=fit_text2.toarray()[0]
    return np.dot(vec1,vec2)

def tfidfvec_dis_list(tfidf_vectorizer,df):
    dis_list=[]
    for text1,text2 in zip(df['text1'],df['text2']):
        dis_list.append(get_tfidfvec_dis(tfidf_vectorizer,text1,text2))
    return dis_list

In [74]:
train_raw = pd.read_csv('paws-x-zh/paws-x-zh/train.tsv', sep='\t',names=['text_a', 'text_b', 'label'])
test_raw = pd.read_csv('paws-x-zh/paws-x-zh/test.tsv', sep='\t',names=['text_a', 'text_b', 'label'])
test_raw['label'] = -1
train_raw = train_raw.dropna()
test_raw = test_raw.dropna()

In [84]:
A_corpus = normalize_corpus(train_raw['text_a'].values.tolist())
B_corpus = normalize_corpus(train_raw['text_b'].values.tolist())
corpusall = A_corpus+B_corpus
tokenized_corpus=[' '.join(jieba.lcut(text)) for text in corpusall]

In [90]:
tfidf_vectorizer,tfidf_train_features=tfidf_extractor(tokenized_corpus)

In [95]:
get_tfidfvec_dis(tfidf_vectorizer,'1975年的NBA赛季 - 76赛季是全美篮球协会的第30个赛季。','1975-76赛季的全国篮球协会是NBA的第30个赛季。')

0.8752516213468617

In [9]:
train_raw['len_a'] = train_raw['text_a'].apply(lambda x:len(x))
p = np.percentile(train_raw['len_a'].tolist(), [50,75,90,97.5]) # return 50th percentile, e.g median.
p

array([42., 55., 69., 88.])

In [100]:
def cut(content):
    try:
        seg_list = jieba.lcut(content, cut_all=True)
    except AttributeError as ex:
        print(content)
        raise ex
    return seg_list

#text1和text2长度差
def len_diff(text1,text2):
    return abs(len(text1)-len(text2))
def both_num(list1,list2):
    dict1,dict2,ans={},{},0
    for i in list1:
        dict1[i]=list1.count(i)
    for i in list2:
        dict2[i]=list2.count(i)
    for k,v in dict1.items():
        tmp=0 if k not in list2 else dict2[k]
        ans+=min(v,tmp)
    return ans

def both_words_num(text1,text2):
    a,b=jieba.lcut(text1),jieba.lcut(text2)
    return both_num(a,b)

# text1和text2共有字符的个数
def both_chars_num(text1,text2):
    a,b=[i for i in text1],[i for i in text2]
    return both_num(a,b)


#text1与text2共有单词的个数 / text1字符个数
def both_words_divideby_char1(text1,text2):
    return both_words_num(text1,text2)/len(text1)

#text1与text2共有单词的个数 / text2字符个数
def both_words_divideby_char2(text1,text2):
    return both_words_num(text1,text2)/len(text2)

def editDistance(word1: str, word2: str) -> int:
    n1 = len(word1)
    n2 = len(word2)
    dp = [[0] * (n2 + 1) for _ in range(n1 + 1)]
       
       # init
    for j in range(1, n2 + 1):
        dp[0][j] = j
    for i in range(1, n1 + 1):
        dp[i][0] = i
       
    for i in range(1, n1 + 1):
        for j in range(1, n2 + 1):
            if word1[i-1] == word2[j-1]:
                dp[i][j] = dp[i-1][j-1]
            else:
                dp[i][j] = min(dp[i][j-1], dp[i-1][j], dp[i-1][j-1] ) + 1      
    return dp[-1][-1]
def data_anaysis(df):
    # 距离
    df['edit_dist'] = df.apply(lambda row: editDistance(row['text_a'], row['text_b']), axis=1)
    df['jaccard_dist'] = df.apply(lambda row: td.jaccard(row['text_a'], row['text_b']), axis=1)
    df['tversky_dist'] = df.apply(lambda row: td.tversky(row['text_a'], row['text_b']), axis=1)
    df['jaro_dist'] = df.apply(lambda row: td.jaro(row['text_a'], row['text_b']), axis=1)
    df['tfidf_dist'] = df.apply(lambda row: get_tfidfvec_dis(tfidf_vectorizer,row['text_a'], row['text_b']), axis=1)
    # 长度差
    df['len_diff'] = df.apply(lambda row: len_diff(row['text_a'], row['text_b']), axis=1)
    # 共有单词的个数
    df['both_words_num'] = df.apply(lambda row: both_words_num(row['text_a'], row['text_b']), axis=1)
    df['both_chars_num'] = df.apply(lambda row: both_chars_num(row['text_a'], row['text_b']), axis=1)
    # 单词个数比
    df['both_words_divideby_char1'] = df.apply(lambda row: both_words_divideby_char1(row['text_a'], row['text_b']), axis=1)
    df['both_words_divideby_char2'] = df.apply(lambda row: both_words_divideby_char2(row['text_a'], row['text_b']), axis=1)
    
    return df

In [101]:
train = data_anaysis(train_raw)
test = data_anaysis(test_raw)

In [102]:
train

Unnamed: 0,text_a,text_b,label,edit_dist,jaccard_dist,tversky_dist,jaro_dist,tfidf_dist,len_diff,both_words_num,both_chars_num,both_words_divideby_char1,both_words_divideby_char2
0,1560年10月，他在巴黎秘密会见了英国大使Nicolas Throckmorton，要求他...,1560年10月，他在巴黎秘密会见了英国大使尼古拉斯·斯罗克莫顿，并要求他通过英格兰返回苏格...,0,28,0.486111,0.486111,0.760901,0.762949,5,20,35,0.357143,0.392157
1,1975年的NBA赛季 - 76赛季是全美篮球协会的第30个赛季。,1975-76赛季的全国篮球协会是NBA的第30个赛季。,1,16,0.771429,0.771429,0.796011,0.875252,6,15,27,0.441176,0.535714
2,还有具体的讨论，公众形象辩论和项目讨论。,还有公开讨论，特定档案讨论和项目讨论。,0,8,0.500000,0.500000,0.675506,0.620696,1,7,13,0.350000,0.368421
3,当可以保持相当的流速时，结果很高。,当可以保持可比较的流速时，结果很高。,1,3,0.750000,0.750000,0.905229,0.788200,1,11,15,0.647059,0.611111
4,它是Akmola地区Zerendi区的所在地。,它是Akmola地区Zerendi区的所在地。,1,0,1.000000,1.000000,1.000000,1.000000,0,9,23,0.391304,0.391304
...,...,...,...,...,...,...,...,...,...,...,...,...,...
49396,``我们的学校是精神和精神，热爱（时间路径）是我们的第一承诺''。,``我们的学校属于时间和精神，对Rehit的爱（精神之路）是我们的第一承诺。 “”,0,19,0.608696,0.608696,0.772375,0.760011,8,18,28,0.545455,0.439024
49397,她于6月24日在科克，并于7月8日抵达。,她于6月24日在科克，并于7月8日抵达唐斯。,1,2,0.909091,0.909091,0.969697,0.786773,2,16,20,0.800000,0.727273
49398,Cornelia Stuyvesant Vanderbilt（George和Edith Va...,John John F. A. Cecil（George和Cornelia Stuyvesa...,0,68,0.891667,0.891667,0.748008,0.827630,7,38,107,0.345455,0.324786
49399,第三季于2010年6月7日首播，第四季是混合情侣竞赛系统。,第四季于2010年6月7日首播。就像第三季一样，比赛系统是混合情侣。,0,16,0.800000,0.800000,0.760236,0.849129,5,16,28,0.551724,0.470588


In [103]:
from sklearn.model_selection import StratifiedKFold
import lightgbm as lgb

# 建模
fretures = ['len_diff','both_words_num','both_chars_num','both_words_divideby_char1','both_words_divideby_char2','tfidf_dist','jaccard_dist','tversky_dist','jaro_dist','edit_dist']
X = train[fretures]
y = train['label']
test_features = test[fretures]

model = lgb.LGBMClassifier(num_leaves=128,
                           max_depth=10,
                           learning_rate=0.01,
                           n_estimators=2000,
                           subsample=0.8,
                           feature_fraction=0.8,
                           reg_alpha=0.5,
                           reg_lambda=0.5,
                           random_state=2022,
                           metric='auc',
                           boosting_type='gbdt',
                           subsample_freq=1,
                           bagging_fraction=0.8)
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=2022)
prob = []
mean_acc = 0
for k,(train_index, test_index) in enumerate(skf.split(X, y)):
    print(k)
    X_train, X_val = X.iloc[train_index], X.iloc[test_index]
    y_train, y_val = y.iloc[train_index], y.iloc[test_index]
    # 训练
    print(y_val)
    model = model.fit(X_train,
                          y_train,
                          eval_set=[(X_val, y_val)],
                          eval_metric='auc',
                          verbose = True)
    # 正式预测
    test_y_pred = model.predict_proba(test_features)
    prob.append(test_y_pred)

0
0        0
21       0
22       1
26       1
29       1
        ..
49375    0
49384    1
49389    0
49396    0
49399    0
Name: label, Length: 9826, dtype: int64
[1]	valid_0's auc: 0.752973
[2]	valid_0's auc: 0.773637
[3]	valid_0's auc: 0.780437
[4]	valid_0's auc: 0.78273
[5]	valid_0's auc: 0.785849
[6]	valid_0's auc: 0.787501
[7]	valid_0's auc: 0.789968
[8]	valid_0's auc: 0.790381
[9]	valid_0's auc: 0.791125
[10]	valid_0's auc: 0.791925
[11]	valid_0's auc: 0.791945
[12]	valid_0's auc: 0.791614
[13]	valid_0's auc: 0.791177
[14]	valid_0's auc: 0.792351
[15]	valid_0's auc: 0.793195
[16]	valid_0's auc: 0.793089
[17]	valid_0's auc: 0.793794
[18]	valid_0's auc: 0.794211
[19]	valid_0's auc: 0.794348
[20]	valid_0's auc: 0.794608
[21]	valid_0's auc: 0.794884
[22]	valid_0's auc: 0.795272
[23]	valid_0's auc: 0.795315
[24]	valid_0's auc: 0.795869
[25]	valid_0's auc: 0.795582
[26]	valid_0's auc: 0.79565
[27]	valid_0's auc: 0.795472
[28]	valid_0's auc: 0.795766
[29]	valid_0's auc: 0.795788
[30]	va

[293]	valid_0's auc: 0.805968
[294]	valid_0's auc: 0.806016
[295]	valid_0's auc: 0.80603
[296]	valid_0's auc: 0.806054
[297]	valid_0's auc: 0.806112
[298]	valid_0's auc: 0.806101
[299]	valid_0's auc: 0.806112
[300]	valid_0's auc: 0.806173
[301]	valid_0's auc: 0.806208
[302]	valid_0's auc: 0.80623
[303]	valid_0's auc: 0.806267
[304]	valid_0's auc: 0.806314
[305]	valid_0's auc: 0.806285
[306]	valid_0's auc: 0.806341
[307]	valid_0's auc: 0.806359
[308]	valid_0's auc: 0.806378
[309]	valid_0's auc: 0.8064
[310]	valid_0's auc: 0.806402
[311]	valid_0's auc: 0.806441
[312]	valid_0's auc: 0.806454
[313]	valid_0's auc: 0.806463
[314]	valid_0's auc: 0.80651
[315]	valid_0's auc: 0.806536
[316]	valid_0's auc: 0.806553
[317]	valid_0's auc: 0.806572
[318]	valid_0's auc: 0.806638
[319]	valid_0's auc: 0.806678
[320]	valid_0's auc: 0.806736
[321]	valid_0's auc: 0.806763
[322]	valid_0's auc: 0.806779
[323]	valid_0's auc: 0.8068
[324]	valid_0's auc: 0.806816
[325]	valid_0's auc: 0.806832
[326]	valid_0's a

[576]	valid_0's auc: 0.810335
[577]	valid_0's auc: 0.810339
[578]	valid_0's auc: 0.810332
[579]	valid_0's auc: 0.810329
[580]	valid_0's auc: 0.810337
[581]	valid_0's auc: 0.810354
[582]	valid_0's auc: 0.810353
[583]	valid_0's auc: 0.810354
[584]	valid_0's auc: 0.810372
[585]	valid_0's auc: 0.810358
[586]	valid_0's auc: 0.810366
[587]	valid_0's auc: 0.81037
[588]	valid_0's auc: 0.810391
[589]	valid_0's auc: 0.810409
[590]	valid_0's auc: 0.810405
[591]	valid_0's auc: 0.810403
[592]	valid_0's auc: 0.810434
[593]	valid_0's auc: 0.810419
[594]	valid_0's auc: 0.810437
[595]	valid_0's auc: 0.810453
[596]	valid_0's auc: 0.810453
[597]	valid_0's auc: 0.810504
[598]	valid_0's auc: 0.810489
[599]	valid_0's auc: 0.810512
[600]	valid_0's auc: 0.810515
[601]	valid_0's auc: 0.810529
[602]	valid_0's auc: 0.810561
[603]	valid_0's auc: 0.810598
[604]	valid_0's auc: 0.810603
[605]	valid_0's auc: 0.810611
[606]	valid_0's auc: 0.810646
[607]	valid_0's auc: 0.810675
[608]	valid_0's auc: 0.81069
[609]	valid_

[888]	valid_0's auc: 0.812364
[889]	valid_0's auc: 0.812348
[890]	valid_0's auc: 0.812356
[891]	valid_0's auc: 0.812352
[892]	valid_0's auc: 0.812345
[893]	valid_0's auc: 0.812347
[894]	valid_0's auc: 0.81234
[895]	valid_0's auc: 0.812357
[896]	valid_0's auc: 0.812355
[897]	valid_0's auc: 0.812363
[898]	valid_0's auc: 0.812338
[899]	valid_0's auc: 0.812343
[900]	valid_0's auc: 0.81235
[901]	valid_0's auc: 0.812354
[902]	valid_0's auc: 0.812379
[903]	valid_0's auc: 0.812397
[904]	valid_0's auc: 0.812392
[905]	valid_0's auc: 0.81239
[906]	valid_0's auc: 0.81237
[907]	valid_0's auc: 0.812356
[908]	valid_0's auc: 0.812394
[909]	valid_0's auc: 0.812411
[910]	valid_0's auc: 0.812397
[911]	valid_0's auc: 0.812383
[912]	valid_0's auc: 0.812384
[913]	valid_0's auc: 0.812387
[914]	valid_0's auc: 0.812369
[915]	valid_0's auc: 0.812382
[916]	valid_0's auc: 0.812384
[917]	valid_0's auc: 0.812398
[918]	valid_0's auc: 0.812418
[919]	valid_0's auc: 0.812416
[920]	valid_0's auc: 0.812419
[921]	valid_0'

[1188]	valid_0's auc: 0.813376
[1189]	valid_0's auc: 0.81338
[1190]	valid_0's auc: 0.813369
[1191]	valid_0's auc: 0.813382
[1192]	valid_0's auc: 0.813378
[1193]	valid_0's auc: 0.813393
[1194]	valid_0's auc: 0.813403
[1195]	valid_0's auc: 0.813387
[1196]	valid_0's auc: 0.813373
[1197]	valid_0's auc: 0.813387
[1198]	valid_0's auc: 0.813396
[1199]	valid_0's auc: 0.813403
[1200]	valid_0's auc: 0.813413
[1201]	valid_0's auc: 0.813413
[1202]	valid_0's auc: 0.813426
[1203]	valid_0's auc: 0.813405
[1204]	valid_0's auc: 0.813412
[1205]	valid_0's auc: 0.813415
[1206]	valid_0's auc: 0.813426
[1207]	valid_0's auc: 0.81345
[1208]	valid_0's auc: 0.813461
[1209]	valid_0's auc: 0.813463
[1210]	valid_0's auc: 0.813485
[1211]	valid_0's auc: 0.813476
[1212]	valid_0's auc: 0.81348
[1213]	valid_0's auc: 0.813459
[1214]	valid_0's auc: 0.813492
[1215]	valid_0's auc: 0.813491
[1216]	valid_0's auc: 0.81349
[1217]	valid_0's auc: 0.813512
[1218]	valid_0's auc: 0.813521
[1219]	valid_0's auc: 0.81352
[1220]	valid_

[1503]	valid_0's auc: 0.814093
[1504]	valid_0's auc: 0.814108
[1505]	valid_0's auc: 0.814107
[1506]	valid_0's auc: 0.814102
[1507]	valid_0's auc: 0.814097
[1508]	valid_0's auc: 0.814068
[1509]	valid_0's auc: 0.814072
[1510]	valid_0's auc: 0.814072
[1511]	valid_0's auc: 0.814085
[1512]	valid_0's auc: 0.814112
[1513]	valid_0's auc: 0.81412
[1514]	valid_0's auc: 0.814125
[1515]	valid_0's auc: 0.814135
[1516]	valid_0's auc: 0.814164
[1517]	valid_0's auc: 0.814168
[1518]	valid_0's auc: 0.814162
[1519]	valid_0's auc: 0.814143
[1520]	valid_0's auc: 0.814153
[1521]	valid_0's auc: 0.814153
[1522]	valid_0's auc: 0.814168
[1523]	valid_0's auc: 0.814167
[1524]	valid_0's auc: 0.814163
[1525]	valid_0's auc: 0.814159
[1526]	valid_0's auc: 0.814152
[1527]	valid_0's auc: 0.814166
[1528]	valid_0's auc: 0.814188
[1529]	valid_0's auc: 0.814195
[1530]	valid_0's auc: 0.814188
[1531]	valid_0's auc: 0.814183
[1532]	valid_0's auc: 0.814181
[1533]	valid_0's auc: 0.814202
[1534]	valid_0's auc: 0.814182
[1535]	va

[1807]	valid_0's auc: 0.814463
[1808]	valid_0's auc: 0.814449
[1809]	valid_0's auc: 0.814453
[1810]	valid_0's auc: 0.814456
[1811]	valid_0's auc: 0.814462
[1812]	valid_0's auc: 0.814454
[1813]	valid_0's auc: 0.814437
[1814]	valid_0's auc: 0.814446
[1815]	valid_0's auc: 0.81446
[1816]	valid_0's auc: 0.814444
[1817]	valid_0's auc: 0.814429
[1818]	valid_0's auc: 0.814424
[1819]	valid_0's auc: 0.814434
[1820]	valid_0's auc: 0.81443
[1821]	valid_0's auc: 0.814432
[1822]	valid_0's auc: 0.814452
[1823]	valid_0's auc: 0.814451
[1824]	valid_0's auc: 0.814453
[1825]	valid_0's auc: 0.814473
[1826]	valid_0's auc: 0.814466
[1827]	valid_0's auc: 0.814461
[1828]	valid_0's auc: 0.814476
[1829]	valid_0's auc: 0.814484
[1830]	valid_0's auc: 0.814475
[1831]	valid_0's auc: 0.814475
[1832]	valid_0's auc: 0.814469
[1833]	valid_0's auc: 0.814465
[1834]	valid_0's auc: 0.814463
[1835]	valid_0's auc: 0.814468
[1836]	valid_0's auc: 0.81448
[1837]	valid_0's auc: 0.814495
[1838]	valid_0's auc: 0.814495
[1839]	vali

[96]	valid_0's auc: 0.801216
[97]	valid_0's auc: 0.801139
[98]	valid_0's auc: 0.801273
[99]	valid_0's auc: 0.801323
[100]	valid_0's auc: 0.801405
[101]	valid_0's auc: 0.801491
[102]	valid_0's auc: 0.801524
[103]	valid_0's auc: 0.801559
[104]	valid_0's auc: 0.801652
[105]	valid_0's auc: 0.801635
[106]	valid_0's auc: 0.801708
[107]	valid_0's auc: 0.801718
[108]	valid_0's auc: 0.801831
[109]	valid_0's auc: 0.801828
[110]	valid_0's auc: 0.80184
[111]	valid_0's auc: 0.801936
[112]	valid_0's auc: 0.80199
[113]	valid_0's auc: 0.802
[114]	valid_0's auc: 0.801993
[115]	valid_0's auc: 0.802033
[116]	valid_0's auc: 0.802062
[117]	valid_0's auc: 0.802048
[118]	valid_0's auc: 0.802081
[119]	valid_0's auc: 0.802112
[120]	valid_0's auc: 0.80219
[121]	valid_0's auc: 0.802265
[122]	valid_0's auc: 0.802312
[123]	valid_0's auc: 0.802409
[124]	valid_0's auc: 0.802458
[125]	valid_0's auc: 0.802531
[126]	valid_0's auc: 0.802582
[127]	valid_0's auc: 0.802628
[128]	valid_0's auc: 0.802653
[129]	valid_0's auc:

[391]	valid_0's auc: 0.810651
[392]	valid_0's auc: 0.810653
[393]	valid_0's auc: 0.810656
[394]	valid_0's auc: 0.810685
[395]	valid_0's auc: 0.81067
[396]	valid_0's auc: 0.810727
[397]	valid_0's auc: 0.810714
[398]	valid_0's auc: 0.810735
[399]	valid_0's auc: 0.810778
[400]	valid_0's auc: 0.810783
[401]	valid_0's auc: 0.8108
[402]	valid_0's auc: 0.81082
[403]	valid_0's auc: 0.810817
[404]	valid_0's auc: 0.810831
[405]	valid_0's auc: 0.810825
[406]	valid_0's auc: 0.810846
[407]	valid_0's auc: 0.810862
[408]	valid_0's auc: 0.810908
[409]	valid_0's auc: 0.810919
[410]	valid_0's auc: 0.810924
[411]	valid_0's auc: 0.810935
[412]	valid_0's auc: 0.810977
[413]	valid_0's auc: 0.810988
[414]	valid_0's auc: 0.810996
[415]	valid_0's auc: 0.810985
[416]	valid_0's auc: 0.810996
[417]	valid_0's auc: 0.811008
[418]	valid_0's auc: 0.811
[419]	valid_0's auc: 0.811036
[420]	valid_0's auc: 0.811083
[421]	valid_0's auc: 0.811103
[422]	valid_0's auc: 0.811104
[423]	valid_0's auc: 0.81111
[424]	valid_0's au

[719]	valid_0's auc: 0.813465
[720]	valid_0's auc: 0.813476
[721]	valid_0's auc: 0.813503
[722]	valid_0's auc: 0.813511
[723]	valid_0's auc: 0.813519
[724]	valid_0's auc: 0.813552
[725]	valid_0's auc: 0.813531
[726]	valid_0's auc: 0.813537
[727]	valid_0's auc: 0.813541
[728]	valid_0's auc: 0.813539
[729]	valid_0's auc: 0.813545
[730]	valid_0's auc: 0.813545
[731]	valid_0's auc: 0.813562
[732]	valid_0's auc: 0.813541
[733]	valid_0's auc: 0.813534
[734]	valid_0's auc: 0.813531
[735]	valid_0's auc: 0.813542
[736]	valid_0's auc: 0.813542
[737]	valid_0's auc: 0.813544
[738]	valid_0's auc: 0.81357
[739]	valid_0's auc: 0.813588
[740]	valid_0's auc: 0.813587
[741]	valid_0's auc: 0.813588
[742]	valid_0's auc: 0.813553
[743]	valid_0's auc: 0.813547
[744]	valid_0's auc: 0.81357
[745]	valid_0's auc: 0.813578
[746]	valid_0's auc: 0.813589
[747]	valid_0's auc: 0.813571
[748]	valid_0's auc: 0.813595
[749]	valid_0's auc: 0.813601
[750]	valid_0's auc: 0.81357
[751]	valid_0's auc: 0.813596
[752]	valid_0

[1021]	valid_0's auc: 0.814752
[1022]	valid_0's auc: 0.814754
[1023]	valid_0's auc: 0.814762
[1024]	valid_0's auc: 0.814758
[1025]	valid_0's auc: 0.814785
[1026]	valid_0's auc: 0.814783
[1027]	valid_0's auc: 0.814772
[1028]	valid_0's auc: 0.814792
[1029]	valid_0's auc: 0.814779
[1030]	valid_0's auc: 0.814788
[1031]	valid_0's auc: 0.814779
[1032]	valid_0's auc: 0.814786
[1033]	valid_0's auc: 0.81477
[1034]	valid_0's auc: 0.814782
[1035]	valid_0's auc: 0.814772
[1036]	valid_0's auc: 0.814767
[1037]	valid_0's auc: 0.814757
[1038]	valid_0's auc: 0.814752
[1039]	valid_0's auc: 0.814741
[1040]	valid_0's auc: 0.81474
[1041]	valid_0's auc: 0.814749
[1042]	valid_0's auc: 0.814733
[1043]	valid_0's auc: 0.814755
[1044]	valid_0's auc: 0.814755
[1045]	valid_0's auc: 0.814758
[1046]	valid_0's auc: 0.814762
[1047]	valid_0's auc: 0.814763
[1048]	valid_0's auc: 0.814756
[1049]	valid_0's auc: 0.814742
[1050]	valid_0's auc: 0.814754
[1051]	valid_0's auc: 0.814738
[1052]	valid_0's auc: 0.814741
[1053]	val

[1290]	valid_0's auc: 0.815417
[1291]	valid_0's auc: 0.815421
[1292]	valid_0's auc: 0.815436
[1293]	valid_0's auc: 0.815437
[1294]	valid_0's auc: 0.81544
[1295]	valid_0's auc: 0.815434
[1296]	valid_0's auc: 0.815433
[1297]	valid_0's auc: 0.815452
[1298]	valid_0's auc: 0.815448
[1299]	valid_0's auc: 0.815451
[1300]	valid_0's auc: 0.815454
[1301]	valid_0's auc: 0.815437
[1302]	valid_0's auc: 0.815428
[1303]	valid_0's auc: 0.815447
[1304]	valid_0's auc: 0.815472
[1305]	valid_0's auc: 0.815474
[1306]	valid_0's auc: 0.815474
[1307]	valid_0's auc: 0.815462
[1308]	valid_0's auc: 0.815457
[1309]	valid_0's auc: 0.815479
[1310]	valid_0's auc: 0.815498
[1311]	valid_0's auc: 0.815483
[1312]	valid_0's auc: 0.815475
[1313]	valid_0's auc: 0.815468
[1314]	valid_0's auc: 0.815473
[1315]	valid_0's auc: 0.815466
[1316]	valid_0's auc: 0.815481
[1317]	valid_0's auc: 0.815494
[1318]	valid_0's auc: 0.815515
[1319]	valid_0's auc: 0.815517
[1320]	valid_0's auc: 0.81551
[1321]	valid_0's auc: 0.81552
[1322]	vali

[1606]	valid_0's auc: 0.815356
[1607]	valid_0's auc: 0.815337
[1608]	valid_0's auc: 0.815336
[1609]	valid_0's auc: 0.815339
[1610]	valid_0's auc: 0.81534
[1611]	valid_0's auc: 0.81535
[1612]	valid_0's auc: 0.815367
[1613]	valid_0's auc: 0.815365
[1614]	valid_0's auc: 0.815364
[1615]	valid_0's auc: 0.815347
[1616]	valid_0's auc: 0.815345
[1617]	valid_0's auc: 0.815343
[1618]	valid_0's auc: 0.815346
[1619]	valid_0's auc: 0.815348
[1620]	valid_0's auc: 0.815339
[1621]	valid_0's auc: 0.815337
[1622]	valid_0's auc: 0.81533
[1623]	valid_0's auc: 0.815327
[1624]	valid_0's auc: 0.815326
[1625]	valid_0's auc: 0.815321
[1626]	valid_0's auc: 0.815328
[1627]	valid_0's auc: 0.815324
[1628]	valid_0's auc: 0.815321
[1629]	valid_0's auc: 0.81533
[1630]	valid_0's auc: 0.815323
[1631]	valid_0's auc: 0.815316
[1632]	valid_0's auc: 0.815322
[1633]	valid_0's auc: 0.815298
[1634]	valid_0's auc: 0.815297
[1635]	valid_0's auc: 0.815285
[1636]	valid_0's auc: 0.815316
[1637]	valid_0's auc: 0.815302
[1638]	valid

[1910]	valid_0's auc: 0.815486
[1911]	valid_0's auc: 0.815486
[1912]	valid_0's auc: 0.815478
[1913]	valid_0's auc: 0.815465
[1914]	valid_0's auc: 0.815457
[1915]	valid_0's auc: 0.815471
[1916]	valid_0's auc: 0.815461
[1917]	valid_0's auc: 0.815473
[1918]	valid_0's auc: 0.815467
[1919]	valid_0's auc: 0.815467
[1920]	valid_0's auc: 0.81549
[1921]	valid_0's auc: 0.815474
[1922]	valid_0's auc: 0.815471
[1923]	valid_0's auc: 0.81545
[1924]	valid_0's auc: 0.815453
[1925]	valid_0's auc: 0.815461
[1926]	valid_0's auc: 0.815458
[1927]	valid_0's auc: 0.815468
[1928]	valid_0's auc: 0.815477
[1929]	valid_0's auc: 0.815479
[1930]	valid_0's auc: 0.815469
[1931]	valid_0's auc: 0.815465
[1932]	valid_0's auc: 0.815461
[1933]	valid_0's auc: 0.815472
[1934]	valid_0's auc: 0.815472
[1935]	valid_0's auc: 0.815479
[1936]	valid_0's auc: 0.815486
[1937]	valid_0's auc: 0.815479
[1938]	valid_0's auc: 0.815479
[1939]	valid_0's auc: 0.815474
[1940]	valid_0's auc: 0.815471
[1941]	valid_0's auc: 0.815469
[1942]	val

[205]	valid_0's auc: 0.80004
[206]	valid_0's auc: 0.800089
[207]	valid_0's auc: 0.800141
[208]	valid_0's auc: 0.80021
[209]	valid_0's auc: 0.800251
[210]	valid_0's auc: 0.800291
[211]	valid_0's auc: 0.800322
[212]	valid_0's auc: 0.800352
[213]	valid_0's auc: 0.800411
[214]	valid_0's auc: 0.800484
[215]	valid_0's auc: 0.80049
[216]	valid_0's auc: 0.800551
[217]	valid_0's auc: 0.800571
[218]	valid_0's auc: 0.800596
[219]	valid_0's auc: 0.800633
[220]	valid_0's auc: 0.800639
[221]	valid_0's auc: 0.800659
[222]	valid_0's auc: 0.800659
[223]	valid_0's auc: 0.800692
[224]	valid_0's auc: 0.800723
[225]	valid_0's auc: 0.80075
[226]	valid_0's auc: 0.800831
[227]	valid_0's auc: 0.800879
[228]	valid_0's auc: 0.800915
[229]	valid_0's auc: 0.800939
[230]	valid_0's auc: 0.800957
[231]	valid_0's auc: 0.801023
[232]	valid_0's auc: 0.801034
[233]	valid_0's auc: 0.801039
[234]	valid_0's auc: 0.801102
[235]	valid_0's auc: 0.801113
[236]	valid_0's auc: 0.801155
[237]	valid_0's auc: 0.801168
[238]	valid_0'

[505]	valid_0's auc: 0.807069
[506]	valid_0's auc: 0.807091
[507]	valid_0's auc: 0.807085
[508]	valid_0's auc: 0.807073
[509]	valid_0's auc: 0.807086
[510]	valid_0's auc: 0.807095
[511]	valid_0's auc: 0.80712
[512]	valid_0's auc: 0.807139
[513]	valid_0's auc: 0.807159
[514]	valid_0's auc: 0.807183
[515]	valid_0's auc: 0.807197
[516]	valid_0's auc: 0.807218
[517]	valid_0's auc: 0.807224
[518]	valid_0's auc: 0.807248
[519]	valid_0's auc: 0.807276
[520]	valid_0's auc: 0.807289
[521]	valid_0's auc: 0.807309
[522]	valid_0's auc: 0.807332
[523]	valid_0's auc: 0.807364
[524]	valid_0's auc: 0.807406
[525]	valid_0's auc: 0.807413
[526]	valid_0's auc: 0.807403
[527]	valid_0's auc: 0.807399
[528]	valid_0's auc: 0.807428
[529]	valid_0's auc: 0.807432
[530]	valid_0's auc: 0.807464
[531]	valid_0's auc: 0.80744
[532]	valid_0's auc: 0.807437
[533]	valid_0's auc: 0.807458
[534]	valid_0's auc: 0.807475
[535]	valid_0's auc: 0.807481
[536]	valid_0's auc: 0.807509
[537]	valid_0's auc: 0.807518
[538]	valid_

[826]	valid_0's auc: 0.809799
[827]	valid_0's auc: 0.809814
[828]	valid_0's auc: 0.809828
[829]	valid_0's auc: 0.809843
[830]	valid_0's auc: 0.809849
[831]	valid_0's auc: 0.80985
[832]	valid_0's auc: 0.809857
[833]	valid_0's auc: 0.809863
[834]	valid_0's auc: 0.809875
[835]	valid_0's auc: 0.809876
[836]	valid_0's auc: 0.809865
[837]	valid_0's auc: 0.809861
[838]	valid_0's auc: 0.809901
[839]	valid_0's auc: 0.809924
[840]	valid_0's auc: 0.809921
[841]	valid_0's auc: 0.80993
[842]	valid_0's auc: 0.809921
[843]	valid_0's auc: 0.809913
[844]	valid_0's auc: 0.809907
[845]	valid_0's auc: 0.809911
[846]	valid_0's auc: 0.809904
[847]	valid_0's auc: 0.809935
[848]	valid_0's auc: 0.809934
[849]	valid_0's auc: 0.809942
[850]	valid_0's auc: 0.809965
[851]	valid_0's auc: 0.809977
[852]	valid_0's auc: 0.809952
[853]	valid_0's auc: 0.809943
[854]	valid_0's auc: 0.809952
[855]	valid_0's auc: 0.809949
[856]	valid_0's auc: 0.809945
[857]	valid_0's auc: 0.809938
[858]	valid_0's auc: 0.80995
[859]	valid_0

[1164]	valid_0's auc: 0.810903
[1165]	valid_0's auc: 0.810894
[1166]	valid_0's auc: 0.810896
[1167]	valid_0's auc: 0.810895
[1168]	valid_0's auc: 0.810887
[1169]	valid_0's auc: 0.810881
[1170]	valid_0's auc: 0.810902
[1171]	valid_0's auc: 0.810902
[1172]	valid_0's auc: 0.8109
[1173]	valid_0's auc: 0.810904
[1174]	valid_0's auc: 0.8109
[1175]	valid_0's auc: 0.810898
[1176]	valid_0's auc: 0.810897
[1177]	valid_0's auc: 0.810903
[1178]	valid_0's auc: 0.810915
[1179]	valid_0's auc: 0.81091
[1180]	valid_0's auc: 0.810911
[1181]	valid_0's auc: 0.810914
[1182]	valid_0's auc: 0.810906
[1183]	valid_0's auc: 0.810913
[1184]	valid_0's auc: 0.810923
[1185]	valid_0's auc: 0.810931
[1186]	valid_0's auc: 0.810921
[1187]	valid_0's auc: 0.810908
[1188]	valid_0's auc: 0.810919
[1189]	valid_0's auc: 0.810894
[1190]	valid_0's auc: 0.810899
[1191]	valid_0's auc: 0.810881
[1192]	valid_0's auc: 0.810884
[1193]	valid_0's auc: 0.810888
[1194]	valid_0's auc: 0.810877
[1195]	valid_0's auc: 0.810894
[1196]	valid_

[1431]	valid_0's auc: 0.811555
[1432]	valid_0's auc: 0.811539
[1433]	valid_0's auc: 0.811535
[1434]	valid_0's auc: 0.811545
[1435]	valid_0's auc: 0.811551
[1436]	valid_0's auc: 0.811543
[1437]	valid_0's auc: 0.811531
[1438]	valid_0's auc: 0.811535
[1439]	valid_0's auc: 0.811543
[1440]	valid_0's auc: 0.811522
[1441]	valid_0's auc: 0.811511
[1442]	valid_0's auc: 0.811514
[1443]	valid_0's auc: 0.81152
[1444]	valid_0's auc: 0.811495
[1445]	valid_0's auc: 0.811491
[1446]	valid_0's auc: 0.811496
[1447]	valid_0's auc: 0.811502
[1448]	valid_0's auc: 0.811502
[1449]	valid_0's auc: 0.811496
[1450]	valid_0's auc: 0.811517
[1451]	valid_0's auc: 0.811503
[1452]	valid_0's auc: 0.811501
[1453]	valid_0's auc: 0.811508
[1454]	valid_0's auc: 0.811524
[1455]	valid_0's auc: 0.811516
[1456]	valid_0's auc: 0.811496
[1457]	valid_0's auc: 0.811493
[1458]	valid_0's auc: 0.811481
[1459]	valid_0's auc: 0.811503
[1460]	valid_0's auc: 0.811486
[1461]	valid_0's auc: 0.811497
[1462]	valid_0's auc: 0.811509
[1463]	va

[1746]	valid_0's auc: 0.811852
[1747]	valid_0's auc: 0.811856
[1748]	valid_0's auc: 0.811853
[1749]	valid_0's auc: 0.811843
[1750]	valid_0's auc: 0.811848
[1751]	valid_0's auc: 0.811847
[1752]	valid_0's auc: 0.811861
[1753]	valid_0's auc: 0.811843
[1754]	valid_0's auc: 0.811848
[1755]	valid_0's auc: 0.811852
[1756]	valid_0's auc: 0.811885
[1757]	valid_0's auc: 0.811895
[1758]	valid_0's auc: 0.811883
[1759]	valid_0's auc: 0.811899
[1760]	valid_0's auc: 0.811893
[1761]	valid_0's auc: 0.811899
[1762]	valid_0's auc: 0.811891
[1763]	valid_0's auc: 0.811875
[1764]	valid_0's auc: 0.81187
[1765]	valid_0's auc: 0.811857
[1766]	valid_0's auc: 0.811867
[1767]	valid_0's auc: 0.811859
[1768]	valid_0's auc: 0.811879
[1769]	valid_0's auc: 0.811879
[1770]	valid_0's auc: 0.811892
[1771]	valid_0's auc: 0.81188
[1772]	valid_0's auc: 0.811877
[1773]	valid_0's auc: 0.81188
[1774]	valid_0's auc: 0.811898
[1775]	valid_0's auc: 0.81189
[1776]	valid_0's auc: 0.811888
[1777]	valid_0's auc: 0.811875
[1778]	valid

[40]	valid_0's auc: 0.7971
[41]	valid_0's auc: 0.797422
[42]	valid_0's auc: 0.797473
[43]	valid_0's auc: 0.797966
[44]	valid_0's auc: 0.797948
[45]	valid_0's auc: 0.797928
[46]	valid_0's auc: 0.797993
[47]	valid_0's auc: 0.797954
[48]	valid_0's auc: 0.798011
[49]	valid_0's auc: 0.797976
[50]	valid_0's auc: 0.797946
[51]	valid_0's auc: 0.798021
[52]	valid_0's auc: 0.798258
[53]	valid_0's auc: 0.798511
[54]	valid_0's auc: 0.798503
[55]	valid_0's auc: 0.798526
[56]	valid_0's auc: 0.798542
[57]	valid_0's auc: 0.798525
[58]	valid_0's auc: 0.798559
[59]	valid_0's auc: 0.798582
[60]	valid_0's auc: 0.798606
[61]	valid_0's auc: 0.798634
[62]	valid_0's auc: 0.7986
[63]	valid_0's auc: 0.798785
[64]	valid_0's auc: 0.798773
[65]	valid_0's auc: 0.798869
[66]	valid_0's auc: 0.798994
[67]	valid_0's auc: 0.799189
[68]	valid_0's auc: 0.799292
[69]	valid_0's auc: 0.799315
[70]	valid_0's auc: 0.799454
[71]	valid_0's auc: 0.799489
[72]	valid_0's auc: 0.799602
[73]	valid_0's auc: 0.799723
[74]	valid_0's auc

[353]	valid_0's auc: 0.809286
[354]	valid_0's auc: 0.809329
[355]	valid_0's auc: 0.809339
[356]	valid_0's auc: 0.80936
[357]	valid_0's auc: 0.809388
[358]	valid_0's auc: 0.809432
[359]	valid_0's auc: 0.80948
[360]	valid_0's auc: 0.809465
[361]	valid_0's auc: 0.809463
[362]	valid_0's auc: 0.809501
[363]	valid_0's auc: 0.809542
[364]	valid_0's auc: 0.809565
[365]	valid_0's auc: 0.809571
[366]	valid_0's auc: 0.809582
[367]	valid_0's auc: 0.809619
[368]	valid_0's auc: 0.809629
[369]	valid_0's auc: 0.809646
[370]	valid_0's auc: 0.809674
[371]	valid_0's auc: 0.809679
[372]	valid_0's auc: 0.809691
[373]	valid_0's auc: 0.809727
[374]	valid_0's auc: 0.80975
[375]	valid_0's auc: 0.809758
[376]	valid_0's auc: 0.809794
[377]	valid_0's auc: 0.809798
[378]	valid_0's auc: 0.809833
[379]	valid_0's auc: 0.809888
[380]	valid_0's auc: 0.809896
[381]	valid_0's auc: 0.809953
[382]	valid_0's auc: 0.809972
[383]	valid_0's auc: 0.810005
[384]	valid_0's auc: 0.810022
[385]	valid_0's auc: 0.810076
[386]	valid_0

[632]	valid_0's auc: 0.813252
[633]	valid_0's auc: 0.813247
[634]	valid_0's auc: 0.813278
[635]	valid_0's auc: 0.813297
[636]	valid_0's auc: 0.813325
[637]	valid_0's auc: 0.813335
[638]	valid_0's auc: 0.813327
[639]	valid_0's auc: 0.813343
[640]	valid_0's auc: 0.813356
[641]	valid_0's auc: 0.813366
[642]	valid_0's auc: 0.813364
[643]	valid_0's auc: 0.81337
[644]	valid_0's auc: 0.813377
[645]	valid_0's auc: 0.813362
[646]	valid_0's auc: 0.813364
[647]	valid_0's auc: 0.813387
[648]	valid_0's auc: 0.813371
[649]	valid_0's auc: 0.813409
[650]	valid_0's auc: 0.813382
[651]	valid_0's auc: 0.813421
[652]	valid_0's auc: 0.813439
[653]	valid_0's auc: 0.813464
[654]	valid_0's auc: 0.813468
[655]	valid_0's auc: 0.813466
[656]	valid_0's auc: 0.813463
[657]	valid_0's auc: 0.813461
[658]	valid_0's auc: 0.81347
[659]	valid_0's auc: 0.813462
[660]	valid_0's auc: 0.813462
[661]	valid_0's auc: 0.813465
[662]	valid_0's auc: 0.813474
[663]	valid_0's auc: 0.813502
[664]	valid_0's auc: 0.813516
[665]	valid_

[941]	valid_0's auc: 0.814602
[942]	valid_0's auc: 0.814577
[943]	valid_0's auc: 0.814561
[944]	valid_0's auc: 0.814587
[945]	valid_0's auc: 0.81458
[946]	valid_0's auc: 0.814568
[947]	valid_0's auc: 0.814554
[948]	valid_0's auc: 0.814564
[949]	valid_0's auc: 0.814563
[950]	valid_0's auc: 0.814577
[951]	valid_0's auc: 0.81458
[952]	valid_0's auc: 0.814572
[953]	valid_0's auc: 0.814564
[954]	valid_0's auc: 0.814566
[955]	valid_0's auc: 0.814581
[956]	valid_0's auc: 0.814599
[957]	valid_0's auc: 0.814594
[958]	valid_0's auc: 0.814614
[959]	valid_0's auc: 0.814636
[960]	valid_0's auc: 0.814633
[961]	valid_0's auc: 0.814644
[962]	valid_0's auc: 0.814652
[963]	valid_0's auc: 0.814676
[964]	valid_0's auc: 0.814666
[965]	valid_0's auc: 0.814664
[966]	valid_0's auc: 0.814672
[967]	valid_0's auc: 0.814683
[968]	valid_0's auc: 0.814703
[969]	valid_0's auc: 0.814716
[970]	valid_0's auc: 0.814711
[971]	valid_0's auc: 0.814709
[972]	valid_0's auc: 0.814702
[973]	valid_0's auc: 0.8147
[974]	valid_0'

[1259]	valid_0's auc: 0.815711
[1260]	valid_0's auc: 0.815732
[1261]	valid_0's auc: 0.81573
[1262]	valid_0's auc: 0.815717
[1263]	valid_0's auc: 0.815728
[1264]	valid_0's auc: 0.815712
[1265]	valid_0's auc: 0.815712
[1266]	valid_0's auc: 0.815722
[1267]	valid_0's auc: 0.815715
[1268]	valid_0's auc: 0.815735
[1269]	valid_0's auc: 0.815763
[1270]	valid_0's auc: 0.815779
[1271]	valid_0's auc: 0.81579
[1272]	valid_0's auc: 0.815799
[1273]	valid_0's auc: 0.815803
[1274]	valid_0's auc: 0.815796
[1275]	valid_0's auc: 0.815785
[1276]	valid_0's auc: 0.815778
[1277]	valid_0's auc: 0.815784
[1278]	valid_0's auc: 0.815794
[1279]	valid_0's auc: 0.815793
[1280]	valid_0's auc: 0.815803
[1281]	valid_0's auc: 0.815816
[1282]	valid_0's auc: 0.815803
[1283]	valid_0's auc: 0.815812
[1284]	valid_0's auc: 0.815829
[1285]	valid_0's auc: 0.815809
[1286]	valid_0's auc: 0.815814
[1287]	valid_0's auc: 0.815805
[1288]	valid_0's auc: 0.8158
[1289]	valid_0's auc: 0.815822
[1290]	valid_0's auc: 0.815806
[1291]	valid

[1527]	valid_0's auc: 0.81579
[1528]	valid_0's auc: 0.815807
[1529]	valid_0's auc: 0.815809
[1530]	valid_0's auc: 0.815811
[1531]	valid_0's auc: 0.815798
[1532]	valid_0's auc: 0.815798
[1533]	valid_0's auc: 0.815796
[1534]	valid_0's auc: 0.815798
[1535]	valid_0's auc: 0.815799
[1536]	valid_0's auc: 0.815796
[1537]	valid_0's auc: 0.815785
[1538]	valid_0's auc: 0.815788
[1539]	valid_0's auc: 0.815785
[1540]	valid_0's auc: 0.815786
[1541]	valid_0's auc: 0.815775
[1542]	valid_0's auc: 0.81578
[1543]	valid_0's auc: 0.815792
[1544]	valid_0's auc: 0.815796
[1545]	valid_0's auc: 0.815797
[1546]	valid_0's auc: 0.815795
[1547]	valid_0's auc: 0.815816
[1548]	valid_0's auc: 0.815812
[1549]	valid_0's auc: 0.815826
[1550]	valid_0's auc: 0.815838
[1551]	valid_0's auc: 0.815831
[1552]	valid_0's auc: 0.815832
[1553]	valid_0's auc: 0.815838
[1554]	valid_0's auc: 0.815831
[1555]	valid_0's auc: 0.815844
[1556]	valid_0's auc: 0.815838
[1557]	valid_0's auc: 0.815835
[1558]	valid_0's auc: 0.815833
[1559]	val

[1803]	valid_0's auc: 0.81585
[1804]	valid_0's auc: 0.815856
[1805]	valid_0's auc: 0.81585
[1806]	valid_0's auc: 0.815827
[1807]	valid_0's auc: 0.815824
[1808]	valid_0's auc: 0.815829
[1809]	valid_0's auc: 0.815844
[1810]	valid_0's auc: 0.81583
[1811]	valid_0's auc: 0.81583
[1812]	valid_0's auc: 0.815836
[1813]	valid_0's auc: 0.815837
[1814]	valid_0's auc: 0.815841
[1815]	valid_0's auc: 0.815839
[1816]	valid_0's auc: 0.815817
[1817]	valid_0's auc: 0.815825
[1818]	valid_0's auc: 0.81582
[1819]	valid_0's auc: 0.815836
[1820]	valid_0's auc: 0.815841
[1821]	valid_0's auc: 0.815842
[1822]	valid_0's auc: 0.81584
[1823]	valid_0's auc: 0.815842
[1824]	valid_0's auc: 0.815809
[1825]	valid_0's auc: 0.815809
[1826]	valid_0's auc: 0.81581
[1827]	valid_0's auc: 0.815812
[1828]	valid_0's auc: 0.815824
[1829]	valid_0's auc: 0.815841
[1830]	valid_0's auc: 0.815845
[1831]	valid_0's auc: 0.815847
[1832]	valid_0's auc: 0.815835
[1833]	valid_0's auc: 0.815835
[1834]	valid_0's auc: 0.815832
[1835]	valid_0'

[88]	valid_0's auc: 0.796316
[89]	valid_0's auc: 0.796252
[90]	valid_0's auc: 0.796376
[91]	valid_0's auc: 0.796451
[92]	valid_0's auc: 0.796517
[93]	valid_0's auc: 0.796654
[94]	valid_0's auc: 0.796597
[95]	valid_0's auc: 0.796563
[96]	valid_0's auc: 0.796625
[97]	valid_0's auc: 0.796634
[98]	valid_0's auc: 0.796704
[99]	valid_0's auc: 0.796746
[100]	valid_0's auc: 0.796842
[101]	valid_0's auc: 0.796809
[102]	valid_0's auc: 0.796965
[103]	valid_0's auc: 0.796889
[104]	valid_0's auc: 0.796958
[105]	valid_0's auc: 0.796969
[106]	valid_0's auc: 0.796964
[107]	valid_0's auc: 0.796963
[108]	valid_0's auc: 0.797026
[109]	valid_0's auc: 0.796991
[110]	valid_0's auc: 0.797149
[111]	valid_0's auc: 0.79715
[112]	valid_0's auc: 0.797198
[113]	valid_0's auc: 0.79721
[114]	valid_0's auc: 0.79728
[115]	valid_0's auc: 0.797311
[116]	valid_0's auc: 0.797365
[117]	valid_0's auc: 0.79748
[118]	valid_0's auc: 0.797478
[119]	valid_0's auc: 0.797507
[120]	valid_0's auc: 0.797566
[121]	valid_0's auc: 0.797

[373]	valid_0's auc: 0.80669
[374]	valid_0's auc: 0.806702
[375]	valid_0's auc: 0.806718
[376]	valid_0's auc: 0.806737
[377]	valid_0's auc: 0.806799
[378]	valid_0's auc: 0.806802
[379]	valid_0's auc: 0.806858
[380]	valid_0's auc: 0.806894
[381]	valid_0's auc: 0.806904
[382]	valid_0's auc: 0.806902
[383]	valid_0's auc: 0.806904
[384]	valid_0's auc: 0.806926
[385]	valid_0's auc: 0.806915
[386]	valid_0's auc: 0.806936
[387]	valid_0's auc: 0.80697
[388]	valid_0's auc: 0.806979
[389]	valid_0's auc: 0.80696
[390]	valid_0's auc: 0.806983
[391]	valid_0's auc: 0.807004
[392]	valid_0's auc: 0.807043
[393]	valid_0's auc: 0.807033
[394]	valid_0's auc: 0.80707
[395]	valid_0's auc: 0.80711
[396]	valid_0's auc: 0.80712
[397]	valid_0's auc: 0.807144
[398]	valid_0's auc: 0.807128
[399]	valid_0's auc: 0.807161
[400]	valid_0's auc: 0.807203
[401]	valid_0's auc: 0.807195
[402]	valid_0's auc: 0.807202
[403]	valid_0's auc: 0.807198
[404]	valid_0's auc: 0.807209
[405]	valid_0's auc: 0.807251
[406]	valid_0's 

[680]	valid_0's auc: 0.809737
[681]	valid_0's auc: 0.809749
[682]	valid_0's auc: 0.809739
[683]	valid_0's auc: 0.809724
[684]	valid_0's auc: 0.80973
[685]	valid_0's auc: 0.809731
[686]	valid_0's auc: 0.809742
[687]	valid_0's auc: 0.809745
[688]	valid_0's auc: 0.809725
[689]	valid_0's auc: 0.80974
[690]	valid_0's auc: 0.809773
[691]	valid_0's auc: 0.809781
[692]	valid_0's auc: 0.809771
[693]	valid_0's auc: 0.80975
[694]	valid_0's auc: 0.809758
[695]	valid_0's auc: 0.80975
[696]	valid_0's auc: 0.80976
[697]	valid_0's auc: 0.809743
[698]	valid_0's auc: 0.809751
[699]	valid_0's auc: 0.809751
[700]	valid_0's auc: 0.809761
[701]	valid_0's auc: 0.80976
[702]	valid_0's auc: 0.809746
[703]	valid_0's auc: 0.809724
[704]	valid_0's auc: 0.809735
[705]	valid_0's auc: 0.809726
[706]	valid_0's auc: 0.80973
[707]	valid_0's auc: 0.809746
[708]	valid_0's auc: 0.809747
[709]	valid_0's auc: 0.809734
[710]	valid_0's auc: 0.809751
[711]	valid_0's auc: 0.809769
[712]	valid_0's auc: 0.809787
[713]	valid_0's a

[1001]	valid_0's auc: 0.811317
[1002]	valid_0's auc: 0.81132
[1003]	valid_0's auc: 0.811312
[1004]	valid_0's auc: 0.811309
[1005]	valid_0's auc: 0.811325
[1006]	valid_0's auc: 0.811342
[1007]	valid_0's auc: 0.811324
[1008]	valid_0's auc: 0.811309
[1009]	valid_0's auc: 0.811319
[1010]	valid_0's auc: 0.811332
[1011]	valid_0's auc: 0.811348
[1012]	valid_0's auc: 0.811344
[1013]	valid_0's auc: 0.811367
[1014]	valid_0's auc: 0.811366
[1015]	valid_0's auc: 0.811346
[1016]	valid_0's auc: 0.81135
[1017]	valid_0's auc: 0.811347
[1018]	valid_0's auc: 0.811369
[1019]	valid_0's auc: 0.811361
[1020]	valid_0's auc: 0.811349
[1021]	valid_0's auc: 0.81134
[1022]	valid_0's auc: 0.811336
[1023]	valid_0's auc: 0.811338
[1024]	valid_0's auc: 0.811331
[1025]	valid_0's auc: 0.811322
[1026]	valid_0's auc: 0.811329
[1027]	valid_0's auc: 0.811337
[1028]	valid_0's auc: 0.811337
[1029]	valid_0's auc: 0.81134
[1030]	valid_0's auc: 0.811366
[1031]	valid_0's auc: 0.811367
[1032]	valid_0's auc: 0.811362
[1033]	valid

[1321]	valid_0's auc: 0.811861
[1322]	valid_0's auc: 0.81187
[1323]	valid_0's auc: 0.811871
[1324]	valid_0's auc: 0.81189
[1325]	valid_0's auc: 0.811903
[1326]	valid_0's auc: 0.811901
[1327]	valid_0's auc: 0.811898
[1328]	valid_0's auc: 0.811877
[1329]	valid_0's auc: 0.811892
[1330]	valid_0's auc: 0.811899
[1331]	valid_0's auc: 0.81188
[1332]	valid_0's auc: 0.811883
[1333]	valid_0's auc: 0.811896
[1334]	valid_0's auc: 0.811919
[1335]	valid_0's auc: 0.811927
[1336]	valid_0's auc: 0.811932
[1337]	valid_0's auc: 0.811925
[1338]	valid_0's auc: 0.811927
[1339]	valid_0's auc: 0.811935
[1340]	valid_0's auc: 0.811944
[1341]	valid_0's auc: 0.811947
[1342]	valid_0's auc: 0.811953
[1343]	valid_0's auc: 0.811946
[1344]	valid_0's auc: 0.811952
[1345]	valid_0's auc: 0.811943
[1346]	valid_0's auc: 0.811955
[1347]	valid_0's auc: 0.811958
[1348]	valid_0's auc: 0.81197
[1349]	valid_0's auc: 0.811967
[1350]	valid_0's auc: 0.811966
[1351]	valid_0's auc: 0.81199
[1352]	valid_0's auc: 0.812009
[1353]	valid_

[1642]	valid_0's auc: 0.812191
[1643]	valid_0's auc: 0.812204
[1644]	valid_0's auc: 0.812206
[1645]	valid_0's auc: 0.812218
[1646]	valid_0's auc: 0.812195
[1647]	valid_0's auc: 0.812199
[1648]	valid_0's auc: 0.812193
[1649]	valid_0's auc: 0.812222
[1650]	valid_0's auc: 0.812244
[1651]	valid_0's auc: 0.812241
[1652]	valid_0's auc: 0.812241
[1653]	valid_0's auc: 0.812241
[1654]	valid_0's auc: 0.812241
[1655]	valid_0's auc: 0.812241
[1656]	valid_0's auc: 0.81226
[1657]	valid_0's auc: 0.812245
[1658]	valid_0's auc: 0.812243
[1659]	valid_0's auc: 0.812249
[1660]	valid_0's auc: 0.812259
[1661]	valid_0's auc: 0.812264
[1662]	valid_0's auc: 0.812278
[1663]	valid_0's auc: 0.812286
[1664]	valid_0's auc: 0.812259
[1665]	valid_0's auc: 0.812264
[1666]	valid_0's auc: 0.812252
[1667]	valid_0's auc: 0.812229
[1668]	valid_0's auc: 0.81225
[1669]	valid_0's auc: 0.812246
[1670]	valid_0's auc: 0.812247
[1671]	valid_0's auc: 0.81224
[1672]	valid_0's auc: 0.812233
[1673]	valid_0's auc: 0.812235
[1674]	vali

[1960]	valid_0's auc: 0.812256
[1961]	valid_0's auc: 0.812259
[1962]	valid_0's auc: 0.812273
[1963]	valid_0's auc: 0.812267
[1964]	valid_0's auc: 0.812285
[1965]	valid_0's auc: 0.812287
[1966]	valid_0's auc: 0.812281
[1967]	valid_0's auc: 0.812307
[1968]	valid_0's auc: 0.81229
[1969]	valid_0's auc: 0.81229
[1970]	valid_0's auc: 0.812289
[1971]	valid_0's auc: 0.812292
[1972]	valid_0's auc: 0.812278
[1973]	valid_0's auc: 0.812281
[1974]	valid_0's auc: 0.812296
[1975]	valid_0's auc: 0.812291
[1976]	valid_0's auc: 0.812286
[1977]	valid_0's auc: 0.812275
[1978]	valid_0's auc: 0.812278
[1979]	valid_0's auc: 0.812273
[1980]	valid_0's auc: 0.812274
[1981]	valid_0's auc: 0.812285
[1982]	valid_0's auc: 0.812302
[1983]	valid_0's auc: 0.812293
[1984]	valid_0's auc: 0.812322
[1985]	valid_0's auc: 0.812306
[1986]	valid_0's auc: 0.812316
[1987]	valid_0's auc: 0.812321
[1988]	valid_0's auc: 0.812322
[1989]	valid_0's auc: 0.812343
[1990]	valid_0's auc: 0.812329
[1991]	valid_0's auc: 0.812327
[1992]	val

In [104]:
# lgb结果
new_result = (prob[0]+prob[1]+prob[2]+prob[3]+prob[4])/5
final_result = []
for i in new_result:
    if i[0]>0.5:
        final_result.append(0)
    else:
        final_result.append(1)
        
res = pd.DataFrame()
res['prediction'] = final_result
res['index'] = res.index
res.to_csv('./result/paws-x-zh.tsv', index=False, sep='\t')
res

Unnamed: 0,prediction,index
0,1,0
1,0,1
2,0,2
3,0,3
4,0,4
...,...,...
1995,0,1995
1996,0,1996
1997,0,1997
1998,0,1998
