In [1]:
import pickle
import glob
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
import lightgbm as lgb

# import time
# print('sleeping')
# time.sleep(24400)
# print('sleep done =======================')

# load feats
# Every ../features/*.pkl file is unpickled into a (train part, test part) pair of
# arrays; the parts are stacked column-wise into train_x / test_x.
# NOTE(review): pickle.load can execute arbitrary code -- only load feature files
# that were produced by this project.
train_x,test_x = [],[]
for feat in sorted(glob.glob('../features/*.pkl')):
    # files whose path contains '3_feat' or 'tfidf' are left out of the stack
    if '3_feat' in feat or 'tfidf' in feat:
        continue
    print('file path',feat)
    # context manager closes the handle instead of leaking one per feature file
    with open(feat,'rb') as feat_file:
        a,b = pickle.load(feat_file)
    print(a.shape,b.shape)
    train_x.append(a)
    test_x.append(b)
if not train_x:
    # np.hstack([]) would fail with an unhelpful "need at least one array" error
    raise FileNotFoundError('no usable feature files matched ../features/*.pkl')
train_x = np.hstack(train_x)
test_x = np.hstack(test_x)
print(train_x.shape)
    
# load y
# The six label columns of train.csv become the integer target matrix train_y.
train = pd.read_csv("../input/train.csv")
list_classes = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
train_y = train[list_classes].values.astype('int')
# show features and labels side by side (train_x.shape alone was already printed above)
print(train_x.shape, train_y.shape)
# the stacked features must line up row-for-row with the labels before CV splitting
assert train_x.shape[0] == train_y.shape[0], 'feature rows do not match label rows'

from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import roc_auc_score
def simple_ens(model_name,k=3,rnd=233,lr=0.05,feature_fraction=0.9,bagging_fraction=0.9,
               bag_frec=3,met='binary_logloss',max_d=3):
    """Train one LightGBM binary model per label column with stratified k-fold CV.

    Reads the module-level ``train_x``, ``train_y``, ``test_x`` and
    ``list_classes``. For each label column, every fold model predicts on
    ``test_x`` and the fold predictions are averaged.

    Parameters
    ----------
    model_name : unused; kept so existing calls keep working.
    k : number of stratified folds.
    rnd : ``random_state`` of the fold splitter.
    lr : LightGBM ``learning_rate``.
    feature_fraction : LightGBM ``feature_fraction``.
    bagging_fraction, bag_frec : accepted but currently unused (the matching
        LightGBM bagging keys are commented out in the params below).
    met : LightGBM ``metric``; ``'auc'`` uses 60 early-stopping rounds, any
        other value uses 30.
    max_d : LightGBM ``max_depth``.

    Returns
    -------
    test_pred : ndarray of shape ``(test_x.shape[0], train_y.shape[1])`` with
        the fold-averaged test predictions.
    cls_auc_res : list holding the mean validation AUC of each label column.
    """
    kf = StratifiedKFold(n_splits=k, shuffle=True, random_state=rnd)
    # sizes are taken from the data instead of the hard-coded 153164 / 6
    n_cls = train_y.shape[1]
    test_pred = np.zeros((test_x.shape[0], n_cls))
    cls_auc_res = [0] * n_cls
    all_train_loss_l,all_val_loss_l = 0,0
    all_train_auc_l,all_val_auc_l = 0,0

    # share params (identical for every class and fold, so built once)
    params = {
            'application': 'binary',
            #'num_leaves': 8,
            #'lambda_l1': 1,
            'lambda_l2': 1.0,
            'max_depth': max_d,
            'metric': met, # or auc
            'data_random_seed': 2,
            'learning_rate':lr,
            # 'bagging_fraction': bagging_fraction,
            # 'bagging_freq':bag_frec,
            'feature_fraction': feature_fraction,

            }
    s_round = 60 if met == 'auc' else 30
    d_test = test_x

    for i in range(n_cls):
        val_loss_l,train_loss_l = 0,0
        val_auc_l,train_auc_l = 0,0
        fold_cnt = 0
        scored_folds = 0  # folds whose metrics were computed successfully
        for train_index, test_index in kf.split(train_x,train_y[:,i]):
            # x,y
            curr_x,curr_y = train_x[train_index],train_y[train_index]
            hold_out_x,hold_out_y = train_x[test_index],train_y[test_index]

            # train for each class
            d_train = lgb.Dataset(curr_x, curr_y[:,i])
            d_valid = lgb.Dataset(hold_out_x, hold_out_y[:,i])
            watchlist = [d_train, d_valid]
            model = lgb.train(params,
                      train_set=d_train,
                      num_boost_round=2000,
                      valid_sets=watchlist,
                      early_stopping_rounds=s_round,
                      verbose_eval=None)
            print(fold_cnt,'fold: ',end='')
            fold_cnt += 1
            try:
                train_pred = model.predict(curr_x)
                tmp_test_pred = model.predict(hold_out_x)

                curr_train_loss = log_loss(curr_y[:,i],train_pred)
                curr_val_loss = log_loss(hold_out_y[:,i],tmp_test_pred)

                curr_train_auc = roc_auc_score(curr_y[:,i],train_pred)
                curr_val_auc = roc_auc_score(hold_out_y[:,i],tmp_test_pred)

                print('ls',curr_train_loss,curr_val_loss,'auc',curr_train_auc,curr_val_auc)
                val_loss_l += curr_val_loss
                train_loss_l += curr_train_loss
                val_auc_l += curr_val_auc
                train_auc_l += curr_train_auc
                scored_folds += 1
            except Exception as err:
                # metrics stay best-effort, but the failure is reported and
                # KeyboardInterrupt / SystemExit are no longer swallowed
                print('metrics skipped:',repr(err))
            curr_test_pred = model.predict(d_test)
            test_pred[:,i] += curr_test_pred

        # avg k fold (only over folds that produced metrics; equals k when none failed)
        n_scored = max(scored_folds, 1)
        train_loss_l = train_loss_l/n_scored
        val_loss_l = val_loss_l/n_scored
        train_auc_l = train_auc_l/n_scored
        val_auc_l = val_auc_l/n_scored
        print(list_classes[i], lr, feature_fraction, max_d)
        print('this class avg train',train_loss_l,'avg val',val_loss_l)
        print('this class auc train',train_auc_l,'auc val',val_auc_l)
        cls_auc_res[i] = val_auc_l


        # avg over all classes
        all_train_loss_l += train_loss_l/n_cls
        all_val_loss_l += val_loss_l/n_cls
        all_train_auc_l += train_auc_l/n_cls
        all_val_auc_l += val_auc_l/n_cls
        print('========================')
    # every fold contributed a test prediction, so this average is always over k
    test_pred = test_pred/k
    print('all loss avg',all_train_loss_l,all_val_loss_l)
    print('all auc avg',all_train_auc_l,all_val_auc_l)
    print('=======================================================')
    return test_pred, cls_auc_res

# marker printed after the loading code and simple_ens definition above have run
print('done')

sleeping
file path ../features/fasttext_cnn2d_5_feat.pkl
(159571, 6) (153164, 6)
file path ../features/fasttext_cnn_gru_5_feat.pkl
(159571, 6) (153164, 6)
file path ../features/fasttext_cnn_v1_5_feat.pkl
(159571, 6) (153164, 6)
file path ../features/fasttext_cnn_v2_5_feat.pkl
(159571, 6) (153164, 6)
file path ../features/fasttext_cudnn_gru_5_feat.pkl
(159571, 6) (153164, 6)
file path ../features/fasttext_gru_v1_5_feat.pkl
(159571, 6) (153164, 6)
file path ../features/fasttext_lstm_v1_5_feat.pkl
(159571, 6) (153164, 6)
file path ../features/glove_cnn2d_5_feat.pkl
(159571, 6) (153164, 6)
file path ../features/glove_cnn_gru_5_feat.pkl
(159571, 6) (153164, 6)
file path ../features/glove_cnn_v1_5_feat.pkl
(159571, 6) (153164, 6)
file path ../features/glove_cnn_v2_5_feat.pkl
(159571, 6) (153164, 6)
file path ../features/glove_cudnn_gru_5_feat.pkl
(159571, 6) (153164, 6)
file path ../features/glove_gru_v1_5_feat.pkl
(159571, 6) (153164, 6)
file path ../features/glove_lstm_v1_5_feat.pkl
(15957

In [2]:
# 10 stratified folds, splitter seed 666, learning rate 0.05, feature fraction 0.45
lgb_res, tmp_auc_res = simple_ens('lgb', k=10, rnd=666, lr=0.05, feature_fraction=0.45)

# write the fold-averaged predictions into the sample submission layout and save
sample_submission = pd.read_csv("../input/sample_submission.csv")
sample_submission[list_classes] = lgb_res
sample_submission.to_csv(
    "../results/lgb_log_csv_fold10_stratified.gz",
    index=False,
    compression='gzip',
)
print(sample_submission.head())
print('save done')

# add 7 base fasttext NN models
# all loss avg 0.0319116484218 0.0358161833583 all auc avg 0.994546891137 0.991249510282 PUB 9866

# rm tfidf test
# all loss avg 0.0319555637279 0.0357756335619 all auc avg 0.994512718368 0.991251471241

# change to stratified
# all loss avg 0.0316412744167 0.0357232681944 all auc avg 0.994584962017 0.991293307537 pub 9866

# change to base model 5 fold, add word batch, tilli, lgb feat
# feat dim 217
# all loss avg 0.031983129767200906 0.035387924581 all auc avg 0.9945427088311004 0.99188514127 PUB 9870

# add more base models
# feat dim 247, all loss avg 0.031885221777487156 0.0353958  all auc avg 0.9945986685511962 0.9919487331094685

# change early stopping to 30, and test later part
# all loss avg 0.03208696729295824 0.035406264613808025 all auc avg 0.9945261773637045 0.991946174432887

# add fasttext lstm v1
# all loss avg 0.031977558955160815 0.03537909655655411 all auc avg 0.9945299073418443 0.9919307178575658

# add muse base model, feat dim 295, lower loss, but lower auc
# all loss avg 0.03196610276261484 0.03530962013098028 all auc avg 0.9945635742334386 0.9918682952070021

# fix pool gru fold to 5, and adj params
# all loss avg 0.03172365669814678 0.03528054768190897 all auc avg 0.9946437910139179 0.991903618166854

# updated pool gru v2
# all loss avg 0.03204678384090053 0.035272185628381025 all auc avg 0.9944980867573662 0.9919033708443966

# updated other feat, change some cnt to ratio, a bit worse
# all loss avg 0.03197308230179816 0.03527747086566205 all auc avg 0.9945654974291834 0.991902658184264

# updated pool gru v2 10 fold PUB 9870
# all loss avg 0.03188938973578164 0.035151701851945265 all auc avg 0.9945816630869728 0.9919824289801534

# rm lr, mnb feat1
# worse all loss avg 0.032097370918022436 0.03520186226431002
# all auc avg 0.9944811076726177 0.9919214297276806

# add ridge , change lr,mnb,ridge to fold 6
# all loss avg 0.03187703661408994 0.0351472518210873 all auc avg 0.9946115814252721 0.9920990121249438

# ridge, lr, mnb fold 10, feat frac 0.6
# all loss avg 0.031754244562510095 0.03510006533423048 all auc avg 0.9946626820706096 0.9921195758845225

# lgb v1 feat fold 10, 5 fold is better, change feat file back
# all loss avg 0.031846113030824186 0.03510235514884163 all auc avg 0.9946147807624219 0.9920710159886731

# feat frac 0.45 PUB 9871
# all loss avg 0.03178085998781136 0.035091249281527216 all auc avg 0.9946391992503673 0.9921624859169091

# tilli feat 10 fold
# all loss avg 0.031769047743715494 0.035076842284761586 all auc avg 0.9946504692097619 0.9921581333743735

# update pool gru v2
# all loss avg 0.031982729360866644 0.035041219881259106 all auc avg 0.9945421522346883 0.9922172582996719

0 fold: ls 0.06960238770416617 0.0781605646273722 auc 0.9907893554131592 0.9871128397759622
1 fold: ls 0.06965586749041318 0.07066149030897897 auc 0.9907712887458717 0.9900424193335037
2 fold: ls 0.07141288345672146 0.07499926491809923 auc 0.9901234160818309 0.9888034975564943
3 fold: ls 0.06657558553422858 0.07283725191739757 auc 0.9917452868695367 0.9892583592904864
4 fold: ls 0.06970514972282499 0.07728551992825755 auc 0.99067586389295 0.9880644114896857
5 fold: ls 0.06693818737383217 0.0751218141768801 auc 0.9916379375382057 0.9882031215010854
6 fold: ls 0.06974700654827182 0.07725293056797954 auc 0.9906617045532646 0.9880672219539689
7 fold: ls 0.07197855625583804 0.07002825637527361 auc 0.9899108479831085 0.9904254444796684
8 fold: ls 0.0680760847028484 0.073680745182608 auc 0.991238367889631 0.9895219989153576
9 fold: ls 0.0674869994506814 0.07622478882141936 auc 0.9914183749698059 0.9882837222537514
toxic 0.05 0.45 3
this class avg train 0.06911787082398262 avg val 0.0746252626

In [3]:
def special_ens(model_name,k=3,rnd=233):
    """Stratified k-fold LightGBM ensemble with hand-picked params per class.

    Reads the module-level ``train_x``, ``train_y``, ``test_x`` and
    ``list_classes``. Class ``i`` is trained with ``params_list[i]``; for each
    class, every fold model predicts on ``test_x`` and the fold predictions
    are averaged.

    Parameters
    ----------
    model_name : unused; kept so existing calls keep working.
    k : number of stratified folds.
    rnd : ``random_state`` of the fold splitter.

    Returns
    -------
    test_pred : ndarray of shape ``(test_x.shape[0], len(params_list))`` with
        the fold-averaged test predictions.
    """
    kf = StratifiedKFold(n_splits=k, shuffle=True, random_state=rnd)

    # one row per class: [learning_rate, max_depth, feature_fraction]
    params_list = [
        [0.05, 3,0.4], # depth should be 3
        [0.075,3,0.6],
        [0.095,3,0.6],
        [0.05, 3,0.6],
        [0.075,3,0.4],
        [0.095,3,0.4],
    ]

    # sizes are derived instead of the hard-coded 153164 / 6
    n_cls = len(params_list)
    test_pred = np.zeros((test_x.shape[0], n_cls))
    all_train_loss_l,all_val_loss_l = 0,0
    all_train_auc_l,all_val_auc_l = 0,0

    d_test = test_x
    s_round = 30  # early stopping patience

    for i in range(n_cls):
        val_loss_l,train_loss_l = 0,0
        val_auc_l,train_auc_l = 0,0
        fold_cnt = 0
        scored_folds = 0  # folds whose metrics were computed successfully

        # special params
        params = {
                'application': 'binary',
                #'num_leaves': 8,
                #'lambda_l1': 1,
                'lambda_l2': 1.0,
                'max_depth': params_list[i][1],
                'metric': 'binary_logloss', # or auc
                'data_random_seed': 2,
                'learning_rate':params_list[i][0],
                'feature_fraction': params_list[i][2],

                }
        print(params)

        for train_index, test_index in kf.split(train_x,train_y[:,i]):
            # x,y
            curr_x,curr_y = train_x[train_index],train_y[train_index]
            hold_out_x,hold_out_y = train_x[test_index],train_y[test_index]
            # train for each class
            d_train = lgb.Dataset(curr_x, curr_y[:,i])
            d_valid = lgb.Dataset(hold_out_x, hold_out_y[:,i])
            watchlist = [d_train, d_valid]
            model = lgb.train(params,
                      train_set=d_train,
                      num_boost_round=2000,
                      valid_sets=watchlist,
                      early_stopping_rounds=s_round,
                      verbose_eval=None)
            print(fold_cnt,'fold: ',end='')
            fold_cnt += 1
            try:
                train_pred = model.predict(curr_x)
                tmp_test_pred = model.predict(hold_out_x)

                curr_train_loss = log_loss(curr_y[:,i],train_pred)
                curr_val_loss = log_loss(hold_out_y[:,i],tmp_test_pred)

                curr_train_auc = roc_auc_score(curr_y[:,i],train_pred)
                curr_val_auc = roc_auc_score(hold_out_y[:,i],tmp_test_pred)

                print('ls',curr_train_loss,curr_val_loss,'auc',curr_train_auc,curr_val_auc)
                val_loss_l += curr_val_loss
                train_loss_l += curr_train_loss
                val_auc_l += curr_val_auc
                train_auc_l += curr_train_auc
                scored_folds += 1
            except Exception as err:
                # metrics stay best-effort, but the failure is reported and
                # KeyboardInterrupt / SystemExit are no longer swallowed
                print('metrics skipped:',repr(err))
            curr_test_pred = model.predict(d_test)
            test_pred[:,i] += curr_test_pred

        # avg k fold (only over folds that produced metrics; equals k when none failed)
        n_scored = max(scored_folds, 1)
        train_loss_l = train_loss_l/n_scored
        val_loss_l = val_loss_l/n_scored
        train_auc_l = train_auc_l/n_scored
        val_auc_l = val_auc_l/n_scored
        print(list_classes[i])
        print('this class avg train',train_loss_l,'avg val',val_loss_l)
        print('this class auc train',train_auc_l,'auc val',val_auc_l)


        # avg over all classes
        all_train_loss_l += train_loss_l/n_cls
        all_val_loss_l += val_loss_l/n_cls
        all_train_auc_l += train_auc_l/n_cls
        all_val_auc_l += val_auc_l/n_cls
        print('========================')
    # every fold contributed a test prediction, so this average is always over k
    test_pred = test_pred/k
    print('all loss avg',all_train_loss_l,all_val_loss_l)
    print('all auc avg',all_train_auc_l,all_val_auc_l)
    print('=======================================================')
    return test_pred

# marker printed once special_ens has been defined
print('done')

# 10 stratified folds with splitter seed 666, using the per-class params
lgb_res = special_ens('lgb', k=10, rnd=666)

# write the fold-averaged predictions into the sample submission layout and save
sample_submission = pd.read_csv("../input/sample_submission.csv")
sample_submission[list_classes] = lgb_res
sample_submission.to_csv(
    "../results/lgb_log_csv_fold10_stratified_special.gz",
    index=False,
    compression='gzip',
)
print(sample_submission.head())
print('save done')

# best params changed when base models changed
# all loss avg 0.03111512578966158 0.03534320053008906 all auc avg 0.9948524214184179 0.9918450388634261

# change lr, ridge, mnb fold to 10, train loss too low ?
# all loss avg 0.030612393454975732 0.035095753229703264 all auc avg 0.9950485697696142 0.9921210772750391

# tilli feat fold 10
# all loss avg 0.03172720686364922 0.0350882288604753 all auc avg 0.9946451061433877 0.9921962852188195

# update pool gru v2
# all loss avg 0.03178018332247421 0.035037128263355345 all auc avg 0.9946071803186727 0.9922390115824994

done
{'application': 'binary', 'max_depth': 3, 'data_random_seed': 2, 'learning_rate': 0.05, 'feature_fraction': 0.4, 'lambda_l2': 1.0, 'metric': 'binary_logloss'}
0 fold: ls 0.07054908247034279 0.07799381380216308 auc 0.9904564854821903 0.9871499408376233
1 fold: ls 0.07026490907419657 0.0708493844601407 auc 0.9905569248947377 0.9899895084177281
2 fold: ls 0.07010288855777674 0.07511858865400087 auc 0.9905634430396628 0.9887420248572584
3 fold: ls 0.06708963346279326 0.07305350836513544 auc 0.9915805911764245 0.9891753688814958
4 fold: ls 0.06853288965246013 0.07729230316955127 auc 0.9910896922471606 0.9880766959384076
5 fold: ls 0.06850396201558229 0.07522622483776802 auc 0.9911423516975264 0.9881758327994962
6 fold: ls 0.06852703429135769 0.07702614104935028 auc 0.9911058203997253 0.9881049365714475
7 fold: ls 0.0691254261083487 0.06993777341552614 auc 0.9909102763380195 0.9904143831761563
8 fold: ls 0.06903230689714812 0.07367716090582431 auc 0.9908818403866766 0.9894666017313751
9

In [4]:
# find best params for each column, early stopping = 30

# Grid search: run the 10-fold ensemble for every (learning rate, max depth,
# feature fraction) combination and, per class, keep the test predictions of the
# combination with the highest mean validation AUC.
# Sizes come from the data instead of the hard-coded 153164 / 6.
best_pred = np.zeros((test_x.shape[0], len(list_classes)))
val_auc_res = [0] * len(list_classes)
for lr in [0.095,0.075,0.05]:
    for max_d in [3]:
        for s_rate in [0.4,0.48,0.56,0.62]:
            print('learning rate',lr,'max depth',max_d,'feature fraction',s_rate)
            lgb_res,tmp_auc_res = simple_ens('lgb',k=10,rnd=666,lr=lr,
                                 feature_fraction=s_rate,bagging_fraction=0.9,
                                 bag_frec=3,met='binary_logloss',max_d=max_d)
            # check for each cls
            for i, cls_name in enumerate(list_classes):
                # find better params for this class
                if tmp_auc_res[i] > val_auc_res[i]:
                    val_auc_res[i] = tmp_auc_res[i]
                    best_pred[:,i] = lgb_res[:,i]
                    print('FIND BETTER PARAMS',lr,max_d,s_rate,cls_name)
            print('TEST PARAM DONE ------------------------------------------')

print(val_auc_res)
print(np.mean(val_auc_res))
sample_submission = pd.read_csv("../input/sample_submission.csv")
sample_submission[list_classes] = best_pred
sample_submission.to_csv("../results/lgb_grid_search_fold10_stratified.gz", index=False, compression='gzip')
print(sample_submission.head())
print('save done')
                
            
# best auc params per class (learning rate, max depth, feature fraction)
# toxic 0.05 4 0.5
# severe_toxic 0.075 3 0.5
# obscene 0.075 3 0.6
# threat 0.095 3 0.5
# insult 0.075 4 0.5
# identity_hate 0.05 3 0.5

# TEST PARAM DONE ------------------------------------------
# [0.9884873039634368, 0.9920148292218363, 0.9954573112511824, 
#  0.994769628570376, 0.9898761527824232, 0.9912880359235366]
# 0.9919822102854652 PUB 9870

# updated pool gru v2 10 fold
# TEST PARAM DONE ------------------------------------------
# [0.9887825645527965, 0.9921426168741385, 0.9955318446611215, 
#  0.9955039709421902, 0.9900536177079309, 0.9915995552784374]
# 0.9922690283361025 PUB 9869

learning rate 0.095 max depth 3 feature fraction 0.4
0 fold: ls 0.07085827353248232 0.07798084842168852 auc 0.9903233115480082 0.9872524557369384
1 fold: ls 0.06625255482436719 0.0705600833910215 auc 0.9918243707478177 0.9900669268724033
2 fold: ls 0.06818108014013745 0.0748847775176703 auc 0.9912062114694379 0.9887604168365433
3 fold: ls 0.06601199204171145 0.07305479782103391 auc 0.9919402453983566 0.9891495476297903
4 fold: ls 0.0693323629687326 0.07722464066960674 auc 0.990792059100126 0.9880921988220346
5 fold: ls 0.06440595724399693 0.07492308373241514 auc 0.9923908487328252 0.9881962313305844
6 fold: ls 0.06877299277394358 0.07731179436612658 auc 0.9910170358346071 0.9880255182904107
7 fold: ls 0.07217164787214733 0.07017723982894845 auc 0.98989274835965 0.9903317860655048
8 fold: ls 0.06644510648395821 0.07360303958204517 auc 0.9917515689468738 0.9895266229028914
9 fold: ls 0.06653699594977339 0.07624970565715444 auc 0.991711174745344 0.9883336341191891
toxic 0.095 0.4 3
this c

toxic 0.095 0.48 3
this class avg train 0.06899575049284278 avg val 0.07460666883292945
this class auc train 0.99093435365249 auc val 0.9887825645527965
0 fold: ls 0.017338010487293602 0.02082985361076282 auc 0.9947028294463096 0.9915349253070009
1 fold: ls 0.018022351308761387 0.020722802597329087 auc 0.9941522314154662 0.9915456070388657
2 fold: ls 0.01818077125666185 0.02086609192852913 auc 0.9940943562770582 0.991387754779086
3 fold: ls 0.017991023999446413 0.01950313932475885 auc 0.9941754221520326 0.992819502468667
4 fold: ls 0.01813337086122452 0.020977748020218647 auc 0.9941094107729609 0.9914779560703886
5 fold: ls 0.017902591874846427 0.019777176281426098 auc 0.9942654370520911 0.9924216185314437
6 fold: ls 0.018022509887937056 0.019134665205145934 auc 0.9941715013369021 0.9929566277810091
7 fold: ls 0.018005706358819408 0.020392079524569354 auc 0.9941866897314758 0.9919194911222295
8 fold: ls 0.017943241774021 0.020913292443824172 auc 0.9942410490985106 0.9913308513717476
9 

severe_toxic 0.095 0.56 3
this class avg train 0.01765646189864945 avg val 0.020145749555792215
this class auc train 0.9944809531992387 auc val 0.9921426168741385
0 fold: ls 0.03546032253236468 0.036789235477717584 auc 0.9961959904338261 0.9957806222708064
1 fold: ls 0.03567423895938666 0.036618632286133504 auc 0.9961456410102575 0.9957162159463795
2 fold: ls 0.0344042868464071 0.038957553457418546 auc 0.9964192552292838 0.995349281577241
3 fold: ls 0.035077750555038645 0.03364969878765119 auc 0.9962712678628814 0.9964768779699349
4 fold: ls 0.034458421082097984 0.04209645811697498 auc 0.9964134933993349 0.9945190310768354
5 fold: ls 0.034588086569177584 0.037392540130312686 auc 0.9963802499901278 0.9956211764779588
6 fold: ls 0.030969352557429462 0.03728326995636316 auc 0.997173883325641 0.9954494410179144
7 fold: ls 0.035132440276023615 0.03807799117253782 auc 0.9962577688013085 0.9955749339840434
8 fold: ls 0.03346531172800258 0.03886290368261612 auc 0.996639396003195 0.995223122969

obscene 0.095 0.62 3
this class avg train 0.034482859833172154 avg val 0.037816153077860445
this class auc train 0.9964097622774905 auc val 0.9955128594752386
0 fold: ls 0.003853004235446458 0.006753863671145623 auc 0.9993611850438916 0.9969686256023466
1 fold: ls 0.0049664287630285615 0.006373117100323511 auc 0.9986183074546494 0.9965574586214121
2 fold: ls 0.004238837363409342 0.007247360762065442 auc 0.999134072854238 0.9941533102870312
3 fold: ls 0.0036071039895356895 0.006393554573797135 auc 0.9994985473237233 0.9934117480671318
4 fold: ls 0.004340516439479098 0.006680940232611025 auc 0.9990511152343902 0.9966017662958074
5 fold: ls 0.0047168221443877 0.0068737882384817695 auc 0.9987889870766384 0.9958003331447608
6 fold: ls 0.004885128575826627 0.006268601765275631 auc 0.9986429073535564 0.9956916420474784
7 fold: ls 0.005192192179282805 0.007444071123339415 auc 0.9981775117136643 0.9955764032937331
8 fold: ls 0.004409209094146621 0.007036177273871708 auc 0.9990075479923743 0.994

0 fold: ls 0.04964303462319662 0.049855262350365 auc 0.9918298576759376 0.991470525248537
1 fold: ls 0.048738175748239236 0.05327538463273099 auc 0.9921642219751595 0.9900521668133405
2 fold: ls 0.05097845663910874 0.054979824420283326 auc 0.9913190548440731 0.9891233114382181
3 fold: ls 0.04764560117465539 0.05097872414886621 auc 0.9925681406283645 0.9909050222687712
4 fold: ls 0.049573945956825974 0.05518398631959181 auc 0.9918353399833425 0.9891773497444861
5 fold: ls 0.04865458548399649 0.05237936184561952 auc 0.9921926661951458 0.9905999010137224
6 fold: ls 0.04835672286624806 0.05552788819068433 auc 0.9922873764282304 0.989531649004967
7 fold: ls 0.049454954408126015 0.055150017757222745 auc 0.9918819328956269 0.9896573153818103
8 fold: ls 0.050641004089096345 0.052421299267702046 auc 0.9914398696850006 0.9904412823484798
9 fold: ls 0.04984429784189032 0.054342190610639086 auc 0.9917436795701485 0.9895776538169742
insult 0.075 0.4 3
this class avg train 0.049353077883138315 avg v

1 fold: ls 0.015159778320858072 0.017388537586225473 auc 0.9951875804038763 0.9922939543008982
2 fold: ls 0.014772610609073979 0.01718445722987947 auc 0.995476788706416 0.9931952199738409
3 fold: ls 0.014045528419067846 0.018196264636919892 auc 0.9961336723141937 0.9895072049688884
4 fold: ls 0.014641194824773072 0.018612900800294582 auc 0.9956428513056383 0.9893529585054593
5 fold: ls 0.014671101650000726 0.016259740318928895 auc 0.9956214777703422 0.993722396336672
6 fold: ls 0.014050255678070675 0.01631542972450636 auc 0.9960837217180278 0.9920911915600839
7 fold: ls 0.01510486453413881 0.018617819463198848 auc 0.9952407485308022 0.9876366157236794
8 fold: ls 0.013839593832324831 0.01681140004173444 auc 0.996239580947145 0.992537845942626
9 fold: ls 0.015527366560260719 0.016654931559284318 auc 0.9949141860720181 0.9923851976298865
identity_hate 0.075 0.48 3
this class avg train 0.0146198199154707 avg val 0.017312354461375662
this class auc train 0.9956366133768875 auc val 0.9915691

0 fold: ls 0.06977040812238773 0.07813686865585732 auc 0.9907088565051761 0.9870724770825064
1 fold: ls 0.06719504734725641 0.07031744553946755 auc 0.9915465666781276 0.9901108683007442
2 fold: ls 0.0714291865971861 0.07520362773084804 auc 0.9901083271876058 0.9886753879076814
3 fold: ls 0.06363302765107588 0.0729142339628574 auc 0.9926478094873569 0.9892821420223205
4 fold: ls 0.06882686256897019 0.07721411668362818 auc 0.9909576827561396 0.9881589700137967
5 fold: ls 0.06519716778950178 0.07483233914177997 auc 0.9921943836985853 0.9881977725529333
6 fold: ls 0.06884743802393967 0.0769723584790323 auc 0.9910137768590395 0.9881713904527258
7 fold: ls 0.07181699704085817 0.07030531122273023 auc 0.9899668266816009 0.99031410611317
8 fold: ls 0.0671012284265956 0.07369377328258528 auc 0.9915394960319831 0.9895506495047822
9 fold: ls 0.06869002389920277 0.0763340558031077 auc 0.9910364889158618 0.9882269197402244
toxic 0.075 0.62 3
this class avg train 0.06825073874669743 avg val 0.0745924

2 fold: ls 0.03408746089991449 0.03887395324466806 auc 0.9964901178089721 0.9953713652068501
3 fold: ls 0.035884110936046903 0.03364337157223497 auc 0.996090500721684 0.99650338615654
4 fold: ls 0.034644141352200404 0.04207084663045394 auc 0.9963759815561106 0.9945074410868279
5 fold: ls 0.034644973771366536 0.03737271456663951 auc 0.9963709835868196 0.9956301822134376
6 fold: ls 0.03357973791042103 0.037449149324098166 auc 0.9966103949213746 0.9954679223533318
7 fold: ls 0.03527977207618144 0.03802374898528234 auc 0.9962342051503856 0.995583861408779
8 fold: ls 0.03463678935379216 0.03867325940987267 auc 0.9963832711073748 0.995274259885165
9 fold: ls 0.03518953163301317 0.03862357182912659 auc 0.9962596472462263 0.9953482402484827
obscene 0.05 0.48 3
this class avg train 0.034777422449334225 avg val 0.03781175132180752
this class auc train 0.9963452928975747 auc val 0.9955167639900372
0 fold: ls 0.00467762539499971 0.006701278000037373 auc 0.9988255394097227 0.9968992248062015
1 fold

4 fold: ls 0.05066557238758293 0.05548348131566155 auc 0.991446947228025 0.989030945091395
5 fold: ls 0.04783885517132042 0.052316756205815294 auc 0.9925141713917555 0.9906306878207727
6 fold: ls 0.049806255746093 0.05569035584073277 auc 0.9917492056743997 0.9894456048988504
7 fold: ls 0.04925033099421955 0.055085217453466645 auc 0.9919615842456666 0.9896598283649283
8 fold: ls 0.048579188139600535 0.0523002826980512 auc 0.9922256126207402 0.9904226862734077
9 fold: ls 0.050719803656115746 0.05452334923615614 auc 0.9914181735909638 0.9895052799031798
insult 0.05 0.62 3
this class avg train 0.0496861417788766 avg val 0.053448066994698964
this class auc train 0.991811010766674 auc val 0.9899535498608067
0 fold: ls 0.014587980409584951 0.017034385269734092 auc 0.9956708817853712 0.9929409823437122
1 fold: ls 0.014631530705768914 0.017360155534442517 auc 0.9956465972248096 0.9922589798120973
2 fold: ls 0.014313852706702521 0.017212041128671134 auc 0.9958768767721977 0.9931006094977259
3 fo