In [1]:
# ---- Notebook configuration -------------------------------------------------
# Path to the token-level CSV that is read into `split_df`.
file_dir = "../input/entity-annotated-corpus/ner_dataset.csv"

# Name of the label column, the columns discarded right after loading,
# and the column that holds the raw token text.
target = "Tag"
drop_cols = ["Sentence #", "POS"]
word_col = "Word"

# Feature switches: which embedding families the feature cell should add.
USE_BERT = False
USE_W2V = True

In [2]:
!pip install pycaret

Collecting pycaret
  Downloading pycaret-2.3.3-py3-none-any.whl (264 kB)
[K     |████████████████████████████████| 264 kB 892 kB/s 
Collecting imbalanced-learn==0.7.0
  Downloading imbalanced_learn-0.7.0-py3-none-any.whl (167 kB)
[K     |████████████████████████████████| 167 kB 5.7 MB/s 
Collecting gensim<4.0.0
  Downloading gensim-3.8.3-cp37-cp37m-manylinux1_x86_64.whl (24.2 MB)
[K     |████████████████████████████████| 24.2 MB 439 kB/s 
Collecting mlflow
  Downloading mlflow-1.19.0-py3-none-any.whl (14.4 MB)
[K     |████████████████████████████████| 14.4 MB 40.4 MB/s 
Collecting pyod
  Downloading pyod-0.9.0.tar.gz (105 kB)
[K     |████████████████████████████████| 105 kB 67.0 MB/s 
[?25hCollecting scipy<=1.5.4
  Downloading scipy-1.5.4-cp37-cp37m-manylinux1_x86_64.whl (25.9 MB)
[K     |████████████████████████████████| 25.9 MB 52.4 MB/s 
Collecting databricks-cli>=0.8.7
  Downloading databricks-cli-0.14.3.tar.gz (54 kB)
[K     |███████████████████████████

In [3]:
import pandas as pd 
import numpy as np 
import sys
import gensim.downloader as api
import nltk
from gensim.models.word2vec import Word2Vec
from nltk.corpus import stopwords
from nltk.tag.stanford import StanfordNERTagger
import tensorflow as tf
import transformers
from transformers import BertTokenizer
import warnings
import sklearn
from pycaret.classification import *
from sklearn.metrics import average_precision_score
import sklearn
warnings.filterwarnings('ignore')


In [4]:
# Load the token table and discard the columns named in `drop_cols`
# in a single chained expression, then display the resulting frame.
split_df = pd.read_csv(file_dir).drop(columns=drop_cols)
split_df

Unnamed: 0,Word,Tag
0,Thousands,O
1,of,O
2,demonstrators,O
3,have,O
4,marched,O
...,...,...
1048570,they,O
1048571,responded,O
1048572,to,O
1048573,the,O


In [5]:
# pos tag
def get_pos(tokens):
    """Return the part-of-speech tag of every token, in input order.

    `tokens` is handed to `nltk.pos_tag` as one sequence; only the second
    element (the tag) of each returned pair is kept.
    """
    tagged_pairs = nltk.pos_tag(tokens)
    return [pair[1] for pair in tagged_pairs]

# stop word
def get_stop_word(tokens):
    """Flag each token that appears verbatim in NLTK's English stop-word list.

    Parameters
    ----------
    tokens : iterable of str
        Tokens to test, in order.

    Returns
    -------
    list of bool
        One flag per token; True when the token is in the stop-word list.

    Notes
    -----
    The comparison is an exact string match, so it is case-sensitive.
    NOTE(review): NLTK's list is presumably lower-case, in which case
    capitalised stop words ("The") are reported as False - confirm whether
    callers should lower-case first.
    """
    # Build a set once: membership tests are O(1) instead of scanning the
    # whole stop-word list for every token.
    stop_word_set = set(stopwords.words('english'))
    return [token in stop_word_set for token in tokens]

# NER
# https://nlp.stanford.edu/software/CRF-NER.shtml
def get_ner(tokens, model_path=None, jar_path=None):
    """Tag tokens with the Stanford NER tagger and return only the labels.

    Parameters
    ----------
    tokens : sequence of str
        Tokens passed to the tagger as one sequence.
    model_path : str, optional
        Path to the CRF model file. Defaults to the 7-class MUC model inside
        a local ``stanford_ner`` directory
        (``english.all.3class.distsim.crf.ser.gz`` is the alternative the
        notebook author kept commented out).
    jar_path : str, optional
        Path to ``stanford-ner.jar``. Defaults to the same local directory.

    Returns
    -------
    list
        The second element (the label) of every pair returned by the tagger.
    """
    # Local import keeps the block self-contained; the notebook's import cell
    # does not import `os`.
    import os

    # Build the default paths with os.path.join: the previous hard-coded
    # back-slash separators only resolve on Windows.
    if model_path is None:
        model_path = os.path.join('stanford_ner', 'english.muc.7class.distsim.crf.ser.gz')
    if jar_path is None:
        jar_path = os.path.join('stanford_ner', 'stanford-ner.jar')

    tagger = StanfordNERTagger(model_path, jar_path, encoding='utf-8')
    return [pair[1] for pair in tagger.tag(tokens)]

# upper/lower case information, acronyms, punctuation marks, etc.
# % of upper case (or number)
def get_syntactic(tokens):
    """Return, per token, the fraction of characters unchanged by upper-casing.

    A character counts when ``ch == ch.upper()``, i.e. it is already upper
    case or has no case at all (digits, punctuation).

    Parameters
    ----------
    tokens : iterable of str

    Returns
    -------
    list of float
        One value in [0, 1] per token. An empty token yields 0.0.
    """
    def _non_lower_fraction(token):
        # Guard the division: an empty token used to raise ZeroDivisionError.
        if not token:
            return 0.0
        # Compare character by character. Upper-casing the whole string can
        # change its length (e.g. 'ß' -> 'SS'), which broke the previous
        # element-wise array comparison.
        return sum(ch == ch.upper() for ch in token) / len(token)

    return [_non_lower_fraction(token) for token in tokens]

# word2vec or bert representation
def get_word2vec(tokens):
    """Look up the word2vec vector of every token.

    Reads the module-level ``w2v_model`` (created in the feature cell), so
    that cell must have built the model before this function is called.

    Parameters
    ----------
    tokens : iterable of str

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(tokens), vector_size)``. Tokens missing from the
        vocabulary get an all-zero row.
    """
    keyed_vectors = w2v_model.wv
    # Take the width from the model instead of assuming 100, so a model
    # trained with another dimensionality still yields a rectangular array.
    dim = keyed_vectors.vector_size

    res = []
    for token in tokens:
        try:
            wt = keyed_vectors.get_vector(token)
        except KeyError:
            # Out-of-vocabulary token. Only this case is absorbed; any other
            # error is a real problem and should surface.
            wt = np.zeros(dim)
        res.append(wt)

    if not res:
        # Keep the result 2-D so callers can still read `.shape[1]`.
        return np.zeros((0, dim))
    return np.array(res)

# bert representation
def load_bert(max_length=3, model_name="cambridgeltl/BioRedditBERT-uncased"):
    """Wrap a pretrained TF BERT encoder in a Keras model.

    Parameters
    ----------
    max_length : int, default 3
        Number of token ids per input row. It must equal the length the
        tokenizer pads/truncates to in ``get_bert``.
    model_name : str
        Checkpoint name handed to ``TFBertModel.from_pretrained``.

    Returns
    -------
    tf.keras.Model
        Model mapping ``input_ids`` to the first output of the encoder.
    """
    # `shape` must be a tuple: the previous `(3)` was just the integer 3.
    input_ids = tf.keras.layers.Input(
        shape=(max_length,), dtype=tf.int32, name="input_ids")

    embedding = transformers.TFBertModel.from_pretrained(model_name)
    out = embedding(input_ids)

    model = tf.keras.models.Model(inputs=[input_ids], outputs=out[0])
    # Kept from the original so the returned model is still compiled; the
    # notebook itself only calls `predict` on it.
    model.compile(optimizer=tf.keras.optimizers.Adam(),loss="categorical_crossentropy",metrics=["acc"],)
    return model

def get_bert(tokens, max_length=3, model_name="cambridgeltl/BioRedditBERT-uncased"):
    """Embed each token with BERT and average over the sequence positions.

    Reads the module-level ``bert_model`` (built by ``load_bert`` in the
    feature cell).

    Parameters
    ----------
    tokens : iterable of str
        One entry per token; each is encoded as its own short sequence.
    max_length : int, default 3
        Length every encoded sequence is padded/truncated to. It must match
        the input length ``bert_model`` was built with.
    model_name : str
        Checkpoint name handed to ``BertTokenizer.from_pretrained``.

    Returns
    -------
    numpy.ndarray
        ``bert_model.predict(...)`` averaged over axis 1.
        NOTE(review): the mean runs over every position of the encoded
        sequence, which presumably includes special and padding tokens -
        confirm this is intended.
    """
    tokenizer = BertTokenizer.from_pretrained(model_name)
    encoded = tokenizer.batch_encode_plus(
        list(tokens),
        add_special_tokens=True,
        max_length=max_length,
        # Explicit padding/truncation replaces the deprecated
        # `pad_to_max_length=True`, which relied on implicit truncation.
        padding="max_length",
        truncation=True,
        return_attention_mask=True,
        return_token_type_ids=True,
        return_tensors="tf",
    )
    token_bert = np.array(encoded["input_ids"], dtype="int32")

    emb = bert_model.predict(token_bert)
    return emb.mean(axis=1)

In [6]:
%%time
print('get pos tag...')
split_df['pos']=get_pos(split_df[word_col].values)

print('get stop word...')
split_df['stop_word']=get_stop_word(split_df[word_col].values)

print('get synt...')
split_df['#_of_non_lower']=split_df[word_col].transform(lambda x:sum(np.array(list(x.upper()))==np.array(list(x))))
split_df['len']=split_df[word_col].transform(lambda x:len(x))
tokens_lower=list(map(lambda x:x.lower(),split_df[word_col].values))


print('get lag...')
lag_feas=['#_of_non_lower','pos']
lags=3
for col in lag_feas:
    for lag in range(lags):
        split_df['prev_%s_%s'%(col,lag+1)]=split_df[col].shift(lag+1)
        split_df['next_%s_%s'%(col,lag+1)]=split_df[col].shift(-(lag+1))   
    
if USE_W2V:
    print('get word2vec...')
    corpus = api.load('text8') 
    w2v_model = Word2Vec(corpus)  # time consuming
    temp=get_word2vec(tokens_lower)
    for i in range(temp.shape[1]):
        split_df['word_2vec_%s'%(i+1)]=temp[:,i]

if USE_BERT:
    print('get BERT...')
    bert_model=load_bert()
    temp=get_bert(tokens)
    for i in range(temp.shape[1]):
        split_df['bert_%s'%(i+1)]=temp[:,i]

split_df.head()

get pos tag...
get stop word...
get synt...
get lag...
get word2vec...
CPU times: user 6min 35s, sys: 5.43 s, total: 6min 40s
Wall time: 3min 54s


Unnamed: 0,Word,Tag,pos,stop_word,#_of_non_lower,len,prev_#_of_non_lower_1,next_#_of_non_lower_1,prev_#_of_non_lower_2,next_#_of_non_lower_2,prev_#_of_non_lower_3,next_#_of_non_lower_3,prev_pos_1,next_pos_1,prev_pos_2,next_pos_2,prev_pos_3,next_pos_3,word_2vec_1,word_2vec_2,word_2vec_3,word_2vec_4,word_2vec_5,word_2vec_6,word_2vec_7,word_2vec_8,word_2vec_9,word_2vec_10,word_2vec_11,word_2vec_12,word_2vec_13,word_2vec_14,word_2vec_15,word_2vec_16,word_2vec_17,word_2vec_18,word_2vec_19,word_2vec_20,word_2vec_21,word_2vec_22,word_2vec_23,word_2vec_24,word_2vec_25,word_2vec_26,word_2vec_27,word_2vec_28,word_2vec_29,word_2vec_30,word_2vec_31,word_2vec_32,word_2vec_33,word_2vec_34,word_2vec_35,word_2vec_36,word_2vec_37,word_2vec_38,word_2vec_39,word_2vec_40,word_2vec_41,word_2vec_42,word_2vec_43,word_2vec_44,word_2vec_45,word_2vec_46,word_2vec_47,word_2vec_48,word_2vec_49,word_2vec_50,word_2vec_51,word_2vec_52,word_2vec_53,word_2vec_54,word_2vec_55,word_2vec_56,word_2vec_57,word_2vec_58,word_2vec_59,word_2vec_60,word_2vec_61,word_2vec_62,word_2vec_63,word_2vec_64,word_2vec_65,word_2vec_66,word_2vec_67,word_2vec_68,word_2vec_69,word_2vec_70,word_2vec_71,word_2vec_72,word_2vec_73,word_2vec_74,word_2vec_75,word_2vec_76,word_2vec_77,word_2vec_78,word_2vec_79,word_2vec_80,word_2vec_81,word_2vec_82,word_2vec_83,word_2vec_84,word_2vec_85,word_2vec_86,word_2vec_87,word_2vec_88,word_2vec_89,word_2vec_90,word_2vec_91,word_2vec_92,word_2vec_93,word_2vec_94,word_2vec_95,word_2vec_96,word_2vec_97,word_2vec_98,word_2vec_99,word_2vec_100
0,Thousands,O,NNS,False,1,9,,0.0,,0.0,,0.0,,IN,,NNS,,VBP,1.421284,0.016402,1.911949,2.714994,-0.593004,0.702406,-1.034192,-2.068728,-0.408649,0.381074,0.865582,-1.855979,-0.823347,0.467789,-1.359929,0.018515,-1.942731,-1.428731,-2.489092,-0.381786,0.660762,1.207184,-0.545124,-2.484401,-1.774102,0.598076,-1.164143,-1.470461,2.599328,-0.156253,-1.073933,1.261038,-0.321448,-0.246528,0.14131,0.289644,-0.768058,1.22907,-0.110938,-1.039593,-0.119137,1.135345,-3.822168,-0.425287,-0.202676,2.962387,0.634075,-1.478015,-0.930128,0.417163,1.882714,-0.177405,-0.769823,-0.55994,0.861612,1.28262,2.051453,1.995044,-0.017428,0.288403,0.431086,2.822564,0.189342,0.211175,-1.070597,-0.61563,-0.36336,0.446175,-2.191967,0.177563,-1.566767,-1.636885,0.441744,-0.379262,1.413376,1.127055,-0.529256,-0.095967,-1.963953,-0.536709,0.331026,-0.812682,-2.626156,1.000246,0.261747,-1.495933,1.591521,3.208377,-0.302237,-1.319445,0.800637,2.006147,-0.984088,-0.130014,-1.689246,1.100079,-3.610959,-0.156966,-5.156607,-2.026332
1,of,O,IN,True,0,2,1.0,0.0,,0.0,,0.0,NNS,NNS,,VBP,,VBN,-1.149262,-1.992989,0.035231,-0.153007,-0.639457,0.022881,-1.475076,0.509012,-0.995976,0.357413,-1.066453,1.025311,1.517382,0.047156,0.351239,0.359121,0.03914,0.516663,0.30795,-0.927842,1.621768,-0.092729,0.210708,-0.917248,-0.296736,0.484773,1.903922,0.068347,-0.02178,-1.31308,-0.319395,-0.759531,0.054108,-0.771156,-1.187336,1.194884,0.606647,1.064426,2.214024,-0.339128,0.433639,0.052514,-0.160136,-1.186648,1.352166,-1.151753,-0.609818,0.136408,0.489762,-0.034606,-0.621802,0.338283,-2.085926,1.747095,1.006143,-1.2128,0.972605,0.154178,0.646629,0.26045,1.462575,0.625859,-0.20744,-0.609215,-1.373447,-0.783509,3.014327,-0.055424,-2.375887,-2.033319,0.559523,-1.253653,0.484823,2.217039,0.947496,0.893382,0.176258,1.075231,-0.437817,1.420588,0.541593,0.299269,-0.133498,-2.166051,-1.094762,-0.431305,0.878967,-0.586833,0.940492,0.547173,0.975732,-1.034483,-0.250373,0.976415,-0.988696,-0.33574,0.371434,0.387279,-1.860047,0.009753
2,demonstrators,O,NNS,False,0,13,0.0,0.0,1.0,0.0,,0.0,IN,VBP,NNS,VBN,,IN,0.137477,0.230787,-0.095987,0.084007,-0.169975,-0.177495,-0.044059,0.0887,-0.078582,0.290504,0.087377,0.08551,-0.06576,-0.303749,-0.170995,0.04557,0.03434,0.158335,-0.108031,0.016771,0.202512,0.115745,-0.102669,-0.058525,-0.016854,0.014082,-0.189936,0.041302,0.24321,-0.069483,0.004008,-0.18334,-0.127313,0.296356,0.082296,0.194734,0.063824,-0.05202,0.109019,-0.064111,0.057769,-0.169914,-0.505544,0.058979,0.206954,0.299975,-0.00642,0.27555,-0.192647,-0.311903,0.05038,0.020638,-0.003417,0.022165,-0.053109,0.20321,-0.219391,0.154743,0.200362,-0.159666,0.15609,0.130938,-0.140867,-0.099557,0.300172,0.224615,-0.221569,0.005049,-0.289773,-0.17528,-0.083681,-0.004815,0.107343,-0.144377,0.20973,-0.003395,0.097399,-0.202108,-0.081539,-0.239281,0.003118,0.273144,-0.419758,-0.219899,0.102212,0.020766,0.022946,0.116971,-0.153152,0.235253,0.163868,-0.116273,-0.039078,0.027755,0.037274,0.053789,-0.029286,-0.053606,-0.268725,-0.252668
3,have,O,VBP,True,0,4,0.0,0.0,0.0,0.0,1.0,1.0,NNS,VBN,IN,IN,NNS,NNP,2.090069,-1.812068,-2.197685,2.722306,2.60569,3.491157,-1.292626,-3.119389,2.436323,-1.15151,-2.047331,-0.551283,1.110074,2.124008,-2.71804,-3.708123,2.090313,-1.492092,2.338343,3.175877,0.890655,0.912291,0.484741,-3.374462,-2.962702,-0.842215,2.523262,-1.002738,-1.659017,3.192202,0.818362,-4.548322,1.677391,-2.807899,0.68066,1.825887,-2.03991,0.23739,-0.063092,-1.452062,-1.323773,-0.024037,0.23977,0.442529,-1.508307,1.839036,-1.319872,1.873304,4.535566,-1.23699,1.799664,-0.77958,-1.789186,-0.737814,1.795693,4.513585,-0.229651,0.034174,4.074395,2.764338,1.116387,2.004232,-0.182654,0.085112,4.788013,-3.399225,2.766286,-3.467458,-2.807043,4.709053,3.010117,-6.029093,-0.188674,-1.9022,1.691224,1.066547,-2.882914,0.878694,2.370013,-0.872481,-1.736553,-1.920965,-1.952746,-1.258536,5.04474,0.822054,1.284306,2.400402,-0.932554,-2.358478,-0.161113,1.777335,0.710516,-3.776827,-0.802076,-1.574183,-0.619652,0.691545,-1.525634,-0.489425
4,marched,O,VBN,False,0,7,0.0,0.0,0.0,1.0,0.0,0.0,VBP,IN,NNS,NNP,IN,TO,-0.098993,-0.351241,0.045595,0.082748,-2.66917,-0.188572,-0.467314,-0.310395,0.100967,-0.787336,-0.191643,1.172913,-1.171194,-0.34098,-0.887675,0.282462,1.445185,1.000591,0.062731,0.095487,1.729253,-0.020369,1.326744,0.774305,0.473092,0.387156,0.235342,-0.274326,0.140168,-0.164282,-0.057838,-0.24213,1.087761,0.300526,-0.418901,0.275942,0.924548,0.349776,0.812754,-0.402203,0.350108,-0.623377,-1.679596,0.309446,0.878259,0.1602,0.733855,0.312354,-0.737085,0.628817,0.400997,-0.165322,1.795239,-0.744014,1.235429,-1.103393,-0.381673,0.296464,0.240507,-0.904262,0.569788,0.059933,0.021111,-0.528754,-0.331516,-0.528799,-0.372057,-1.031721,-1.361546,-0.456266,-0.611946,1.302222,-1.020462,-0.530331,1.759166,0.020704,-0.599289,-0.030225,-0.505129,-1.38547,0.37518,0.221483,-0.52105,-0.29062,-0.327605,-0.518238,1.060176,0.693499,-1.053499,-0.523511,0.342218,-0.5056,0.744427,-0.672348,0.29409,0.736028,-1.151521,0.355139,0.207277,-0.024097


In [7]:
import pandas as pd
import numpy as np 

from pycaret.classification import *
from sklearn.metrics import average_precision_score
import sklearn

warnings.filterwarnings('ignore')

# Resolve the label name BEFORE it is used below. Once this cell has run,
# `target` holds the renamed 'col_N' label; reading that stale value while
# building `num_feas` would let 'Tag' slip in as a feature (label leakage).
target='Tag'

# Columns routed through pd.get_dummies. Per the pandas docs, only
# object/category columns are expanded into indicator columns by default;
# numeric and boolean ones pass through unchanged.
cat_feas=['pos','stop_word','len','#_of_non_lower',
          'prev_#_of_non_lower_1','prev_#_of_non_lower_2','prev_#_of_non_lower_3',
          'next_#_of_non_lower_1','next_#_of_non_lower_2','next_#_of_non_lower_3',
          'prev_pos_1','prev_pos_2','prev_pos_3',
          'next_pos_1','next_pos_2','next_pos_3']

# Every remaining column except the label and the raw token text.
num_feas=[x for x in split_df.columns if x not in drop_cols+cat_feas and x!=target and x!=word_col ]

split_df=pd.concat((pd.get_dummies(split_df[cat_feas]),
                  split_df[num_feas+[target]]),axis=1)

# Replace the column names with positional 'col_<i>' names and keep the
# originals in `org_names` so they can be mapped back.
# NOTE(review): presumably done because some names contain characters the
# model library rejects (e.g. '#') - confirm.
org_names=split_df.columns.tolist()
split_df.columns=['col_%s'%x for x in range(len(org_names))]

# From here on `target` refers to the renamed label column.
target='col_%s'%org_names.index(target)

split_df

Unnamed: 0,col_0,col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10,col_11,col_12,col_13,col_14,col_15,col_16,col_17,col_18,col_19,col_20,col_21,col_22,col_23,col_24,col_25,col_26,col_27,col_28,col_29,col_30,col_31,col_32,col_33,col_34,col_35,col_36,col_37,col_38,col_39,col_40,col_41,col_42,col_43,col_44,col_45,col_46,col_47,col_48,col_49,col_50,col_51,col_52,col_53,col_54,col_55,col_56,col_57,col_58,col_59,col_60,col_61,col_62,col_63,col_64,col_65,col_66,col_67,col_68,col_69,col_70,col_71,col_72,col_73,col_74,col_75,col_76,col_77,col_78,col_79,col_80,col_81,col_82,col_83,col_84,col_85,col_86,col_87,col_88,col_89,col_90,col_91,col_92,col_93,col_94,col_95,col_96,col_97,col_98,col_99,col_100,col_101,col_102,col_103,col_104,col_105,col_106,col_107,col_108,col_109,col_110,col_111,col_112,col_113,col_114,col_115,col_116,col_117,col_118,col_119,col_120,col_121,col_122,col_123,col_124,col_125,col_126,col_127,col_128,col_129,col_130,col_131,col_132,col_133,col_134,col_135,col_136,col_137,col_138,col_139,col_140,col_141,col_142,col_143,col_144,col_145,col_146,col_147,col_148,col_149,col_150,col_151,col_152,col_153,col_154,col_155,col_156,col_157,col_158,col_159,col_160,col_161,col_162,col_163,col_164,col_165,col_166,col_167,col_168,col_169,col_170,col_171,col_172,col_173,col_174,col_175,col_176,col_177,col_178,col_179,col_180,col_181,col_182,col_183,col_184,col_185,col_186,col_187,col_188,col_189,col_190,col_191,col_192,col_193,col_194,col_195,col_196,col_197,col_198,col_199,col_200,col_201,col_202,col_203,col_204,col_205,col_206,col_207,col_208,col_209,col_210,col_211,col_212,col_213,col_214,col_215,col_216,col_217,col_218,col_219,col_220,col_221,col_222,col_223,col_224,col_225,col_226,col_227,col_228,col_229,col_230,col_231,col_232,col_233,col_234,col_235,col_236,col_237,col_238,col_239,col_240,col_241,col_242,col_243,col_244,col_245,col_246,col_247,col_248,col_249,col_250,col_251,col_252,col_253,col_254,col_255,col_256,col_257,col_258,col_259,col_260,col_261,col
_262,col_263,col_264,col_265,col_266,col_267,col_268,col_269,col_270,col_271,col_272,col_273,col_274,col_275,col_276,col_277,col_278,col_279,col_280,col_281,col_282,col_283,col_284,col_285,col_286,col_287,col_288,col_289,col_290,col_291,col_292,col_293,col_294,col_295,col_296,col_297,col_298,col_299,col_300,col_301,col_302,col_303,col_304,col_305,col_306,col_307,col_308,col_309,col_310,col_311,col_312,col_313,col_314,col_315,col_316,col_317,col_318,col_319,col_320,col_321,col_322,col_323,col_324,col_325,col_326,col_327,col_328,col_329,col_330,col_331,col_332,col_333,col_334,col_335,col_336,col_337,col_338,col_339,col_340,col_341,col_342,col_343,col_344,col_345,col_346,col_347,col_348,col_349,col_350,col_351,col_352,col_353,col_354,col_355,col_356,col_357,col_358,col_359,col_360,col_361,col_362,col_363,col_364,col_365,col_366,col_367,col_368,col_369,col_370,col_371,col_372,col_373,col_374,col_375,col_376,col_377,col_378,col_379,col_380,col_381,col_382,col_383,col_384,col_385,col_386,col_387,col_388,col_389,col_390,col_391,col_392,col_393,col_394,col_395,col_396,col_397,col_398,col_399,col_400,col_401,col_402,col_403,col_404,col_405,col_406,col_407,col_408,col_409,col_410
0,False,9,1,,,,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1.421284,0.016402,1.911949,2.714994,-0.593004,0.702406,-1.034192,-2.068728,-0.408649,0.381074,0.865582,-1.855979,-0.823347,0.467789,-1.359929,0.018515,-1.942731,-1.428731,-2.489092,-0.381786,0.660762,1.207184,-0.545124,-2.484401,-1.774102,0.598076,-1.164143,-1.470461,2.599328,-0.156253,-1.073933,1.261038,-0.321448,-0.246528,0.141310,0.289644,-0.768058,1.229070,-0.110938,-1.039593,-0.119137,1.135345,-3.822168,-0.425287,-0.202676,2.962387,0.634075,-1.478015,-0.930128,0.417163,1.882714,-0.177405,-0.769823,-0.559940,0.861612,1.282620,2.051453,1.995044,-0.017428,0.288403,0.431086,2.822564,0.189342,0.211175,-1.070597,-0.615630,-0.363360,0.446175,-2.191967,0.177563,-1.566767,-1.636885,0.441744,-0.379262,1.413376,1.127055,-0.529256,-0.095967,-1.963953,-0.536709,0.331026,-0.812682,-2.626156,1.000246,0.261747,-1.495933,1.591521,3.208377,-0.302237,-1.319445,0.800637,2.006147,-0.984088,-0.130014,-1.689246,1.100079,-3.610959,-0.156966,-5.156607,-2.026332,O
1,True,2,0,1.0,,,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,-1.149262,-1.992989,0.035231,-0.153007,-0.639457,0.022881,-1.475076,0.509012,-0.995976,0.357413,-1.066453,1.025311,1.517382,0.047156,0.351239,0.359121,0.039140,0.516663,0.307950,-0.927842,1.621768,-0.092729,0.210708,-0.917248,-0.296736,0.484773,1.903922,0.068347,-0.021780,-1.313080,-0.319395,-0.759531,0.054108,-0.771156,-1.187336,1.194884,0.606647,1.064426,2.214024,-0.339128,0.433639,0.052514,-0.160136,-1.186648,1.352166,-1.151753,-0.609818,0.136408,0.489762,-0.034606,-0.621802,0.338283,-2.085926,1.747095,1.006143,-1.212800,0.972605,0.154178,0.646629,0.260450,1.462575,0.625859,-0.207440,-0.609215,-1.373447,-0.783509,3.014327,-0.055424,-2.375887,-2.033319,0.559523,-1.253653,0.484823,2.217039,0.947496,0.893382,0.176258,1.075231,-0.437817,1.420588,0.541593,0.299269,-0.133498,-2.166051,-1.094762,-0.431305,0.878967,-0.586833,0.940492,0.547173,0.975732,-1.034483,-0.250373,0.976415,-0.988696,-0.335740,0.371434,0.387279,-1.860047,0.009753,O
2,False,13,0,0.0,1.0,,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.137477,0.230787,-0.095987,0.084007,-0.169975,-0.177495,-0.044059,0.088700,-0.078582,0.290504,0.087377,0.085510,-0.065760,-0.303749,-0.170995,0.045570,0.034340,0.158335,-0.108031,0.016771,0.202512,0.115745,-0.102669,-0.058525,-0.016854,0.014082,-0.189936,0.041302,0.243210,-0.069483,0.004008,-0.183340,-0.127313,0.296356,0.082296,0.194734,0.063824,-0.052020,0.109019,-0.064111,0.057769,-0.169914,-0.505544,0.058979,0.206954,0.299975,-0.006420,0.275550,-0.192647,-0.311903,0.050380,0.020638,-0.003417,0.022165,-0.053109,0.203210,-0.219391,0.154743,0.200362,-0.159666,0.156090,0.130938,-0.140867,-0.099557,0.300172,0.224615,-0.221569,0.005049,-0.289773,-0.175280,-0.083681,-0.004815,0.107343,-0.144377,0.209730,-0.003395,0.097399,-0.202108,-0.081539,-0.239281,0.003118,0.273144,-0.419758,-0.219899,0.102212,0.020766,0.022946,0.116971,-0.153152,0.235253,0.163868,-0.116273,-0.039078,0.027755,0.037274,0.053789,-0.029286,-0.053606,-0.268725,-0.252668,O
3,True,4,0,0.0,0.0,1.0,0.0,0.0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.090069,-1.812068,-2.197685,2.722306,2.605690,3.491157,-1.292626,-3.119389,2.436323,-1.151510,-2.047331,-0.551283,1.110074,2.124008,-2.718040,-3.708123,2.090313,-1.492092,2.338343,3.175877,0.890655,0.912291,0.484741,-3.374462,-2.962702,-0.842215,2.523262,-1.002738,-1.659017,3.192202,0.818362,-4.548322,1.677391,-2.807899,0.680660,1.825887,-2.039910,0.237390,-0.063092,-1.452062,-1.323773,-0.024037,0.239770,0.442529,-1.508307,1.839036,-1.319872,1.873304,4.535566,-1.236990,1.799664,-0.779580,-1.789186,-0.737814,1.795693,4.513585,-0.229651,0.034174,4.074395,2.764338,1.116387,2.004232,-0.182654,0.085112,4.788013,-3.399225,2.766286,-3.467458,-2.807043,4.709053,3.010117,-6.029093,-0.188674,-1.902200,1.691224,1.066547,-2.882914,0.878694,2.370013,-0.872481,-1.736553,-1.920965,-1.952746,-1.258536,5.044740,0.822054,1.284306,2.400402,-0.932554,-2.358478,-0.161113,1.777335,0.710516,-3.776827,-0.802076,-1.574183,-0.619652,0.691545,-1.525634,-0.489425,O
4,False,7,0,0.0,0.0,0.0,0.0,1.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,-0.098993,-0.351241,0.045595,0.082748,-2.669170,-0.188572,-0.467314,-0.310395,0.100967,-0.787336,-0.191643,1.172913,-1.171194,-0.340980,-0.887675,0.282462,1.445185,1.000591,0.062731,0.095487,1.729253,-0.020369,1.326744,0.774305,0.473092,0.387156,0.235342,-0.274326,0.140168,-0.164282,-0.057838,-0.242130,1.087761,0.300526,-0.418901,0.275942,0.924548,0.349776,0.812754,-0.402203,0.350108,-0.623377,-1.679596,0.309446,0.878259,0.160200,0.733855,0.312354,-0.737085,0.628817,0.400997,-0.165322,1.795239,-0.744014,1.235429,-1.103393,-0.381673,0.296464,0.240507,-0.904262,0.569788,0.059933,0.021111,-0.528754,-0.331516,-0.528799,-0.372057,-1.031721,-1.361546,-0.456266,-0.611946,1.302222,-1.020462,-0.530331,1.759166,0.020704,-0.599289,-0.030225,-0.505129,-1.385470,0.375180,0.221483,-0.521050,-0.290620,-0.327605,-0.518238,1.060176,0.693499,-1.053499,-0.523511,0.342218,-0.505600,0.744427,-0.672348,0.294090,0.736028,-1.151521,0.355139,0.207277,-0.024097,O
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1048570,True,4,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.634087,-0.229604,2.184750,-0.959303,1.557830,1.232350,1.822617,0.901927,2.588472,-0.418204,1.341030,-2.690629,1.851479,-2.350780,1.404580,1.468830,1.516479,-0.484351,-0.715594,-1.475930,2.582160,0.739414,0.888071,-0.189270,-1.573725,-0.283301,-2.805363,1.800015,-1.054920,0.069724,-1.682240,1.646677,1.314239,-0.437426,0.517264,1.181697,4.887108,1.472316,1.364720,0.711518,-1.779899,-2.031670,-0.384850,0.219324,0.485560,-0.145322,-0.697402,-1.486423,0.389015,-0.145080,0.263587,1.144372,1.929009,1.925950,-1.426481,-1.361605,-2.264597,-1.807176,0.493250,0.869934,0.784335,1.066044,-2.601629,0.130180,3.666509,1.161478,1.512381,-1.477005,-0.812016,0.107503,-1.190643,-4.215543,1.440976,-1.768518,3.591475,-1.837505,-2.325049,-2.798322,-0.135818,-0.940787,-1.037494,-1.823845,-2.986848,0.971511,4.113118,-0.973515,1.893611,-1.580040,-1.382103,1.860086,4.808342,1.776879,1.503128,1.622366,-0.951380,1.803817,-2.050308,-2.990460,-1.043495,1.026276,O
1048571,False,9,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-0.767796,0.700173,-0.321800,1.251387,-1.378242,-0.578797,-1.315752,-0.498804,-0.890975,-0.311410,0.019772,-0.078224,-2.193024,0.614221,0.210100,-0.211439,-0.955832,-0.490339,0.127646,0.237027,0.888443,0.304612,0.823322,-0.268531,0.480006,-1.235760,-0.407670,-0.219313,-1.298039,-0.977296,0.118190,-1.008157,-0.085093,0.443779,-0.547795,-0.206034,0.578445,0.534431,0.845237,-0.151058,0.923352,-0.445414,-1.613648,0.545203,-0.021421,1.652500,-0.207008,1.004704,-0.826247,1.018412,0.087931,-0.564344,0.556840,-0.615818,-0.716493,-1.338288,0.938290,-0.592005,-0.352542,-0.540096,0.201188,-0.747040,-0.875564,1.058750,-0.149474,-0.367371,0.285864,-0.511794,-1.876094,0.139965,-1.486247,1.410204,-0.298826,-0.891655,1.153022,-0.364915,0.743151,-0.063990,-0.473219,-1.486051,-0.276771,1.200137,-0.442354,-0.100176,-0.892479,0.113659,-0.835417,-0.238185,-0.566845,-0.025591,0.533255,-1.333234,-1.095484,-0.404175,0.459731,0.382190,-0.094189,-0.464520,0.132546,-0.622001,O
1048572,True,2,0,0.0,0.0,0.0,0.0,0.0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-0.436359,0.229533,-0.217747,-0.945047,-2.538283,-0.967594,2.646727,1.056987,0.252743,-1.176846,1.988734,1.402299,1.542645,2.374114,-1.293120,1.136028,-0.683156,0.457499,0.864080,-2.535765,0.166190,-0.843871,1.951779,2.279999,-4.716136,-3.295635,0.525813,0.345188,0.239860,0.179604,-1.462707,1.404047,-1.633124,0.470653,2.010504,0.670870,1.295102,0.396091,1.356043,1.122696,-0.630100,0.724816,0.253797,-3.551372,-1.777225,1.491656,-2.049211,-1.714734,1.340884,0.472378,-0.709600,2.346744,-0.631495,0.373077,-1.316770,-2.145293,0.492911,-0.699369,-0.192442,-2.795595,1.431925,1.379387,-2.206303,-0.364165,1.035386,1.434358,1.601001,-1.212048,1.237371,0.283384,-2.846901,-1.509189,1.031615,1.738187,0.133411,0.380173,-0.559824,-2.251546,-0.010583,-1.209521,1.240973,2.103541,-2.304016,-1.797628,-0.222922,-1.610461,-0.445044,-0.650274,2.033443,-0.208048,2.646747,0.030761,3.664226,-0.115696,0.351084,4.237526,-1.887074,-0.391481,0.214970,0.058401,O
1048573,True,3,0,0.0,0.0,0.0,0.0,,,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.140445,-1.616587,-0.072080,0.953251,-0.478461,-0.222708,0.702499,0.414355,0.267786,-0.405711,-0.343530,1.204372,0.076902,1.088544,0.120131,1.047200,-0.615536,-0.837141,-0.604917,-0.021784,1.845573,1.052767,0.465614,0.459787,0.645039,-1.356714,0.321062,-0.733001,1.183017,0.117784,-1.869574,0.648526,1.292015,-0.681153,-1.365167,0.360856,1.125031,0.195133,0.903543,0.711281,-0.662205,0.413907,-0.502954,-1.050177,0.440565,-1.269414,0.406765,0.744066,1.550692,-1.639663,0.586932,-0.886845,0.229665,1.258815,-0.827698,-1.455973,-0.792383,-1.770086,0.165655,-0.166849,0.667936,1.139397,0.536774,-1.417201,-1.100771,0.062753,1.875738,0.116413,0.021875,0.648698,0.301423,0.158437,0.274583,1.630830,0.502091,0.203829,-1.006045,-0.357751,-0.693568,0.132790,-0.068324,0.418513,-1.653194,-0.143282,-1.210671,-0.478533,0.690857,1.357493,-1.131298,-0.791775,1.308736,-1.388928,-1.059147,0.215068,0.888015,0.154003,-0.998676,-0.542450,-1.635553,0.990500,O


In [8]:
# Fixed seed so the train/hold-out split, the CV folds and the fitted model
# are repeatable between runs.
SEED = 42
# Only the first N_TRAIN_ROWS rows of the feature table are handed to PyCaret.
# NOTE(review): presumably a cap to keep setup/training time manageable - confirm.
N_TRAIN_ROWS = 500000

clf1 = setup(data = split_df.head(N_TRAIN_ROWS),
             target = target,
             fold=5,
             train_size=0.7,
             session_id=SEED,
             silent=True)

# Optional model sweep, currently disabled:
# best = compare_models(sort = 'f1',
#                       exclude = ['svm','knn','qda','nb','lda','ridge','ada'],
#                       n_select = 8) # return the top 8 models

# Cross-validated LightGBM classifier; PyCaret displays the per-fold metrics.
lgb=create_model('lightgbm')

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.9207,0.9219,0.3701,0.9214,0.9204,0.7168,0.717
1,0.8703,0.7596,0.2434,0.8508,0.8592,0.4813,0.4861
2,0.9114,0.9149,0.3444,0.9129,0.9118,0.6826,0.6826
3,0.9133,0.9035,0.3425,0.9124,0.9125,0.6879,0.688
4,0.9099,0.8875,0.311,0.908,0.9084,0.6682,0.6687
Mean,0.9051,0.8775,0.3223,0.9011,0.9024,0.6474,0.6485
SD,0.0178,0.0601,0.0437,0.0255,0.022,0.0845,0.0827


In [9]:
# Labels for the sklearn metric table that is currently disabled below.
cols=[
    'Accuracy',
    'Micro Precision','Micro Recall','Micro F1',
    'Macro Precision','Macro Recall','Macro F1',
]
evals=list()

# Score the fitted LightGBM model with PyCaret; no data is passed, so
# predict_model uses whatever evaluation set `setup` prepared.
# (With the sweep enabled this would run once per model: `for model in best:`.)
pred_holdout = predict_model(lgb)

# Disabled: micro/macro-averaged metrics recomputed with scikit-learn.
# evals=([ sklearn.metrics.accuracy_score(pred_holdout[target],pred_holdout['Label']),
#          sklearn.metrics.precision_score(pred_holdout[target],pred_holdout['Label'],average='micro'),
#          sklearn.metrics.recall_score(pred_holdout[target],pred_holdout['Label'],average='micro'),
#          sklearn.metrics.f1_score(pred_holdout[target],pred_holdout['Label'],average='micro'),

#          sklearn.metrics.precision_score(pred_holdout[target],pred_holdout['Label'],average='macro'),
#          sklearn.metrics.recall_score(pred_holdout[target],pred_holdout['Label'],average='macro'),
#          sklearn.metrics.f1_score(pred_holdout[target],pred_holdout['Label'],average='macro')])
# pd.DataFrame(evals,index=cols).T

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Light Gradient Boosting Machine,0.8812,0.8836,0.2588,0.8923,0.883,0.5926,0.5945
