In [39]:
import gc
import time
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.backend as K
import tensorflow.keras.layers as L
import tensorflow.keras.models as M
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
import tensorflow_addons as tfa
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import log_loss
from tqdm.notebook import tqdm

print("Tensorflow version " + tf.__version__)
AUTO = tf.data.experimental.AUTOTUNE

Tensorflow version 2.1.0


In [40]:
# Detect hardware, return appropriate distribution strategy
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection. No parameters necessary if TPU_NAME environment variable is set. On Kaggle this is always the case.
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if tpu:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
    strategy = tf.distribute.get_strategy() # default distribution strategy in Tensorflow. Works on CPU and single GPU.

print("REPLICAS: ", strategy.num_replicas_in_sync)

REPLICAS:  1


In [41]:
MIXED_PRECISION = False
XLA_ACCELERATE = True

if MIXED_PRECISION:
    from tensorflow.keras.mixed_precision import experimental as mixed_precision
    if tpu: policy = tf.keras.mixed_precision.experimental.Policy('mixed_bfloat16')
    else: policy = tf.keras.mixed_precision.experimental.Policy('mixed_float16')
    mixed_precision.set_policy(policy)
    print('Mixed precision enabled')

if XLA_ACCELERATE:
    tf.config.optimizer.set_jit(True)
    print('Accelerated Linear Algebra enabled')

Accelerated Linear Algebra enabled


In [42]:
import sys
sys.path.append('iterative-stratification-master')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

In [44]:
train_features = pd.read_csv('train_features.csv')
train_targets = pd.read_csv('train_targets_scored.csv')
test_features = pd.read_csv('test_features.csv')

ss = pd.read_csv('sample_submission.csv')

In [45]:
def preprocess(df):
    df = df.copy()
    df.loc[:, 'cp_type'] = df.loc[:, 'cp_type'].map({'trt_cp': 1, 'ctl_vehicle': 0})
    df.loc[:, 'cp_dose'] = df.loc[:, 'cp_dose'].map({'D1': 0, 'D2': 1})
    
    return df

train_features = preprocess(train_features)
test_features = preprocess(test_features)

train = train_features.copy()
test = test_features.copy()

In [46]:
train_attr = pd.DataFrame()
test_attr = pd.DataFrame()

In [47]:
import tqdm

GENES = [col for col in train.columns if col.startswith('g-')]
CELLS = [col for col in train.columns if col.startswith('c-')]
for stats in tqdm.tqdm(['sum', 'mean', 'std', 'kurt', 'skew']):
    train_attr['g_'+stats] = getattr(train[GENES], stats)(axis=1)
    train_attr['c_'+stats] = getattr(train[CELLS], stats)(axis=1)
    train_attr['gc_'+stats] = getattr(train[GENES+CELLS], stats)(axis=1)

100%|██████████| 5/5 [00:03<00:00,  1.41it/s]


In [48]:
GENES = [col for col in test.columns if col.startswith('g-')]
CELLS = [col for col in test.columns if col.startswith('c-')]
for stats in tqdm.tqdm(['sum', 'mean', 'std', 'kurt', 'skew']):
    test_attr['g_'+stats] = getattr(test[GENES], stats)(axis=1)
    test_attr['c_'+stats] = getattr(test[CELLS], stats)(axis=1)
    test_attr['gc_'+stats] = getattr(test[GENES+CELLS], stats)(axis=1)

100%|██████████| 5/5 [00:00<00:00,  8.57it/s]


In [49]:
train_genes = [s for s in train_features.columns if "g-" in s]
train_cellvia = [s for s in train_features.columns if "c-" in s]

In [50]:
from sklearn.decomposition import PCA

cell_comp = 50
gene_comp = 600

pca = PCA(n_components=gene_comp)

# pca.fit(train[:,2:])
# train_pca = pca.transform(train[:,2:])
# test_pca = pca.transform(test[:,2:])

pca.fit(train.loc[:, train_genes])
train_gene = pca.transform(train.loc[:, train_genes])
test_gene = pca.transform(test.loc[:, train_genes])

train_gene = pd.DataFrame(train_gene, columns=[f'pca_g-{i}' for i in range(gene_comp)])
test_gene = pd.DataFrame(test_gene, columns=[f'pca_g-{i}' for i in range(gene_comp)])

pca = PCA(n_components=cell_comp)
pca.fit(train.loc[:, train_cellvia])
train_cell = pca.transform(train.loc[:, train_cellvia])
test_cell = pca.transform(test.loc[:, train_cellvia])

train_cell = pd.DataFrame(train_cell, columns=[f'pca_c-{i}' for i in range(cell_comp)])
test_cell = pd.DataFrame(test_cell, columns=[f'pca_c-{i}' for i in range(cell_comp)])

train_cat = train.iloc[:,:4]
test_cat = test.iloc[:,:4]

# train = np.concatenate([train_pca, train_cat], axis=1)
# test = np.concatenate([test_pca, test_cat], axis=1)

train_pca = pd.concat([train_gene, train_cell], axis=1)
test_pca = pd.concat([test_gene, test_cell], axis =1)

In [51]:
train = pd.concat([train.iloc[:,4:], train_pca], axis=1)
test = pd.concat([test.iloc[:,4:], test_pca], axis=1)

In [52]:
from sklearn.feature_selection import VarianceThreshold

train = pd.DataFrame(train)
test = pd.DataFrame(test)


var_thresh = VarianceThreshold(0.8)  #<-- Update
data = train.append(test)
data_transformed = var_thresh.fit_transform(data)

train_features_transformed = data_transformed[ : train.shape[0]]
test_features_transformed = data_transformed[-test.shape[0] : ]

In [53]:
train = pd.concat([train_cat, pd.DataFrame(train_features_transformed)], axis=1)
test = pd.concat([test_cat, pd.DataFrame(test_features_transformed)], axis=1)

In [54]:
from sklearn.cluster import KMeans
def fe_cluster(train, test, n_clusters_g = 35, n_clusters_c = 5, SEED = 123):
    
    features_g = list(train.columns[4:776])
    features_c = list(train.columns[776:876])
    
    def create_cluster(train, test, features, kind = 'g', n_clusters = n_clusters_g):
        train_ = train[features].copy()
        test_ = test[features].copy()
        data = pd.concat([train_, test_], axis = 0)
        kmeans = KMeans(n_clusters = n_clusters, random_state = SEED).fit(data)
        train[f'clusters_{kind}'] = kmeans.labels_[:train.shape[0]]
        test[f'clusters_{kind}'] = kmeans.labels_[train.shape[0]:]
        train = pd.get_dummies(train, columns = [f'clusters_{kind}'])
        test = pd.get_dummies(test, columns = [f'clusters_{kind}'])
        return train, test
    
    train, test = create_cluster(train, test, features_g, kind = 'g', n_clusters = n_clusters_g)
    train, test = create_cluster(train, test, features_c, kind = 'c', n_clusters = n_clusters_c)
    return train, test

train , test = fe_cluster(train,test)

In [55]:
train = pd.concat([train, train_attr], axis=1)
test = pd.concat([test, test_attr], axis=1)

In [56]:
train = train.merge(train_targets, on='sig_id')
train = train[train['cp_type']!=0].reset_index(drop=True)

cp_type0 = test[test['cp_type']==0].reset_index(drop=True)
test = test[test['cp_type']!=0].reset_index(drop=True)


train_targets = train[train_targets.columns]

target_cols = train_targets.drop('sig_id', axis=1).columns.values.tolist()

train_cols = [ c for c in train if c not in target_cols]

train = train[train_cols]

In [57]:
del test['sig_id'], train["sig_id"], train_targets['sig_id']

train = train.drop('cp_type', axis=1)
test = test.drop('cp_type', axis=1)

In [58]:
def process_data(data):
    data = pd.get_dummies(data, columns=['cp_time','cp_dose'])
    return data

train = process_data(train)
test = process_data(test)

In [59]:
top_feats = [   0,    1,    2,    3,    4,    6,    7,    8,    9,   10,   11,
         12,   14,   15,   16,   17,   18,   19,   20,   21,   22,   25,
         26,   27,   28,   29,   30,   31,   32,   33,   34,   35,   36,
         37,   39,   40,   42,   43,   44,   45,   46,   47,   48,   49,
         50,   51,   52,   53,   54,   55,   56,   57,   58,   59,   60,
         61,   62,   64,   65,   68,   69,   71,   72,   73,   75,   76,
         77,   78,   79,   81,   82,   83,   84,   85,   86,   87,   88,
         89,   90,   91,   92,   94,   95,   96,   97,   98,   99,  101,
        102,  103,  104,  105,  106,  107,  108,  109,  110,  111,  114,
        115,  116,  117,  119,  120,  121,  122,  123,  124,  125,  126,
        127,  128,  129,  130,  131,  132,  133,  134,  135,  136,  137,
        138,  139,  140,  141,  142,  143,  144,  145,  146,  147,  148,
        149,  150,  151,  152,  153,  154,  156,  157,  158,  159,  160,
        162,  163,  164,  165,  166,  167,  168,  169,  170,  171,  172,
        173,  175,  176,  177,  178,  179,  181,  183,  184,  185,  186,
        187,  189,  190,  191,  192,  193,  195,  196,  197,  198,  199,
        200,  201,  202,  203,  205,  206,  207,  209,  210,  211,  214,
        216,  218,  220,  221,  223,  225,  227,  228,  229,  230,  231,
        232,  233,  234,  235,  236,  237,  238,  239,  240,  241,  242,
        244,  245,  246,  247,  248,  249,  250,  253,  254,  255,  256,
        257,  258,  259,  260,  262,  263,  265,  266,  267,  268,  269,
        270,  272,  273,  275,  276,  277,  278,  279,  282,  283,  284,
        285,  286,  287,  288,  289,  290,  291,  292,  293,  294,  296,
        297,  298,  299,  300,  301,  302,  303,  304,  305,  306,  307,
        308,  309,  311,  312,  313,  314,  315,  316,  317,  318,  319,
        320,  321,  323,  325,  326,  327,  328,  329,  330,  332,  333,
        336,  337,  338,  339,  340,  341,  342,  343,  344,  345,  346,
        348,  349,  350,  351,  352,  353,  354,  356,  357,  358,  359,
        360,  361,  362,  363,  365,  368,  370,  371,  372,  373,  375,
        376,  377,  378,  379,  380,  381,  382,  383,  384,  385,  387,
        388,  390,  392,  393,  394,  397,  398,  400,  401,  402,  403,
        404,  405,  406,  407,  408,  410,  411,  413,  414,  415,  416,
        417,  418,  419,  420,  421,  422,  423,  424,  425,  426,  427,
        428,  429,  432,  433,  435,  436,  437,  438,  439,  440,  441,
        442,  443,  444,  447,  448,  449,  451,  452,  453,  454,  455,
        456,  458,  460,  461,  462,  463,  464,  465,  468,  469,  471,
        472,  473,  474,  475,  476,  478,  479,  480,  481,  483,  485,
        486,  487,  488,  489,  490,  491,  494,  495,  496,  497,  498,
        502,  503,  504,  505,  506,  507,  508,  509,  510,  511,  512,
        513,  514,  515,  518,  519,  520,  521,  522,  524,  525,  528,
        529,  530,  531,  532,  538,  539,  540,  541,  542,  544,  545,
        546,  548,  549,  551,  552,  553,  554,  555,  558,  559,  560,
        561,  563,  565,  566,  567,  568,  569,  570,  571,  572,  573,
        574,  575,  576,  577,  578,  579,  580,  581,  584,  586,  588,
        590,  591,  592,  595,  596,  597,  598,  599,  600,  601,  602,
        603,  604,  605,  606,  607,  608,  610,  611,  612,  613,  615,
        616,  617,  619,  620,  621,  622,  623,  625,  626,  627,  628,
        629,  632,  633,  634,  636,  637,  638,  639,  640,  643,  645,
        646,  647,  649,  650,  651,  652,  656,  657,  659,  660,  661,
        662,  663,  664,  665,  666,  667,  668,  669,  670,  671,  673,
        674,  675,  677,  679,  680,  681,  683,  684,  685,  686,  688,
        689,  690,  691,  692,  693,  694,  695,  696,  697,  698,  700,
        701,  702,  703,  704,  705,  706,  707,  708,  709,  710,  711,
        712,  713,  714,  715,  716,  717,  718,  719,  720,  721,  722,
        723,  724,  725,  726,  727,  728,  729,  730,  731,  732,  733,
        734,  735,  736,  737,  738,  739,  740,  741,  742,  744,  745,
        746,  747,  750,  751,  752,  753,  754,  755,  756,  757,  758,
        759,  760,  761,  762,  763,  764,  765,  767,  768,  769,  770,
        771,  772,  773,  774,  775,  776,  777,  778,  779,  780,  781,
        782,  783,  784,  785,  786,  787,  788,  789,  790,  791,  792,
        793,  794,  795,  796,  797,  798,  799,  800,  801,  802,  803,
        804,  805,  806,  807,  808,  809,  810,  811,  812,  813,  814,
        815,  816,  817,  818,  819,  820,  821,  822,  823,  824,  825,
        826,  827,  828,  829,  830,  831,  832,  833,  834,  835,  836,
        837,  839,  840,  841,  842,  843,  844,  845,  846,  847,  848,
        849,  850,  851,  852,  853,  854,  855,  856,  857,  858,  859,
        860,  861,  862,  863,  864,  865,  866,  867,  868,  870,  872,
        873,  874,  875,  876,  878,  879,  880,  881,  882,  883,  885,
        887,  888,  889,  890,  892,  893,  894,  895,  896,  898,  899,
        900,  901,  902,  903,  905,  906,  907,  908,  910,  913,  914,
        917,  918,  920,  921,  922,  923,  924,  926,  928,  929,  931,
        932,  933,  934,  935,  936,  937,  938,  939,  940,  942,  943,
        944,  945,  946,  948,  949,  950,  951,  952,  953,  954,  956,
        957,  959,  960,  961,  962,  963,  964,  965,  966,  967,  969,
        971,  972,  973,  976,  978,  980,  982,  983,  985,  986,  987,
        988,  989,  990,  991,  992,  993,  994,  995,  996,  997,  998,
        999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1008, 1009, 1010,
       1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021,
       1022, 1026, 1027, 1029, 1030, 1031, 1034, 1037, 1038, 1040, 1045,
       1047, 1048, 1051, 1052, 1053, 1055, 1056, 1057, 1062, 1063, 1064,
       1066, 1068, 1069, 1070, 1071, 1072, 1073, 1075, 1077, 1079, 1080,
       1081]

In [60]:
train = train.values[:, top_feats]
test = test.values[:, top_feats]

In [61]:
train = pd.DataFrame(train)
test = pd.DataFrame(test)

In [62]:
col_num_feat = train.shape[1]
col_num_tar = len(train_targets.columns)

print(col_num_feat)
print(col_num_tar)

892
206


In [63]:
train_targets

Unnamed: 0,5-alpha_reductase_inhibitor,11-beta-hsd1_inhibitor,acat_inhibitor,acetylcholine_receptor_agonist,acetylcholine_receptor_antagonist,acetylcholinesterase_inhibitor,adenosine_receptor_agonist,adenosine_receptor_antagonist,adenylyl_cyclase_activator,adrenergic_receptor_agonist,...,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21943,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
21944,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
21945,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
21946,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [64]:
from sklearn.model_selection import train_test_split,cross_validate
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import one_hot,Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout, AlphaDropout, Dense , Flatten ,Embedding, Input, LSTM, Bidirectional, BatchNormalization, LayerNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import text_to_word_sequence
from tensorflow.keras.optimizers import Adam, Adagrad
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.losses import BinaryCrossentropy

import tensorflow_addons as tfa

from sklearn.preprocessing import MultiLabelBinarizer

In [66]:
def create_model(num_columns, hidden_units, dropout_rate):
    
    inp = tf.keras.layers.Input(shape = (num_columns, ))
#     x = tf.keras.layers.Dropout(dropout_rate)(inp)
    x = tf.keras.layers.BatchNormalization()(inp)
#     x = tfa.layers.WeightNormalization(Dense(2048, activation = 'elu'))(x)
    x = tf.keras.layers.Dropout(dropout_rate)(x)
    
    for units in hidden_units:
        
        x = tfa.layers.WeightNormalization(Dense(units))(x)
        x = tf.keras.layers.LeakyReLU(alpha=0.2)(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Dropout(dropout_rate)(x)
#         x = tf.keras.layers.BatchNormalization()(x)
       
#     x = tf.keras.layers.BatchNormalization()(x)
    
    out = tfa.layers.WeightNormalization(Dense(206, activation = 'sigmoid'))(x)
    
    model = tf.keras.models.Model(inputs = inp, outputs = out)
    
    
    model.compile(optimizer=tfa.optimizers.AdamW(lr = 5e-4, weight_decay = 1e-5, clipvalue = 900), 
                  loss=BinaryCrossentropy(label_smoothing=5e-4))
    
    return model

In [67]:
ss = ss.set_index('sig_id')
ids = list(cp_type0['sig_id'])
cp_type0 = ss[ss.index.isin(ids)].reset_index()
ss = ss[~ss.index.isin(ids)].reset_index()

In [68]:
N_STARTS = 1
HIDDEN_UNITS = [4096, 512, 4096]
# HIDDEN_UNITS = 400
DROPOUT_RATE = 0.43912157
EPOCHS = 100
BATCH_SIZE = 128
N_SPLITS = 10

res = train_targets.copy()
ss.loc[:, train_targets.columns] = 0
res.loc[:, train_targets.columns] = 0

for seed in range(N_STARTS):
    
    for n, (tr, te) in enumerate(MultilabelStratifiedKFold(n_splits = N_SPLITS, 
                                                 random_state = seed, 
                                                 shuffle = True).split(train_targets, 
                                                                       train_targets)):
        print(f'Seed {seed}, Fold {n}:')
        start_time = time.time()
        
        with strategy.scope():
        
            model = create_model(col_num_feat, HIDDEN_UNITS, DROPOUT_RATE)
            
        rlr = ReduceLROnPlateau(monitor='val_loss', factor = 0.1, patience = 3, 
                                verbose = 0, epsilon = 1e-4, mode = 'min')
        
        ckp = ModelCheckpoint(f'model_{seed}_{n}.hdf5', monitor = 'val_loss', verbose = 1, 
                              save_best_only = True, save_weights_only = True, mode = 'min')
        
        erl = EarlyStopping(monitor = 'val_loss', 
                                min_delta = 1e-4, 
                                patience = 10, mode = 'min', 
                                baseline = None, 
                                restore_best_weights = True, 
                                verbose = 1)
        
        x_tr, x_val = train.values[tr], train.values[te]
        y_tr, y_val = train_targets.astype(float).values[tr], train_targets.astype(float).values[te]
        
        history = model.fit(x_tr, y_tr, validation_data = (x_val, y_val), epochs = EPOCHS, 
                            batch_size = BATCH_SIZE, callbacks = [rlr, ckp, erl], verbose = 2)
        
        hist = pd.DataFrame(history.history)
        
        model.load_weights(f'model_{seed}_{n}.hdf5')

        ss.loc[:, train_targets.columns] += model.predict(test)
        res.loc[te, train_targets.columns] += model.predict(train.values[te])
        print('Best Validation Loss:\t', hist['val_loss'].min())
        print('Cost Time:\t', time.time() - start_time)
        print('-' * 50)
        
        K.clear_session()
        del model, history, hist
        gc.collect()
    
ss.loc[:, train_targets.columns] /= ((n + 1) * N_STARTS)
res.loc[:, train_targets.columns] /= N_STARTS



Seed 0, Fold 0:
Train on 19753 samples, validate on 2195 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.09893, saving model to model_0_0.hdf5
19753/19753 - 4s - loss: 0.4692 - val_loss: 0.0989
Epoch 2/100

Epoch 00002: val_loss improved from 0.09893 to 0.03083, saving model to model_0_0.hdf5
19753/19753 - 2s - loss: 0.0543 - val_loss: 0.0308
Epoch 3/100

Epoch 00003: val_loss improved from 0.03083 to 0.02370, saving model to model_0_0.hdf5
19753/19753 - 2s - loss: 0.0278 - val_loss: 0.0237
Epoch 4/100

Epoch 00004: val_loss improved from 0.02370 to 0.02127, saving model to model_0_0.hdf5
19753/19753 - 2s - loss: 0.0235 - val_loss: 0.0213
Epoch 5/100

Epoch 00005: val_loss improved from 0.02127 to 0.02045, saving model to model_0_0.hdf5
19753/19753 - 2s - loss: 0.0218 - val_loss: 0.0205
Epoch 6/100

Epoch 00006: val_loss improved from 0.02045 to 0.01998, saving model to model_0_0.hdf5
19753/19753 - 2s - loss: 0.0211 - val_loss: 0.0200
Epoch 7/100

Epoch 00007: val_los

Epoch 29/100

Epoch 00029: val_loss improved from 0.01765 to 0.01764, saving model to model_0_1.hdf5
19753/19753 - 2s - loss: 0.0159 - val_loss: 0.0176
Epoch 30/100

Epoch 00030: val_loss improved from 0.01764 to 0.01762, saving model to model_0_1.hdf5
19753/19753 - 2s - loss: 0.0158 - val_loss: 0.0176
Epoch 31/100

Epoch 00031: val_loss did not improve from 0.01762
19753/19753 - 2s - loss: 0.0158 - val_loss: 0.0176
Epoch 32/100

Epoch 00032: val_loss did not improve from 0.01762
19753/19753 - 2s - loss: 0.0157 - val_loss: 0.0176
Epoch 33/100

Epoch 00033: val_loss did not improve from 0.01762
19753/19753 - 2s - loss: 0.0158 - val_loss: 0.0176
Epoch 34/100

Epoch 00034: val_loss did not improve from 0.01762
19753/19753 - 2s - loss: 0.0157 - val_loss: 0.0176
Epoch 35/100

Epoch 00035: val_loss did not improve from 0.01762
19753/19753 - 2s - loss: 0.0158 - val_loss: 0.0176
Epoch 36/100

Epoch 00036: val_loss did not improve from 0.01762
19753/19753 - 2s - loss: 0.0157 - val_loss: 0.0176


Epoch 17/100

Epoch 00017: val_loss improved from 0.01831 to 0.01810, saving model to model_0_3.hdf5
19753/19753 - 2s - loss: 0.0181 - val_loss: 0.0181
Epoch 18/100

Epoch 00018: val_loss did not improve from 0.01810
19753/19753 - 2s - loss: 0.0180 - val_loss: 0.0182
Epoch 19/100

Epoch 00019: val_loss improved from 0.01810 to 0.01800, saving model to model_0_3.hdf5
19753/19753 - 2s - loss: 0.0177 - val_loss: 0.0180
Epoch 20/100

Epoch 00020: val_loss did not improve from 0.01800
19753/19753 - 2s - loss: 0.0175 - val_loss: 0.0180
Epoch 21/100

Epoch 00021: val_loss did not improve from 0.01800
19753/19753 - 2s - loss: 0.0174 - val_loss: 0.0180
Epoch 22/100

Epoch 00022: val_loss improved from 0.01800 to 0.01788, saving model to model_0_3.hdf5
19753/19753 - 2s - loss: 0.0174 - val_loss: 0.0179
Epoch 23/100

Epoch 00023: val_loss did not improve from 0.01788
19753/19753 - 2s - loss: 0.0171 - val_loss: 0.0179
Epoch 24/100

Epoch 00024: val_loss did not improve from 0.01788
19753/19753 - 2

KeyboardInterrupt: 

In [None]:
metrics = []
for _target in train_targets.columns:
    metrics.append(log_loss(train_targets.loc[:, _target], res.loc[:, _target]))
    
print(f'OOF Metric: {np.mean(metrics)}')

In [37]:
res_ = res.copy()

In [38]:
metrics = []


f = 0.01
factor = np.arange(0, 0.1, 0.005)

for f in factor:
    res_ = res.copy()
    for col in res.columns:
        res_[col] *= (1 - f)
#         res_[col] += (f / len(res_[col]))


#     res_.loc[train_features['cp_type'] == 0, train_targets.columns] = 0

    for _target in train_targets.columns:
        metrics.append(log_loss(train_targets.loc[:, _target], res_.loc[:, _target]))

    print(f'OOF Metric with postprocessing {f}: {np.mean(metrics)}')

OOF Metric with postprocessing 0.0: 0.015759747867676487
OOF Metric with postprocessing 0.005: 0.01575452165456358
OOF Metric with postprocessing 0.01: 0.01575104883280062
OOF Metric with postprocessing 0.015: 0.015748368467450594
OOF Metric with postprocessing 0.02: 0.01574617859511822
OOF Metric with postprocessing 0.025: 0.015744339118709753
OOF Metric with postprocessing 0.03: 0.01574277134476866
OOF Metric with postprocessing 0.035: 0.015741425697267483
OOF Metric with postprocessing 0.04: 0.015740268463382848
OOF Metric with postprocessing 0.045: 0.015739275435586328
OOF Metric with postprocessing 0.05: 0.015738428511502027
OOF Metric with postprocessing 0.055: 0.015737713723114766
OOF Metric with postprocessing 0.06: 0.01573712002129769
OOF Metric with postprocessing 0.065: 0.01573663848825139
OOF Metric with postprocessing 0.07: 0.015736261806283187
OOF Metric with postprocessing 0.075: 0.0157359838873396
OOF Metric with postprocessing 0.08: 0.015735799607277563
OOF Metric with

In [None]:
metrics = []

# for col in res.columns:
#     res[col] *= 0.986
#     res.loc[res[col] >= 0.030184] = 0.03
#     res.loc[res[col] <= 0.000411] = 0.0008

res.loc[train_features['cp_type'] == 0, train_targets.columns] = 0

for _target in train_targets.columns:
    metrics.append(log_loss(train_targets.loc[:, _target], res.loc[:, _target]))
    
print(f'OOF Metric with postprocessing: {np.mean(metrics)}')

In [None]:
for col in res.columns:
        ss[col] *= (1 - f)
        ss[col] += (f / len(res[col]))
        
ss.loc[test_features['cp_type'] == 0, train_targets.columns] = 0

In [None]:
ss.to_csv('submission', index = False)