In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import StratifiedKFold
from sklearn import preprocessing
from sklearn.metrics import log_loss

import gc
import time

import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow.keras.backend as K
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import activations,callbacks
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import initializers

from tensorflow.keras.models import Model

In [122]:
data_train = pd.read_csv('train.csv');
data_train.set_index(['id'],inplace=True);
data_train['target'] = data_train['target'].apply(lambda s:int(s[-1])-1)

data_test = pd.read_csv('test.csv');
data_test.set_index(['id'],inplace=True);

X_train = data_train.copy().drop('target',axis=1);
y_train = data_train['target'];

X_test = data_test.copy();

In [123]:
Y_train = pd.get_dummies(data_train['target'])

In [4]:
import tensorflow.keras.backend as K

cce = tf.keras.losses.CategoricalCrossentropy()
def custom_metric(y_true, y_pred):
    y_pred = K.clip(y_pred, 1e-15, 1-1e-15) # restrict values between 1e-15 and 1-1e-15
    loss = K.mean(cce(y_true, y_pred))
    return loss


es = tf.keras.callbacks.EarlyStopping(
    monitor='val_custom_metric', min_delta=1e-05, patience=5, verbose=0,
    mode='min', baseline=None, restore_best_weights=True)

plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_custom_metric', factor=0.7, patience=2, verbose=0,
    mode='min')

In [5]:
def conv_model():

    conv_inputs = layers.Input(shape = (75))
    #----------- Embedding layers ----------------------
    embed = layers.Embedding (input_dim = 354, 
                              output_dim = 7,
                              embeddings_regularizer='l2')(conv_inputs)
    #----------- Convolution layers ----------------------
    embed = layers.Conv1D(12,1,activation = 'relu')(embed)        
    embed = layers.Flatten()(embed)
    hidden = layers.Dropout(0.3)(embed)
    
    #----------- Residual blocks layers ----------------------
    hidden = tfa.layers.WeightNormalization(
        layers.Dense(
                units=32,
                activation ='selu',
                kernel_initializer = "lecun_normal"))(hidden)
    
    output = layers.Dropout(0.3)(layers.Concatenate()([embed, hidden]))
    output = tfa.layers.WeightNormalization(
        layers.Dense(
                units = 32,
                activation='relu',
                kernel_initializer = "lecun_normal"))(output) 
    output = layers.Dropout(0.4)(layers.Concatenate()([embed, hidden, output]))
    output = tfa.layers.WeightNormalization(
        layers.Dense(
                units = 32, 
                activation = 'elu',
                kernel_initializer = "lecun_normal"))(output)
    
    #----------- Final layer -----------------------
    conv_outputs = layers.Dense(
                units = 9, 
                activation ='softmax',
                kernel_initializer ="lecun_normal")(output)
    
    #----------- Model instantiation  ---------------
    model = Model(conv_inputs,conv_outputs)
    
    return model

In [21]:
t = tf.constant([ [[1, 1, 1],
                   [2, 2, 2]],
                 
                  [[3, 3, 3],
                   [4, 4, 4]],
                 
                  [[5, 5, 5],
                   [6, 6, 6]] ])
tf.slice(t, [1, 0, 0], [1, 1, 3])  # [[[3, 3, 3]]]
tf.slice(t, [1, 0, 0], [1, 2, 3])  # [[[3, 3, 3],
                                   #   [4, 4, 4]]]
tf.slice(t, [1, 0, 0], [2, 1, 3])  # [[[3, 3, 3]],
                                   #  [[5, 5, 5]]]
    
t    

<tf.Tensor: shape=(3, 2, 3), dtype=int32, numpy=
array([[[1, 1, 1],
        [2, 2, 2]],

       [[3, 3, 3],
        [4, 4, 4]],

       [[5, 5, 5],
        [6, 6, 6]]])>

In [183]:
def conv_model_extend():

    conv_inputs = layers.Input(shape = (75+9))
    #conv_inputs = tf.slice(conv_inputs,[0,0],[-1, 75]);
        
    feature_inputs = layers.Lambda(lambda x: x[:,0:75])(conv_inputs);
    lgbm_predictions = layers.Lambda(lambda x: x[:,75:])(conv_inputs);
    
    #conv_inputs = layers.Cropping1D(cropping=(0,0))(conv_inputs);
    #conv_inputs = layers.Cropping2D(cropping=( (0,0), (0,0)) )(conv_inputs);
    
    #----------- Embedding layers ----------------------
    embed = layers.Embedding (input_dim = 354, 
                              output_dim = 7,
                              embeddings_regularizer='l2')(feature_inputs)
    #----------- Convolution layers ----------------------
    embed = layers.Conv1D(12,1,activation = 'relu')(embed)        
    embed = layers.Flatten()(embed)
    hidden = layers.Dropout(0.3)(embed)
    
    #----------- Residual blocks layers ----------------------
    hidden = tfa.layers.WeightNormalization(
        layers.Dense(
                units=32,
                activation ='selu',
                kernel_initializer = "lecun_normal"))(hidden)
    
    output = layers.Dropout(0.3)(layers.Concatenate()([embed, hidden]))
    output = tfa.layers.WeightNormalization(
        layers.Dense(
                units = 32,
                activation='relu',
                kernel_initializer = "lecun_normal"))(output) 
    output = layers.Dropout(0.4)(layers.Concatenate()([embed, hidden, output]))
    output = tfa.layers.WeightNormalization(
        layers.Dense(
                units = 32, 
                activation = 'elu',
                kernel_initializer = "lecun_normal"))(output)
    
    #----------- Final layer -----------------------
    
    glue = layers.Concatenate()([output,lgbm_predictions]);
    
    conv_outputs = layers.Dense(
                units = 9, 
                activation ='softmax',
                kernel_initializer ="lecun_normal")(glue)
    
    #----------- Model instantiation  ---------------
    model = Model(conv_inputs,conv_outputs)
    
    return model

In [178]:
from lightgbm import LGBMClassifier


from sklearn.model_selection import train_test_split


X,X_valid, Y,Y_valid, y,y_valid = train_test_split(X_train, Y_train, y_train, test_size=0.5, random_state=1, stratify=y_train)

#X_appended = X;
#for jj in range(9):
    #X_appended['col'+str(jj)]=0;

myLGBM = LGBMClassifier(reg_alpha=60.0,n_estimators=200)
myLGBM.fit(X,y);

In [179]:
Y_pred_lgbm = myLGBM.predict_proba(X);
Y_valid_pred_lgbm = myLGBM.predict_proba(X_valid);

print(log_loss(Y_valid,Y_valid_pred_lgbm));

1.7487281091111961


In [180]:
X_appended = X.join(pd.DataFrame(Y_pred_lgbm,index=X.index),on=X.index);
X_valid_appended = X_valid.join(pd.DataFrame(Y_valid_pred_lgbm,index=X_valid.index),on=X_valid.index);

In [181]:
SEED = 2021
EPOCH = 30

K.clear_session()

#================= NN CONV MODEL training =========

print("\n-----Convolution model Training----\n")

model_conv = conv_model_extend()

model_conv.compile(loss='categorical_crossentropy', 
                        optimizer = keras.optimizers.Adam(learning_rate=2e-4), 
                        metrics=custom_metric)
start = time.time();
model_conv.fit(X_appended, Y,
          batch_size = 256, epochs = EPOCH,
          validation_data=(X_valid_appended, Y_valid),
          callbacks=[es, plateau],
          verbose = 1)
end = time.time();
print(end-start)


-----Convolution model Training----

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
44.004634141922


In [182]:
#============== Convolution Model prediction ==========
Y_valid_pred = model_conv.predict(X_valid_appended)
print(log_loss(Y_valid,Y_valid_pred));

1.7467987277264894


In [164]:
#============== Convolution Model prediction ==========
Y_valid_pred = model_conv.predict(X_valid_appended)
print(log_loss(Y_valid,Y_valid_pred));

1.742095083164703


In [158]:
X,X_valid, Y,Y_valid, y,y_valid = train_test_split(X_train, Y_train, y_train, test_size=0.3, random_state=1, stratify=y_train)

Y_pred_oof = np.zeros( (Y.shape[0],9) );
Y_valid_pred = np.zeros( (Y_valid.shape[0],9) );

from sklearn.model_selection import StratifiedKFold

N_FOLDS = 3;
skf = StratifiedKFold(n_splits=N_FOLDS)

EPOCH = 30
for fold_idx, oof_idx in skf.split(X, y):
    
    X_fold = X.iloc[fold_idx];
    X_oof  = X.iloc[oof_idx];
    
    Y_fold = Y.iloc[fold_idx];
    Y_oof  = Y.iloc[oof_idx];
    
    y_fold = y.iloc[fold_idx];
    y_oof  = y.iloc[oof_idx];
    
    myLGBM = LGBMClassifier(reg_alpha=60.0,n_estimators=200)
    myLGBM.fit(X_fold,y_fold);
    
    Y_fold_pred_lgbm = myLGBM.predict_proba(X_fold);
    Y_oof_pred_lgbm = myLGBM.predict_proba(X_oof);
    Y_valid_pred_lgbm = myLGBM.predict_proba(X_valid);
    
    X_fold_appended = X_fold.join(pd.DataFrame(Y_fold_pred_lgbm,index=X_fold.index),on=X_fold.index);
    X_oof_appended = X_oof.join(pd.DataFrame(Y_oof_pred_lgbm,index=X_oof.index),on=X_oof.index);
    X_valid_appended = X_valid.join(pd.DataFrame(Y_valid_pred_lgbm,index=X_valid.index),on=X_valid.index);
    
    #================= NN CONV MODEL training =========
    print("\n-----Convolution model Training----\n")

    K.clear_session()
    model_conv = conv_model_extend()
    model_conv.compile(loss='categorical_crossentropy', 
                            optimizer = keras.optimizers.Adam(learning_rate=2e-4), 
                            metrics=custom_metric)
    model_conv.fit(X_fold_appended, Y_fold,
              batch_size = 256, epochs = EPOCH,
              validation_data=(X_oof_appended, Y_oof),
              callbacks=[es, plateau],
              verbose = 0)
    Y_fold_pred = model_conv.predict(X_fold_appended);
    Y_oof_pred = model_conv.predict(X_oof_appended);
    Y_valid_pred_fold = model_conv.predict(X_valid_appended);
    
    fold_loss = log_loss(Y_fold,Y_fold_pred);
    oof_loss = log_loss(Y_oof,Y_oof_pred);
    
    # note the distinction.
    # (Y_pred)_oof is the out-of-fold prediction on Y_pred, or the entire Y dataset.
    # (Y_oof)_pred is the prediction on the Y_oof, or out-of-fold subset of Y.
    Y_pred_oof[oof_idx] = Y_oof_pred;
    Y_valid_pred += Y_valid_pred_fold / N_FOLDS;
    
    print('fold loss=%0.6f, oof_loss=%0.6f'%(fold_loss,oof_loss));


-----Convolution model Training----

fold loss=1.724925, oof_loss=1.743792

-----Convolution model Training----

fold loss=1.723106, oof_loss=1.747159

-----Convolution model Training----

fold loss=1.726711, oof_loss=1.743512


In [159]:
print( log_loss(Y_valid,Y_valid_pred) )

1.7400409719754455


In [157]:
print( log_loss(Y_valid,Y_valid_pred) )

1.7425347444839003


In [193]:
#X,X_valid, Y,Y_valid, y,y_valid = train_test_split(X_train, Y_train, y_train, test_size=0.3, random_state=1, stratify=y_train)

X = X_train;
Y = Y_train;
y = y_train;

Y_pred_oof = np.zeros( (X.shape[0],9) );
Y_test_pred = np.zeros( (X_test.shape[0],9) );

from sklearn.model_selection import StratifiedKFold

N_FOLDS = 18;
skf = StratifiedKFold(n_splits=N_FOLDS)

EPOCH = 100
for fold_idx, oof_idx in skf.split(X, y):
    
    X_fold = X.iloc[fold_idx];
    X_oof  = X.iloc[oof_idx];
    
    Y_fold = Y.iloc[fold_idx];
    Y_oof  = Y.iloc[oof_idx];
    
    y_fold = y.iloc[fold_idx];
    y_oof  = y.iloc[oof_idx];
    
    myLGBM = LGBMClassifier(reg_alpha=60.0,n_estimators=200)
    myLGBM.fit(X_fold,y_fold);
    
    Y_fold_pred_lgbm = myLGBM.predict_proba(X_fold);
    Y_oof_pred_lgbm = myLGBM.predict_proba(X_oof);
    Y_test_pred_lgbm = myLGBM.predict_proba(X_test);
    
    X_fold_appended = X_fold.join(pd.DataFrame(Y_fold_pred_lgbm,index=X_fold.index),on=X_fold.index);
    X_oof_appended = X_oof.join(pd.DataFrame(Y_oof_pred_lgbm,index=X_oof.index),on=X_oof.index);
    X_test_appended = X_test.join(pd.DataFrame(Y_test_pred_lgbm,index=X_test.index),on=X_test.index);
    
    #================= NN CONV MODEL training =========
    print("\n-----Convolution model Training----\n")

    K.clear_session()
    model_conv = conv_model_extend()
    model_conv.compile(loss='categorical_crossentropy', 
                            optimizer = keras.optimizers.Adam(learning_rate=2e-4), 
                            metrics=custom_metric)
    model_conv.fit(X_fold_appended, Y_fold,
              batch_size = 256, epochs = EPOCH,
              validation_data=(X_oof_appended, Y_oof),
              callbacks=[es, plateau],
              verbose = 0)
    Y_fold_pred = model_conv.predict(X_fold_appended);
    Y_oof_pred = model_conv.predict(X_oof_appended);
    Y_test_pred_fold = model_conv.predict(X_test_appended);
    
    fold_loss = log_loss(Y_fold,Y_fold_pred);
    oof_loss = log_loss(Y_oof,Y_oof_pred);
    
    # note the distinction.
    # (Y_pred)_oof is the out-of-fold prediction on Y_pred, or the entire Y dataset.
    # (Y_oof)_pred is the prediction on the Y_oof, or out-of-fold subset of Y.
    Y_pred_oof[oof_idx] = Y_oof_pred;
    Y_test_pred += Y_test_pred_fold / N_FOLDS;
    
    print('fold loss=%0.6f, oof_loss=%0.6f'%(fold_loss,oof_loss));


-----Convolution model Training----

fold loss=1.727654, oof_loss=1.743492

-----Convolution model Training----

fold loss=1.727700, oof_loss=1.736980

-----Convolution model Training----

fold loss=1.728198, oof_loss=1.742514

-----Convolution model Training----

fold loss=1.728713, oof_loss=1.745147

-----Convolution model Training----

fold loss=1.723519, oof_loss=1.732308

-----Convolution model Training----

fold loss=1.729258, oof_loss=1.733326

-----Convolution model Training----

fold loss=1.724473, oof_loss=1.746538

-----Convolution model Training----

fold loss=1.729436, oof_loss=1.744463

-----Convolution model Training----

fold loss=1.725399, oof_loss=1.745519

-----Convolution model Training----

fold loss=1.726905, oof_loss=1.741560

-----Convolution model Training----

fold loss=1.722599, oof_loss=1.732601

-----Convolution model Training----

fold loss=1.729700, oof_loss=1.744953

-----Convolution model Training----

fold loss=1.727233, oof_loss=1.747715

-----Convol

In [194]:
mysubmission = Y_test_pred;

mysubmission = pd.DataFrame(mysubmission);
mysubmission.set_index(X_test.index,inplace=True);
mysubmission.columns = ['Class_'+str(jj) for jj in range(1,10)]
mysubmission.to_csv('submission20.csv',index=True);

In [187]:
Y_test_pred.shape

(100000, 9)

In [189]:
X_test.shape

(100000, 75)