In [None]:
import cupy, cudf
import numpy as np, pandas as pd
import matplotlib.pyplot as plt, gc, os
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# Directory prefix under which the per-fold weight files are written and later read back.
# The trailing slash matters: file names are appended to it by plain string formatting.
modelpath = 'model/'

In [None]:
class TransformerBlock(layers.Layer):
    """Transformer encoder block.

    Applies multi-head self-attention followed by a two-layer feed-forward
    network. Each sub-layer's output goes through dropout, is added back to
    its input (residual connection) and is then layer-normalized.

    Args:
        embed_dim: passed to ``MultiHeadAttention`` as ``key_dim``.
        feat_dim: width of the feed-forward output. It is added to the
            attention sub-layer output, so it has to be compatible with the
            last dimension of the block input (normally equal to it).
        num_heads: number of attention heads.
        ff_dim: width of the hidden (GELU) feed-forward layer.
        rate: dropout rate used after both sub-layers.
        **kwargs: forwarded to ``layers.Layer`` (e.g. ``name``, ``dtype``).
    """

    def __init__(self, embed_dim=95, feat_dim=188, num_heads=4, ff_dim=254, rate=0.35, **kwargs):
        super().__init__(**kwargs)
        # Keep the hyper-parameters so get_config() can report them.
        self.embed_dim = embed_dim
        self.feat_dim = feat_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="gelu"), layers.Dense(feat_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Run the block on ``inputs``.

        Args:
            inputs: tensor used as both query and value of the attention layer.
            training: forwarded to the dropout layers. Defaults to ``None`` so
                the block can be called without the flag being supplied.

        Returns:
            The layer-normalized output of the feed-forward sub-layer.
        """
        # Self-attention: the same tensor is passed as query and value.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created from its config."""
        config = super().get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "feat_dim": self.feat_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "rate": self.rate,
            }
        )
        return config

In [None]:
num_blocks = 2  # number of TransformerBlock layers stacked in build_model()


def build_model():
    """Build and compile the transformer classifier.

    The network takes inputs of shape (13, 188), passes them through
    ``num_blocks`` TransformerBlock layers, keeps only the last position along
    axis 1, and feeds it through Dense(84) -> Dense(42) -> Dense(1, sigmoid).
    It is compiled with Adam (learning rate 0.001) and binary cross-entropy.

    Returns:
        The compiled ``keras.Model``.
    """
    # NOTE(review): embed_dim, feat_dim, num_heads, ff_dim and dropout_rate are
    # read as module-level names, but none of them is defined in the cells
    # visible here - confirm they are set before this function is called.
    inp = layers.Input(shape=(13, 188))

    # Experiments that were tried and left disabled: embedding the first 11
    # feature columns separately before the blocks, and blending each block's
    # output with its input as 0.9 * new + 0.1 * old.
    hidden = inp
    for _ in range(num_blocks):
        hidden = TransformerBlock(embed_dim, feat_dim, num_heads, ff_dim, dropout_rate)(hidden)

    # Classification head on the final position only.
    hidden = layers.Dense(84, activation="relu")(hidden[:, -1, :])
    hidden = layers.Dense(42, activation="relu")(hidden)
    outputs = layers.Dense(1, activation="sigmoid")(hidden)

    model = keras.Model(inputs=inp, outputs=outputs)
    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(),
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    )
    return model

In [None]:
def lrd(epoch):
    """Step-wise learning-rate schedule for ``LearningRateScheduler``.

    Epochs 1-2 use 1e-3, epochs 3-4 use 1e-4, epoch 5 uses 1e-5 and epoch 6
    uses 1e-6. Keras passes a 0-based epoch index and requires a float back,
    so indices outside 1..6 are clamped to the nearest table entry instead of
    falling through and returning ``None``: epoch 0 gets 1e-3 and any epoch
    after 6 stays at 1e-6.

    Args:
        epoch: integer epoch index supplied by the callback.

    Returns:
        The learning rate (float) to use for that epoch.
    """
    rates = {1: 1e-3, 2: 1e-3, 3: 1e-4, 4: 1e-4, 5: 1e-5, 6: 1e-6}
    first, last = min(rates), max(rates)
    # Clamp so every epoch index maps to a defined rate.
    return rates[min(max(int(epoch), first), last)]

# Callback that asks lrd() for the learning rate at the start of every epoch;
# verbose is switched on so the chosen rate is reported during training.
LR = tf.keras.callbacks.LearningRateScheduler(schedule=lrd, verbose=True)

In [None]:
def amex_metric_mod(y_true, y_pred):
    """Score predictions as the mean of a normalized weighted Gini and a top-4% capture rate.

    Rows whose target is 0 carry weight 20; all other rows carry weight 1.

    Args:
        y_true: 1-D sequence of targets (0 marks a negative).
        y_pred: 1-D sequence of predicted scores, same length as ``y_true``.

    Returns:
        ``0.5 * (gini_of_predictions / gini_of_perfect_ranking + top_four)``.
    """
    # Column 0 holds the target, column 1 the prediction.
    pairs = np.array([y_true, y_pred]).T

    def _ranked_by(column):
        """Rows of ``pairs`` ordered by ``column``, largest first."""
        return pairs[pairs[:, column].argsort()[::-1]]

    def _weighted_gini(column):
        """Weighted Gini sum obtained when rows are ranked by ``column``."""
        ranked = _ranked_by(column)
        target = ranked[:, 0]
        weight = np.where(target == 0, 20, 1)
        weight_random = np.cumsum(weight / np.sum(weight))
        total_pos = np.sum(target * weight)
        lorentz = np.cumsum(target * weight) / total_pos
        return np.sum((lorentz - weight_random) * weight)

    # Share of all positives found within the top 4% of cumulative weight
    # when rows are ranked by prediction.
    by_pred = _ranked_by(1)
    weights = np.where(by_pred[:, 0] == 0, 20, 1)
    cutoff = int(0.04 * np.sum(weights))
    captured = by_pred[np.cumsum(weights) <= cutoff]
    top_four = np.sum(captured[:, 0]) / np.sum(by_pred[:, 0])

    # Gini of the model ranking, normalized by the Gini of ranking by the target itself.
    gini_pred = _weighted_gini(1)
    gini_true = _weighted_gini(0)

    return 0.5 * (gini_pred / gini_true + top_four)

In [None]:
def _load_files(file_ids):
    """Load and stack `data_{k}.npy` / `targets_{k}.pqt` for every k in `file_ids`.

    Returns:
        (features, targets): the feature arrays concatenated along axis 0 and
        the values of the `target` column of the concatenated target frames.
    """
    features = np.concatenate([np.load(f'data_{k}.npy') for k in file_ids], axis=0)
    targets = pd.concat([pd.read_parquet(f'targets_{k}.pqt') for k in file_ids]).target.values
    return features, targets


# NOTE(review): EPOCHS and VERBOSE are used below but are not defined in the
# cells visible here - confirm they are set before this cell runs.

# Out-of-fold targets and predictions, accumulated across folds for the overall score.
true = np.array([])
oof = np.array([])

# Create the checkpoint directory once, before any training time is spent.
os.makedirs(modelpath, exist_ok=True)

for fold in range(5):

    # Release the previous fold's arrays before loading the next ones so that
    # two folds' worth of data are never held in memory at the same time.
    X_train = y_train = X_valid = y_valid = None
    gc.collect()

    # Each fold validates on two consecutive files: (1,2), (3,4), ... (9,10).
    valid_idx = [2*fold+1, 2*fold+2]
    # NOTE(review): the training pool is limited to files 1..6 while validation
    # ids run up to 10, so folds 4 and 5 train on all six files and folds 1-3
    # on four - confirm this is intended.
    train_idx = [x for x in [1,2,3,4,5,6] if x not in valid_idx]

    print(f'### Fold {fold+1} with valid files', valid_idx)

    X_train, y_train = _load_files(train_idx)
    print('### Training data shapes', X_train.shape, y_train.shape)

    X_valid, y_valid = _load_files(valid_idx)
    print('### Validation data shapes', X_valid.shape, y_valid.shape)
    print('#'*25)

    # Fresh graph and freshly initialized model for every fold.
    K.clear_session()
    model = build_model()
    h = model.fit(X_train,y_train,
                  validation_data = (X_valid,y_valid),
                  batch_size=512, epochs=EPOCHS, verbose=VERBOSE,
                  callbacks = [LR])
    model.save_weights(f'{modelpath}transformer_fold_{fold+1}.h5')

    print('Inferring validation data...')
    p = model.predict(X_valid, batch_size=512, verbose=VERBOSE).flatten()

    print()
    print(f'Fold {fold+1} CV=', amex_metric_mod(y_valid, p) )
    print()
    true = np.concatenate([true, y_valid])
    oof = np.concatenate([oof, p])

print(f'Overall CV =', amex_metric_mod(true, oof) )

In [None]:
# Ensemble layout: one saved checkpoint per CV fold, test data split over numbered files.
N_FOLDS = 5
N_TEST_FILES = 20

K.clear_session()

# Build one model per fold and load its weights a single time, instead of
# re-reading every checkpoint from disk for every test file.
fold_models = []
for fold_no in range(1, N_FOLDS + 1):
    fold_model = build_model()
    fold_model.load_weights(f'{modelpath}transformer_fold_{fold_no}.h5')
    fold_models.append(fold_model)
model = fold_models[-1]  # keep `model` bound to the last fold's weights, as before

start = 0; end = 0
sub = cudf.read_csv('data/sample_submission.csv')

# Reorder the submission rows to follow the order stored in the test hash file:
# the last 16 hex characters of customer_ID are converted to int64 and used as
# a temporary index, which is dropped again afterwards.
sub['hash'] = sub['customer_ID'].str[-16:].str.hex_to_int().astype('int64')
test_hash_index = cupy.load('data/TestData/test_hashes_data.npy')
sub = sub.set_index('hash').loc[test_hash_index].reset_index(drop=True)

for k in range(N_TEST_FILES):
    print(f'Inferring Test_File_{k+1}')
    X_test = np.load(f'data/TestData/test_data_{k+1}.npy')

    # Rows [start, end) of the submission correspond to this file.
    end = start + X_test.shape[0]

    # Average the fold predictions, accumulating in fold order.
    p = fold_models[0].predict(X_test, batch_size=512, verbose=0).flatten()
    for fold_model in fold_models[1:]:
        p += fold_model.predict(X_test, batch_size=512, verbose=0).flatten()
    p /= float(N_FOLDS)

    # .loc slicing is inclusive on both ends, hence end-1.
    sub.loc[start:end-1,'prediction'] = p
    start = end

# Every submission row must have received a prediction.
assert end == len(sub), f'filled {end} rows but the submission has {len(sub)}'

In [None]:
# Write the averaged predictions to disk without the row index.
sub.to_csv('submission_8.csv', index=False)