In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
#경로 설정
import os
os.chdir('/content/drive/My Drive/Colab Notebooks/운동동작분류AI경진대회')

In [3]:
import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
tf.random.set_seed(42)
import tensorflow.keras.backend as K
import tensorflow.keras.layers as layers
from tensorflow.keras.callbacks import Callback, ReduceLROnPlateau, ModelCheckpoint, EarlyStopping

import os, gc, random, datetime
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import roc_auc_score, roc_curve
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from joblib import dump, load
from time import time

print("Tensorflow version " + tf.__version__)
AUTO = tf.data.experimental.AUTOTUNE

Tensorflow version 2.4.1


# Preprocessing

In [4]:
# 데이터 불러오기

path = './data/'
train = pd.read_csv(path + 'train_features.csv')
train_label = pd.read_csv(path + 'train_labels.csv')
test = pd.read_csv(path + 'test_features.csv')
submission = pd.read_csv(path + 'sample_submission.csv')

In [5]:
# Pre-Processing Effect on the Accuracy of Event-Based Activity Segmentation and Classification through Inertial Sensors 
# https://www.researchgate.net/publication/281836367_Pre-Processing_Effect_on_the_Accuracy_of_Event-Based_Activity_Segmentation_and_Classification_through_Inertial_Sensors

train['acc_t']  = train.apply(lambda x : (x['acc_x']**2 + x['acc_y'] **2 +  x['acc_z'] ** 2 )**(1/3), axis=1)
test['acc_t']  = test.apply(lambda x : (x['acc_x']**2 + x['acc_y'] **2 +  x['acc_z'] ** 2 )**(1/3), axis=1)

In [6]:
x = np.array(train.iloc[:,2:]).reshape(-1, 600, 7)
y = np.array(train_label['label'])
test = np.array(test.iloc[:,2:]).reshape(-1, 600, 7)

In [7]:
# 26번을 제외한 id 리스트
feature = list(train_label[train_label['label'] != 26]['id'])

In [8]:
# train 데이터에서 26번을 삭제시킨다.
temp = []
for n in tqdm(range(train.shape[0])):
    if train['id'][n] in feature:
        temp.append(train.iloc[n])

HBox(children=(FloatProgress(value=0.0, max=1875000.0), HTML(value='')))




In [9]:
# 26번을 삭제시킨 데이터프레임
without = pd.DataFrame(data=np.array(temp), columns=train.columns)
without = without.astype({'id':int, 'time':int})
without = np.array(without.iloc[:,2:]).reshape(-1, 600, 7)
without_label = train_label[train_label['label'] != 26]['label']
without.shape, without_label.shape

((1607, 600, 7), (1607,))

In [19]:
# 데이터 증강
def aug(data, shift):
    shift_data = np.roll(data, shift, axis=2)
    return shift_data

shift_data = []
shift_label = []
for n in tqdm(range(20)):
    shifted = aug(without, n*30)
    shift_data.append(shifted)
    shift_label.append(without_label)

shift_data = np.array(shift_data).reshape(-1,600,7)
shift_label = np.array(shift_label).reshape(1,-1)
shift_label = [element for array in shift_label for element in array]

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




In [20]:
# 원본 데이터와 증강 데이터 합치기
concat_train = np.concatenate((x, shift_data), axis=0)
concat_label = np.concatenate((y, shift_label), axis=0)
print(concat_train.shape)
print(concat_label.shape)

(35265, 600, 7)
(35265,)


# Training

Base Transformer structure from https://www.tensorflow.org/tutorials/text/transformer, modified with Swish activation function.

In [21]:
def scaled_dot_product_attention(q, k, v, mask):
    """Calculate the attention weights.
    q, k, v must have matching leading dimensions.
    k, v must have matching penultimate dimension, i.e.: seq_len_k = seq_len_v.
    The mask has different shapes depending on its type(padding or look ahead) 
    but it must be broadcastable for addition.

    Args:
    q: query shape == (..., seq_len_q, depth)
    k: key shape == (..., seq_len_k, depth)
    v: value shape == (..., seq_len_v, depth_v)
    mask: Float tensor with shape broadcastable 
          to (..., seq_len_q, seq_len_k). Defaults to None.

    Returns:
    output, attention_weights
    """

    matmul_qk = tf.matmul(q, k, transpose_b = True)  # (..., seq_len_q, seq_len_k)

    # scale matmul_qk
    dk = tf.cast(tf.shape(k)[-1], tf.float32)
    scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)

    # add the mask to the scaled tensor.
    if mask is not None:
        
        scaled_attention_logits += (mask * -1e9)  

    # softmax is normalized on the last axis (seq_len_k) so that the scores
    # add up to 1.
    attention_weights = tf.nn.softmax(scaled_attention_logits, axis = -1)  # (..., seq_len_q, seq_len_k)

    output = tf.matmul(attention_weights, v)  # (..., seq_len_q, depth_v)

    return output, attention_weights

class MultiHeadAttention(tf.keras.layers.Layer):
    
    def __init__(self, d_model, num_heads):
        
        super(MultiHeadAttention, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model

        assert d_model % self.num_heads == 0

        self.depth = d_model // self.num_heads

        self.wq = tf.keras.layers.Dense(d_model)
        self.wk = tf.keras.layers.Dense(d_model)
        self.wv = tf.keras.layers.Dense(d_model)

        self.dense = tf.keras.layers.Dense(d_model)
        
    def split_heads(self, x, batch_size):
        """Split the last dimension into (num_heads, depth).
        Transpose the result such that the shape is (batch_size, num_heads, seq_len, depth)
        """
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(x, perm = [0, 2, 1, 3])
    
    def call(self, v, k, q, mask):
        
        batch_size = tf.shape(q)[0]

        q = self.wq(q)  # (batch_size, seq_len, d_model)
        k = self.wk(k)  # (batch_size, seq_len, d_model)
        v = self.wv(v)  # (batch_size, seq_len, d_model)

        q = self.split_heads(q, batch_size)  # (batch_size, num_heads, seq_len_q, depth)
        k = self.split_heads(k, batch_size)  # (batch_size, num_heads, seq_len_k, depth)
        v = self.split_heads(v, batch_size)  # (batch_size, num_heads, seq_len_v, depth)

        # scaled_attention.shape == (batch_size, num_heads, seq_len_q, depth)
        # attention_weights.shape == (batch_size, num_heads, seq_len_q, seq_len_k)
        scaled_attention, attention_weights = scaled_dot_product_attention(
            q, k, v, mask)

        scaled_attention = tf.transpose(scaled_attention, perm = [0, 2, 1, 3])  # (batch_size, seq_len_q, num_heads, depth)

        concat_attention = tf.reshape(scaled_attention, 
                                      (batch_size, -1, self.d_model))  # (batch_size, seq_len_q, d_model)

        output = self.dense(concat_attention)  # (batch_size, seq_len_q, d_model)
        
        return output, attention_weights

def point_wise_feed_forward_network(d_model, dff):
    
    return tf.keras.Sequential([
      tf.keras.layers.Dense(dff, activation = 'relu'),  # (batch_size, seq_len, dff)
      tf.keras.layers.Dense(d_model)  # (batch_size, seq_len, d_model)
    ])

class EncoderLayer(tf.keras.layers.Layer):
    
    def __init__(self, d_model, num_heads, dff, rate = 0.1):
        
        super(EncoderLayer, self).__init__()

        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward_network(d_model, dff)

        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon = 1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon = 1e-6)

        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)

    def call(self, x, training, mask):

        attn_output, _ = self.mha(x, x, x, mask)  # (batch_size, input_seq_len, d_model)
        attn_output = self.dropout1(attn_output, training = training)
        out1 = self.layernorm1(x + attn_output)  # (batch_size, input_seq_len, d_model)

        ffn_output = self.ffn(out1)  # (batch_size, input_seq_len, d_model)
        ffn_output = self.dropout2(ffn_output, training = training)
        out2 = self.layernorm2(out1 + ffn_output)  # (batch_size, input_seq_len, d_model)

        return out2

class TransformerEncoder(tf.keras.layers.Layer):
    
    def __init__(self, num_layers, d_model, num_heads, dff, 
                 maximum_position_encoding, rate = 0.1):
        
        super(TransformerEncoder, self).__init__()

        self.d_model = d_model
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.dff = dff
        self.maximum_position_encoding = maximum_position_encoding
        self.rate = rate

#         self.pos_encoding = positional_encoding(self.maximum_position_encoding, 
#                                                 self.d_model)
#         self.embedding = tf.keras.layers.Dense(self.d_model)
        self.pos_emb = tf.keras.layers.Embedding(input_dim = self.maximum_position_encoding, 
                                                 output_dim = self.d_model)

        self.enc_layers = [EncoderLayer(self.d_model, self.num_heads, self.dff, self.rate) 
                           for _ in range(self.num_layers)]

        self.dropout = tf.keras.layers.Dropout(self.rate)
        
    def get_config(self):

        config = super().get_config().copy()
        config.update({
            'num_layers': self.num_layers,
            'd_model': self.d_model,
            'num_heads': self.num_heads,
            'dff': self.dff,
            'maximum_position_encoding': self.maximum_position_encoding,
            'dropout': self.dropout,
        })
        return config

    def call(self, x, training, mask = None):

        seq_len = tf.shape(x)[1]

        # adding embedding and position encoding.
#         x += self.pos_encoding[:, :seq_len, :]
#         x = self.embedding(x)
        positions = tf.range(start = 0, limit = seq_len, delta = 1)
        x += self.pos_emb(positions)

        x = self.dropout(x, training = training)

        for i in range(self.num_layers):

            x = self.enc_layers[i](x, training, mask)

        return x  # (batch_size, input_seq_len, d_model)

In [22]:
def create_transformer_model(num_columns, num_labels, num_layers, d_model, num_heads, dff, window_size, dropout_rate, weight_decay, label_smoothing, learning_rate):
    
    inp = tf.keras.layers.Input(shape = (window_size, num_columns))
    x = tf.keras.layers.BatchNormalization()(inp)
    x = tf.keras.layers.Dense(d_model)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation('relu')(x)
    x = tf.keras.layers.SpatialDropout1D(dropout_rate)(x)
    x = TransformerEncoder(num_layers, d_model, num_heads, dff, window_size, dropout_rate)(x)
    out = tf.keras.layers.Dense(num_labels, activation = 'softmax')(x[:, -1, :])
    
    model = tf.keras.models.Model(inputs = inp, outputs = out)
    model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['AUC'])
    
    return model

In [23]:
batch_size = 64
num_layers = 1
d_model = 256
num_heads = 1
dff = 2048
window_size = 600
dropout_rate = 0.15
weight_decay = 0
label_smoothing = 1e-2
learning_rate = 1e-3
verbose = 1

# Train-Test-Split Training

Split the train set into three folds, i.e., training-1, training-2 and validation sets. First, train the more on training-1 set and validate it on the validation set. Then use the training-2 set to find the best number of finetuning epochs. Finally, finetune on both training-2 and validation sets and submit.

In [24]:
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(concat_train, concat_label, test_size=0.2, random_state=42)
y_train = tf.keras.utils.to_categorical(y_train)
y_val = tf.keras.utils.to_categorical(y_val)

In [25]:
start_time_fold = time()

ckp_path = 'JSTransformer.hdf5'
model = create_transformer_model(x.shape[2], 61, num_layers, d_model, num_heads, dff, window_size, dropout_rate, weight_decay, label_smoothing, learning_rate)
model.summary()

rlr = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.1, patience = 3, verbose = verbose, min_delta = 1e-4, mode = 'min')
ckp = ModelCheckpoint(ckp_path, monitor = 'val_loss', verbose = 0, save_best_only = True, save_weights_only = True, mode = 'min')
es = EarlyStopping(monitor = 'val_loss', min_delta = 1e-4, patience = 5, mode = 'min', baseline = None, restore_best_weights = True, verbose = 0)

history = model.fit(X_train, y_train,
                    validation_data = (X_val, y_val),
                    batch_size = batch_size,
                    epochs = 1000,
                    callbacks = [rlr, ckp, es],
                    verbose = verbose)

hist = pd.DataFrame(history.history)
print(f'[{str(datetime.timedelta(seconds = time() - start_time_fold))[0:7]}] ROC loss:\t', hist['val_loss'].min())

del model
rubbish = gc.collect()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 600, 7)]          0         
_________________________________________________________________
batch_normalization (BatchNo (None, 600, 7)            28        
_________________________________________________________________
dense (Dense)                (None, 600, 256)          2048      
_________________________________________________________________
batch_normalization_1 (Batch (None, 600, 256)          1024      
_________________________________________________________________
activation (Activation)      (None, 600, 256)          0         
_________________________________________________________________
spatial_dropout1d (SpatialDr (None, 600, 256)          0         
_________________________________________________________________
transformer_encoder (Transfo (None, 600, 256)          146867

# Load Model

In [26]:
model = create_transformer_model(x.shape[2], 61, num_layers, d_model, num_heads, dff, window_size, dropout_rate, weight_decay, label_smoothing, learning_rate)
model.load_weights(ckp_path)

# Submitting

In [27]:
test_pred = model.predict(test)

In [28]:
sample_submssion = pd.read_csv(path + 'sample_submission.csv')
sample_submssion.iloc[:,1:] = test_pred
sample_submssion.to_csv("transformer.csv", index = False)
sample_submssion

Unnamed: 0,id,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60
0,3125,2.126891e-04,1.065281e-06,1.000387e-05,2.395879e-09,6.525254e-06,1.018646e-09,7.055928e-02,1.363940e-04,3.264240e-08,1.843692e-07,8.089150e-06,4.310654e-05,8.022334e-06,7.745991e-04,8.911251e-01,1.159552e-06,2.438852e-07,8.252284e-09,9.231488e-11,3.380202e-08,6.840420e-07,1.133943e-07,2.105044e-09,4.710884e-07,1.641274e-02,2.477653e-07,0.010784,9.029970e-06,6.166005e-07,2.936537e-07,6.259574e-06,2.328473e-08,2.851499e-10,1.991623e-11,3.856387e-09,2.351703e-06,1.543913e-08,8.042113e-06,1.321023e-06,2.678637e-07,1.905799e-08,1.574370e-07,1.413292e-05,2.150609e-08,2.838985e-04,3.034849e-06,5.654870e-08,1.426984e-09,8.285498e-06,5.321518e-08,8.868165e-03,5.980089e-04,3.581215e-05,8.205200e-09,5.585874e-07,1.167901e-07,5.905737e-05,3.947421e-11,8.094799e-06,1.944105e-06,5.213298e-06
1,3126,7.885522e-10,1.055551e-06,8.721771e-06,2.028633e-07,4.641043e-08,3.149164e-02,2.521717e-07,9.036010e-01,8.885154e-06,2.172957e-05,1.391279e-08,2.447960e-07,8.706434e-09,4.744099e-04,1.658126e-08,5.000627e-06,2.783955e-08,2.782661e-07,3.833583e-08,1.575984e-08,8.344176e-05,1.230902e-08,1.879319e-04,1.152085e-04,1.517773e-06,2.719485e-07,0.059510,5.134079e-04,1.068652e-09,3.832934e-11,1.575524e-07,1.463507e-07,8.900003e-10,3.692954e-07,3.407800e-06,1.810020e-09,7.665891e-06,1.601894e-07,5.802873e-07,8.461827e-08,2.720129e-04,5.039849e-07,1.471541e-07,8.273526e-07,3.596776e-03,5.990785e-08,1.998441e-07,2.489497e-09,1.374358e-06,3.841076e-09,9.763748e-06,2.602903e-08,8.476406e-10,3.646425e-06,5.696763e-10,1.653541e-05,1.710061e-06,1.951390e-05,1.145579e-09,3.831389e-05,1.567380e-07
2,3127,3.082519e-05,6.902708e-05,1.479054e-07,4.582374e-07,2.840427e-07,5.588576e-07,1.546478e-04,3.937004e-01,2.397854e-04,6.683425e-06,3.487549e-07,2.012567e-02,3.326028e-05,2.444995e-07,9.304542e-04,6.264757e-06,3.767976e-08,6.417699e-07,2.762596e-07,3.221906e-08,2.263820e-06,8.483208e-05,2.549096e-09,1.031511e-03,5.168565e-08,1.383908e-07,0.451802,8.009289e-07,1.108412e-04,1.124595e-06,8.920994e-04,1.460240e-05,9.930548e-05,2.391724e-09,6.792374e-05,2.131650e-05,3.076509e-06,2.755387e-02,4.037925e-03,1.403809e-05,5.882907e-11,2.527814e-08,7.101571e-07,2.941807e-06,7.885009e-06,4.370196e-03,1.627367e-08,4.349165e-12,5.932013e-05,1.208317e-07,8.479905e-02,1.058335e-04,1.162084e-04,2.516901e-05,3.392106e-11,5.982601e-08,4.948650e-03,5.000131e-09,2.969836e-08,1.409325e-06,4.524831e-03
3,3128,7.997050e-04,1.133802e-07,1.253678e-09,5.898038e-09,2.131822e-08,2.575670e-05,1.678392e-04,7.351980e-06,7.777659e-06,5.361857e-07,1.259219e-05,1.061837e-07,1.155872e-10,3.860556e-08,9.083570e-07,1.905802e-09,1.583825e-08,2.422280e-10,1.735683e-07,2.309622e-08,1.301249e-12,1.152238e-08,2.486306e-06,3.734811e-07,6.113001e-03,5.055030e-09,0.032107,1.455435e-05,5.509997e-08,5.737109e-09,2.860268e-07,1.330627e-07,7.110240e-09,8.084776e-05,4.595432e-08,6.047186e-08,8.307565e-08,1.164432e-08,8.836111e-11,1.295581e-08,2.116248e-06,1.024002e-07,6.529297e-11,4.179801e-08,9.586948e-01,5.458782e-08,1.814689e-10,2.350910e-06,8.845674e-04,8.989075e-04,3.580881e-05,6.892421e-07,1.058100e-07,7.515327e-09,2.228728e-06,2.152861e-09,9.324058e-07,1.977398e-08,3.737633e-05,9.742452e-05,3.219506e-08
4,3129,1.415563e-03,3.333942e-09,4.709638e-08,6.530272e-06,3.057835e-08,3.686935e-07,4.515118e-07,1.025697e-11,8.947000e-10,5.657124e-10,8.588579e-08,2.661241e-09,2.605229e-11,1.153348e-07,7.783218e-09,5.124185e-11,7.425396e-09,4.167602e-07,9.821284e-09,1.363485e-12,3.994247e-10,1.442488e-09,5.476716e-07,3.087005e-07,7.251893e-07,9.286513e-10,0.998116,3.433246e-08,1.493086e-08,1.951436e-10,6.517365e-07,2.481416e-05,1.189355e-07,3.000812e-09,3.892325e-10,2.011029e-06,1.101837e-04,1.632064e-05,6.483557e-08,1.041651e-11,2.040545e-09,1.388297e-09,2.442349e-12,4.090329e-08,2.971467e-10,1.486130e-10,2.860934e-08,8.129356e-10,3.129102e-08,2.956286e-04,3.018969e-06,6.372161e-09,1.334381e-09,1.975161e-11,1.881568e-08,2.791591e-07,2.316927e-07,3.766061e-08,3.949417e-09,4.705754e-06,2.390782e-09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
777,3902,2.645198e-06,1.491880e-02,3.951535e-03,1.144515e-08,3.290720e-02,7.957361e-10,1.181593e-06,5.971185e-06,1.615224e-05,1.125257e-04,6.890712e-05,6.266847e-03,8.234084e-06,2.145155e-05,1.141516e-06,4.154022e-06,1.204708e-08,2.749767e-05,8.591838e-05,3.955629e-07,9.744840e-10,2.042198e-07,2.279693e-03,5.228193e-01,4.868780e-03,1.304350e-05,0.281311,9.311557e-08,2.140865e-05,2.633618e-03,2.112642e-06,8.130202e-08,1.393083e-09,9.658860e-08,9.678048e-08,4.708879e-08,3.840161e-08,2.316173e-02,7.232383e-05,9.995877e-06,6.853754e-07,2.301059e-04,9.616008e-05,7.289503e-07,1.205236e-04,2.158560e-05,3.278505e-03,6.725710e-07,4.883858e-06,2.147391e-08,1.303154e-05,6.256751e-06,2.971182e-04,2.168681e-06,5.241154e-08,1.670553e-05,1.695011e-07,3.329912e-02,3.157727e-02,2.651319e-02,8.926505e-03
778,3903,3.211467e-04,1.631296e-01,2.790250e-07,2.169322e-07,1.033806e-04,2.174156e-07,8.423044e-09,9.776104e-07,3.108332e-05,9.453482e-09,6.256610e-09,4.924730e-05,7.343302e-10,3.309971e-08,2.149082e-09,1.927298e-07,4.099048e-05,5.048131e-07,9.287733e-05,7.581265e-09,4.076622e-10,5.721755e-07,6.365858e-10,4.936562e-06,5.826053e-04,1.297076e-07,0.626907,2.491700e-07,4.629826e-06,3.001813e-08,1.075062e-04,4.386474e-07,1.042043e-08,9.154542e-07,3.558041e-09,1.978439e-08,4.055906e-06,1.418075e-04,9.786910e-08,1.009546e-08,4.782006e-10,1.714200e-11,6.831797e-10,2.573849e-07,6.775953e-07,1.028707e-09,6.877654e-07,5.078310e-07,4.647050e-07,3.333837e-09,5.041821e-06,1.238298e-06,1.052725e-07,1.585246e-07,6.641799e-12,8.608171e-08,4.691814e-06,2.347545e-05,9.609820e-07,2.084343e-01,1.363378e-06
779,3904,4.779145e-07,1.249035e-08,2.293448e-09,2.388420e-08,3.772523e-10,6.402012e-09,6.295384e-08,4.299762e-06,5.643910e-08,3.135410e-10,1.921254e-05,1.477010e-08,1.213234e-09,1.734427e-09,1.024439e-08,2.455216e-10,4.383014e-09,9.717986e-07,4.937782e-11,4.411008e-09,7.168077e-10,3.312780e-08,1.765967e-08,6.676192e-08,1.433280e-09,2.108323e-08,0.998875,2.163433e-05,5.207016e-08,3.093100e-10,3.977840e-08,1.617360e-10,7.979766e-09,4.043355e-09,1.245414e-09,6.020039e-07,7.036133e-05,1.227192e-06,4.174959e-04,7.325600e-11,1.119124e-08,5.801034e-10,5.424137e-11,1.452691e-07,2.339909e-09,4.007452e-10,6.646391e-10,4.314648e-10,6.927020e-09,2.971474e-05,2.905728e-07,5.212570e-08,1.439179e-08,8.066630e-12,1.948675e-05,1.106705e-08,1.323282e-08,5.159681e-04,4.786531e-10,1.367019e-05,8.699005e-06
780,3905,7.280117e-05,5.802110e-06,7.991491e-05,1.132583e-06,6.107889e-05,1.032036e-06,6.044957e-07,1.532528e-07,8.008286e-08,7.247918e-07,6.334944e-04,2.729880e-05,2.209158e-07,2.702101e-07,3.703205e-07,4.180835e-07,4.063803e-04,2.860059e-06,2.037325e-07,1.378971e-10,8.366890e-09,2.202371e-06,2.535994e-08,9.839263e-08,1.501304e-02,1.465687e-05,0.644974,1.256819e-08,2.230774e-07,1.079156e-05,9.325752e-02,3.783287e-02,4.961118e-08,1.783219e-01,2.496603e-05,1.905306e-06,9.020455e-07,8.226671e-05,1.267798e-08,6.242498e-05,5.095342e-07,4.388401e-06,1.812730e-03,2.138370e-05,2.472414e-06,5.096350e-06,2.451563e-02,2.713196e-03,5.806927e-09,2.537102e-08,2.891091e-08,9.262920e-07,1.699291e-07,2.885146e-09,9.737131e-06,7.492901e-08,4.285176e-07,6.211857e-08,1.783122e-05,3.449905e-07,4.465059e-08


In [29]:
# https://www.kaggle.com/gogo827jz/jane-street-ffill-transformer-baseline
# https://wikidocs.net/31379
# https://www.tensorflow.org/tutorials/text/transformer