In [1]:
#############################################################
# 1. Libraries

import pandas as pd
import numpy as np 
import os
import glob
from tqdm import tqdm
from sklearn.model_selection import KFold, GroupKFold, StratifiedKFold
import tensorflow as tf
tf.keras.backend.clear_session()

physical_devices = tf.config.list_physical_devices('GPU')

try:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
except:
    print('Invalid device or cannot modify virtual devices once initialized.')

from tensorflow.keras import models, layers, regularizers, metrics, losses, optimizers
from tensorflow.keras.utils import Sequence

import matplotlib.pyplot as plt
import gc

from sklearn.linear_model import LinearRegression

#############################################################

In [2]:
#############################################################
# 2. Paths & Global Variables

## 2.1 Paths

path = '../../01_Data/'
path_sequences = path + '01_GeneratedSequences/'


df_train = pd.read_csv(path + 'train.csv')
df_sample_submission = pd.read_csv(path + 'sample_submission.csv') 

train_paths = glob.glob(path + 'train/*')
test_paths = glob.glob(path + 'test/*')

unique_segments_id_train = set(df_train['segment_id'])
unique_segments_id_test = set(df_sample_submission['segment_id'])

dict_unique_segments_id = { v : k for k, v in enumerate(unique_segments_id_train)}
dict_unique_segments_id_inv = { k : v for k, v in enumerate(unique_segments_id_train)}

## 2.2 Global Variables

SEQ_LENGTH = 60_001

#############################################################

In [3]:
#############################################################
# 3. Global Functions

def buildSequences(df, dict_segment_paths, training=True, mask_value=-1.0):
    x = np.zeros((len(dict_segment_paths), SEQ_LENGTH, 10))
    if training:
        y = np.zeros(len(dict_segment_paths))
    for i, segment in enumerate(tqdm(dict_segment_paths, total=len(dict_segment_paths), position=0)):
        segment_path = dict_segment_paths[segment]
        df_tmp = pd.read_csv(segment_path)
        df_tmp = df_tmp.fillna(mask_value)
        x[i] = df_tmp.values[-SEQ_LENGTH:]
        if training:
            y[i] = df['time_to_eruption'][df['segment_id']==segment].values[0]
    if training:
        return x, y
    else:
        return x


def scale(x, mean_, std_):
    return (x - mean_) / std_


def unscale(x, mean_, std_):
    return (x * std_) + mean_

#############################################################

In [4]:
#############################################################
# 4. Preprocess

dict_segments_sequences_paths_train = {
    segment : path_sequences + 'train/' + str(segment) + '.npy' for segment in unique_segments_id_train
}

dict_segments_sequences_paths_test = {
    segment : path_sequences + 'test/' + str(segment) + '.npy' for segment in unique_segments_id_test
}

dict_positions_segments = {k : i for i, k in enumerate(dict_segments_sequences_paths_train.keys())}

df_train['time_to_eruption'] = df_train['time_to_eruption']/(10**6)

dict_labels = {
    segment : df_train['time_to_eruption'][df_train['segment_id']==segment].values.flatten()
    for segment in unique_segments_id_train
}


#############################################################

In [5]:
#############################################################
# 5. Build Sequences

class VolcanoSequencesGenerator(Sequence):
    
    def __init__(self, segments, path_sequences, batch_size, dict_labels, augmentations, training=True):
        super(VolcanoSequencesGenerator, self).__init__()
        
        self.dict_means = {0: 0.09421943291597953, 1: 0.9208114415834104, 2: -0.026617075839858038, 
                           3: 0.09724443370400684, 4: 1.704695380910225, 5: -0.1180321202370159, 6: 0.7667902421713446, 
                           7: 0.7804286101804458, 8: -0.2075797991904395, 9: 0.014516944212624944} 
        
        self.dict_stds =  {0: 1820.6211174856987, 1: 1931.0901612736805, 2: 1738.1671740163413, 
                           3: 1669.8837574619292, 4: 568.5221048211192, 5: 1848.4917466767877, 6: 1623.353060255481, 
                           7: 1618.2714709240895, 8: 1590.9403316558762, 9: 1906.41447528788}
        
        self.segments = segments
        self.path_sequences = path_sequences
        self.batch_size = batch_size
        self.dict_labels = dict_labels
        self.augmentations = augmentations
        self.training = training
        self.on_epoch_end()
        
    def __len__(self):
        self.num_steps = int(np.ceil(len(self.segments) / self.batch_size))
        return self.num_steps
        
    def __getitem__(self, idx):
        indexes = self.indexes[idx*self.batch_size:(idx+1)*self.batch_size]
        list_batch_segments = [self.segments[k] for k in indexes]
        
        
        array_sequences = np.asarray([np.load(self.path_sequences[segment], allow_pickle=True)[-SEQ_LENGTH:, :]
                                     for segment in list_batch_segments])
        
        if self.augmentations:
            array_sequences = self.augmentBatch(array_sequences)
        
        array_sequences[:, :, 0] = scale(array_sequences[:, :, 0], self.dict_means[0], self.dict_stds[0])
        array_sequences[:, :, 1] = scale(array_sequences[:, :, 1], self.dict_means[1], self.dict_stds[1])
        array_sequences[:, :, 2] = scale(array_sequences[:, :, 2], self.dict_means[2], self.dict_stds[2])
        array_sequences[:, :, 3] = scale(array_sequences[:, :, 3], self.dict_means[3], self.dict_stds[3])
        array_sequences[:, :, 4] = scale(array_sequences[:, :, 4], self.dict_means[4], self.dict_stds[4])
        array_sequences[:, :, 5] = scale(array_sequences[:, :, 5], self.dict_means[5], self.dict_stds[5])
        array_sequences[:, :, 6] = scale(array_sequences[:, :, 6], self.dict_means[6], self.dict_stds[6])
        array_sequences[:, :, 7] = scale(array_sequences[:, :, 7], self.dict_means[7], self.dict_stds[7])
        array_sequences[:, :, 8] = scale(array_sequences[:, :, 8], self.dict_means[8], self.dict_stds[8])
        array_sequences[:, :, 9] = scale(array_sequences[:, :, 9], self.dict_means[9], self.dict_stds[9])
        
        if self.training:
            array_labels = np.asarray([self.dict_labels[segment] for segment in list_batch_segments])

        if self.training:
            return array_sequences, array_labels
        else:
            return array_sequences
        
        
    def noiseInjection(self, batch_sequences, noise_factor=0.075):
        noise = np.random.randn(batch_sequences.shape[0], batch_sequences.shape[1], batch_sequences.shape[2])
        augmented_data = batch_sequences + noise_factor * noise
        return augmented_data
    
    
    def timeShifting(self, batch_sequences, shift_max):
        shift = np.random.randint(shift_max)
        for sensor in range(10):
            batch_sequences[:, :, sensor] = np.roll(batch_sequences[:, :, sensor], shift)
        return batch_sequences
       
    
    def augmentBatch(self, batch_sequences):
        
        # Add random noise
        if np.random.random() > 0.5:
            batch_sequences = self.noiseInjection(batch_sequences, noise_factor=0.005)   
            
        # Time shifting
        if np.random.random() > 0.5:
            batch_sequences = self.timeShifting(batch_sequences, shift_max=600) 
        
#         # Random batch sequence sensors slices to null
#         if np.random.random() > 0.5:
#             num_random_sensors = np.random.choice([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
#             random_sensors = list(set(np.random.randint(0, 9, size=num_random_sensors)))
#             random_ini_position = np.random.randint(0, 60_001, size=num_random_sensors)
#             random_length = np.random.randint(random_ini_position, random_ini_position+6_000, size=num_random_sensors)
#             random_length -= random_ini_position

#             if num_random_sensors!=0:
#                 for i, sensor in enumerate(random_sensors):
#                     batch_sequences[:, random_ini_position[i]:random_ini_position[i]+random_length[i], sensor] = 0.0
                    
        # Shut-down sensor
        if np.random.random() > 0.5:
            sensor = np.random.randint(0, 9)
            batch_sequences[:, :, sensor] = 0.0
                
        return batch_sequences
    
    
    def on_epoch_end(self):
        self.indexes = np.arange(len(self.segments))
        np.random.shuffle(self.indexes)
        
        
    def generateOrderedSequences(self, list_segments):
        X = np.empty((len(list_segments), SEQ_LENGTH, 10))
        
        if self.training:
            y = np.empty(len(list_segments))
            
        for i, segment in enumerate(list_segments):
            X[i] = np.load(self.path_sequences[segment], allow_pickle=True)[-SEQ_LENGTH:, :]
            if self.training:
                y[i] = self.dict_labels[segment]
                
        if self.augmentations:
            X = self.augmentBatch(X)
            
        X[:, :, 0] = scale(X[:, :, 0], self.dict_means[0], self.dict_stds[0])
        X[:, :, 1] = scale(X[:, :, 1], self.dict_means[1], self.dict_stds[1])
        X[:, :, 2] = scale(X[:, :, 2], self.dict_means[2], self.dict_stds[2])
        X[:, :, 3] = scale(X[:, :, 3], self.dict_means[3], self.dict_stds[3])
        X[:, :, 4] = scale(X[:, :, 4], self.dict_means[4], self.dict_stds[4])
        X[:, :, 5] = scale(X[:, :, 5], self.dict_means[5], self.dict_stds[5])
        X[:, :, 6] = scale(X[:, :, 6], self.dict_means[6], self.dict_stds[6])
        X[:, :, 7] = scale(X[:, :, 7], self.dict_means[7], self.dict_stds[7])
        X[:, :, 8] = scale(X[:, :, 8], self.dict_means[8], self.dict_stds[8])
        X[:, :, 9] = scale(X[:, :, 9], self.dict_means[9], self.dict_stds[9])
        
        
        
        if self.training:
            return X, y
        else:
            return X
    
#############################################################

In [6]:
#############################################################
# 6. Models

class ReturnBestEarlyStopping(tf.keras.callbacks.EarlyStopping):
    def __init__(self, **kwargs):
        super(ReturnBestEarlyStopping, self).__init__(**kwargs)

    def on_train_end(self, logs=None):
        if self.stopped_epoch > 0:
            if self.verbose > 0:
                print(f'\nEpoch {self.stopped_epoch + 1}: early stopping')
        elif self.restore_best_weights:
            if self.verbose > 0:
                print('Restoring model weights from the end of the best epoch.')
            self.model.set_weights(self.best_weights)


def scaled_dot_product_attention(q, k, v, mask):
    """Calculate the attention weights.
    q, k, v must have matching leading dimensions.
    k, v must have matching penultimate dimension, i.e.: seq_len_k = seq_len_v.
    The mask has different shapes depending on its type(padding or look ahead) 
    but it must be broadcastable for addition.

    Args:
    q: query shape == (..., seq_len_q, depth)
    k: key shape == (..., seq_len_k, depth)
    v: value shape == (..., seq_len_v, depth_v)
    mask: Float tensor with shape broadcastable 
          to (..., seq_len_q, seq_len_k). Defaults to None.

    Returns:
    output, attention_weights
    """

    matmul_qk = tf.matmul(q, k, transpose_b=True)  # (..., seq_len_q, seq_len_k)

    # scale matmul_qk
    dk = tf.cast(tf.shape(k)[-1], tf.float32)
    scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)

    # add the mask to the scaled tensor.
    if mask is not None:
        scaled_attention_logits += (mask * -1e9)  

    # softmax is normalized on the last axis (seq_len_k) so that the scores
    # add up to 1.
    attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)  # (..., seq_len_q, seq_len_k)

    output = tf.matmul(attention_weights, v)  # (..., seq_len_q, depth_v)

    return output, attention_weights
    

class MultiHeadAttention(layers.Layer):
    def __init__(self, d_model, num_heads):
        super(MultiHeadAttention, self).__init__(name='MultiHeadAttention')
        self.num_heads = num_heads
        self.d_model = d_model

        assert d_model % self.num_heads == 0

        self.depth = d_model // self.num_heads

        self.wq = layers.Dense(d_model, kernel_regularizer=regularizers.l2(1e-5))
        self.wk = layers.Dense(d_model, kernel_regularizer=regularizers.l2(1e-5))
        self.wv = layers.Dense(d_model, kernel_regularizer=regularizers.l2(1e-5))

        self.dense = layers.Dense(d_model, kernel_regularizer=regularizers.l2(1e-5))


    def split_heads(self, x, batch_size):

        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(x, perm=[0, 2, 1, 3])
    

    def call(self, v, k, q, mask):
        batch_size = tf.shape(q)[0]

        q = self.wq(q)  # (batch_size, seq_len, d_model)
        k = self.wk(k)  # (batch_size, seq_len, d_model)
        v = self.wv(v)  # (batch_size, seq_len, d_model)

        q = self.split_heads(q, batch_size)  # (batch_size, num_heads, seq_len_q, depth)
        k = self.split_heads(k, batch_size)  # (batch_size, num_heads, seq_len_k, depth)
        v = self.split_heads(v, batch_size)  # (batch_size, num_heads, seq_len_v, depth)

        # scaled_attention.shape == (batch_size, num_heads, seq_len_q, depth)
        # attention_weights.shape == (batch_size, num_heads, seq_len_q, seq_len_k)
        scaled_attention, attention_weights = scaled_dot_product_attention(q, k, v, mask)

        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])  # (batch_size, seq_len_q, num_heads, depth)

        concat_attention = tf.reshape(scaled_attention, 
                                      (batch_size, -1, self.d_model))  # (batch_size, seq_len_q, d_model)

        output = self.dense(concat_attention)  # (batch_size, seq_len_q, d_model)

        return output, attention_weights

    
class WaveNet(layers.Layer):
    def __init__(self, num_filters, kernel_size, deep_dilations, **kwargs): 
        super().__init__(**kwargs, name='WaveNet')
        
        self.num_filters = num_filters
        self.kernel_size = kernel_size
        self.deep_dilations = deep_dilations
        
        self.list_dilation_rates = [2**i for i in range(self.deep_dilations)]
        
        self.conv_casual = layers.Conv1D(filters=self.num_filters, kernel_size=1, padding='causal',
                                          kernel_regularizer=regularizers.l2(1e-5))
        
        self.list_conv_tanh = [layers.Conv1D(filters=self.num_filters, kernel_size=self.kernel_size,
                                             kernel_regularizer=regularizers.l2(1e-5),
                                             padding='same', activation='tanh', dilation_rate=rate) #tanh
                               for rate in self.list_dilation_rates]
        
        self.list_conv_sigm = [layers.Conv1D(filters=self.num_filters, kernel_size=self.kernel_size,
                                             kernel_regularizer=regularizers.l2(1e-5),
                                             padding='same', activation='sigmoid', dilation_rate=rate) #sigmoid
                               for rate in self.list_dilation_rates]
        
        self.list_conv_bottleneck = [layers.Conv1D(filters=self.num_filters, kernel_size=1, padding='same',
                                                  kernel_regularizer=regularizers.l2(1e-5))
                                     for rate in self.list_dilation_rates]

    def call(self, inputs):
        
        x = self.conv_casual(inputs)
        x_residual = x
        
        for i in range(len(self.list_dilation_rates)):
            z1 = self.list_conv_tanh[i](x)
            z2 = self.list_conv_sigm[i](x)
            x = tf.multiply(z1, z2)
            x = self.list_conv_bottleneck[i](x)
            x_residual = tf.add(x_residual, x)  

        return x_residual
    
    
class ConvWaveNetHead(models.Model):
    def __init__(self):
        super(ConvWaveNetHead, self).__init__()
        self.wave_1 = WaveNet(num_filters=16, kernel_size=3, deep_dilations=9)
        self.wave_2 = WaveNet(num_filters=32, kernel_size=3, deep_dilations=7)
        self.wave_3 = WaveNet(num_filters=64, kernel_size=3, deep_dilations=6)
        
        self.avg_pool1 = layers.MaxPooling1D(10)
        self.avg_pool2 = layers.MaxPooling1D(10)
        self.avg_pool3 = layers.MaxPooling1D(10)

    def call(self, inputs, training):
        
        x = self.wave_1(inputs)
        x = self.avg_pool1(x)
        
        x = self.wave_2(x)
        x = self.avg_pool2(x)
        
        x = self.wave_3(x)
        x = self.avg_pool3(x)
        
        return x
    
    
class VolcanoMHAttentionSequenceModel(models.Model): 
    def __init__(self, d_model, num_heads):
        super(VolcanoMHAttentionSequenceModel, self).__init__()
        self.mha = MultiHeadAttention(d_model=d_model, num_heads=num_heads)
        self.layernorm = layers.LayerNormalization(epsilon=1e-6)
        
    def call(self, inputs, training):
        x, _ = self.mha(inputs, inputs, inputs, mask=None)
#         x = self.layernorm(x)
        return x
        

# Custom Loss

def quantileLoss(y_true, y_pred):
    quantiles = tf.constant([0.4, 0.5, 0.6])
    e = y_true - y_pred
    v = tf.maximum(quantiles * e, (quantiles-1) * e)
    return tf.reduce_mean(v)

# Model Wrapper

def buildmodel(num_heads, d_model=64, summary=False):
        
    conv_head = ConvWaveNetHead()
    att_model = VolcanoMHAttentionSequenceModel(d_model=d_model, num_heads=num_heads)
    
    in_ = layers.Input(shape=(SEQ_LENGTH, 10))
    x = conv_head(in_)
    x = att_model(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(64, kernel_regularizer=regularizers.l2(1e-4))(x)
    x = layers.Activation('relu')(x)
    x = layers.Dropout(0.2)(x)
    
    out_1 = layers.Dense(1, activation='relu', name='time_to_eruption')(x)
    out_2 = layers.Dense(3, activation='relu', name='quantile')(x)
    
    model = models.Model(inputs=[in_], outputs=[out_1, out_2])

    model.compile(optimizer=optimizers.Adam(learning_rate=8e-4, 
                                           beta_1=0.9, beta_2=0.98, 
                                           epsilon=1e-9), 
                  loss=[losses.MeanAbsoluteError(), quantileLoss],
                  loss_weights=[4, 1],
                  metrics=['mae'])
    if summary:
        print(model.summary())
    
    return model


def scheduler(epoch, lr):
    if epoch % 10 == 0:
        return lr*0.9
    else:
        return lr
    
#############################################################

In [7]:
#############################################################
# 7. Training

list_segments_train = list(unique_segments_id_train)

batch_size = 8
num_heads = 5
d_model = 300

callback_early_stopping = ReturnBestEarlyStopping(monitor='val_time_to_eruption_loss', 
                                                  patience=20, verbose=1, restore_best_weights=True)
callback_lrsched = tf.keras.callbacks.LearningRateScheduler(scheduler)
list_callbacks = [callback_early_stopping, callback_lrsched]

kf = StratifiedKFold(n_splits = 5, shuffle=True, random_state=12)
list_history, list_reg_models = [], []

for num_fold, (train_index, val_index) in enumerate(kf.split(list_segments_train,
                                                             np.zeros(len(list_segments_train)))):
    segments_train_fold = np.asarray(list_segments_train)[train_index]
    segments_val_fold = np.asarray(list_segments_train)[val_index]

    X_train_generator = VolcanoSequencesGenerator(segments_train_fold, dict_segments_sequences_paths_train,
                                                 batch_size=batch_size, dict_labels=dict_labels, 
                                                 augmentations=True, training=True)

    X_val_generator = VolcanoSequencesGenerator(segments_val_fold, dict_segments_sequences_paths_train,
                                                 batch_size=batch_size, dict_labels=dict_labels, 
                                                 augmentations=False, training=True)

    print(f'Num Fold: {num_fold + 1}')
    print(f'Train segments: {len(train_index)} Val segments: {len(val_index)}')

    model = buildmodel(num_heads=num_heads, d_model=d_model, summary=True)

    history = model.fit(X_train_generator,
                        validation_data=X_val_generator,
                        batch_size=batch_size,
                        callbacks=list_callbacks,
                        epochs=100,
                        verbose=1)

    # Eval

    X_val_sequences, y_val_target = X_val_generator.generateOrderedSequences(list(segments_val_fold))

    y_pred_val = model.predict(X_val_sequences)
    y_pred_val[0], y_pred_val[1] = y_pred_val[0]*(10**6), y_pred_val[1]*(10**6)
    y_val_target = y_val_target*(10**6)

    df_train_lreg = pd.DataFrame({
        'pred_q_20' : y_pred_val[1][:, 0],
        'pred_q_50' : y_pred_val[1][:, 1],
        'pred_q_80' : y_pred_val[1][:, 2],
    })

    reg_model = LinearRegression().fit(df_train_lreg.values, y_val_target)

    df_val = pd.DataFrame({
        'segment_id' : list(segments_val_fold),
        'target_time_to_eruption' : y_val_target,
        'pred_time_to_eruption' : y_pred_val[0].squeeze(),
        'pred_q_20' : y_pred_val[1][:, 0],
        'pred_q_50' : y_pred_val[1][:, 1],
        'pred_q_80' : y_pred_val[1][:, 2],
        'pred_linear_reg' : reg_model.predict(df_train_lreg),
        'mae_pred' : np.abs(y_val_target - y_pred_val[0].squeeze()),
        'mae_q_50' : np.abs(y_val_target -  y_pred_val[1][:, 1]),
        'mae_linear_reg' : np.abs(y_val_target -  reg_model.predict(df_train_lreg)),
    })

    print('***'*20)
    print(f"Prediction MAE: {df_val['mae_pred'].mean()}\n \
            Quantile-P50 MAE: {df_val['mae_q_50'].mean()}\n \
            Linear-Reg MAE: {df_val['mae_linear_reg'].mean()}\n \
            Mean Prediction&Linear Reg: {((df_val['mae_pred']+df_val['mae_linear_reg'])/2).mean()}\n \
            Mean Prediction&Quantile-P50: {((df_val['mae_pred']+df_val['mae_q_50'])/2).mean()}")
    print('***'*20)

    list_history.append(history)
    model.save(f'./models/model_mha_{num_fold}')
    print('***'*20)

#############################################################

Num Fold: 1
Train segments: 3544 Val segments: 887
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 60001, 10)]  0                                            
__________________________________________________________________________________________________
conv_wave_net_head (ConvWaveNet (None, 60, 64)       243424      input_1[0][0]                    
__________________________________________________________________________________________________
volcano_mh_attention_sequence_m (None, None, 300)    148800      conv_wave_net_head[0][0]         
__________________________________________________________________________________________________
global_average_pooling1d (Globa (None, 300)          0           volcano_mh_attention_sequence_mod
____________________________________

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\Enric\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-3c8ebb41b027>", line 41, in <module>
    verbose=1)
  File "C:\Users\Enric\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py", line 108, in _method_wrapper
    return method(self, *args, **kwargs)
  File "C:\Users\Enric\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py", line 1098, in fit
    tmp_logs = train_function(iterator)
  File "C:\Users\Enric\anaconda3\lib\site-packages\tensorflow\python\eager\def_function.py", line 780, in __call__
    result = self._call(*args, **kwds)
  File "C:\Users\Enric\anaconda3\lib\site-packages\tensorflow\python\eager\def_function.py", line 807, in _call
    return self._stateless_fn(*args, **kwds)  # pylint: disable=not-callable
  File "C:\Users\Enric\anaconda3\lib\site-packages\tensorflow\

KeyboardInterrupt: 

In [None]:
#############################################################
# 8. Cross Val Score

list_segments_train = list(unique_segments_id_train)
batch_size = 16

kf = StratifiedKFold(n_splits = 5, shuffle=True, random_state=12)
df_val_all = pd.DataFrame()

for num_fold, (train_index, val_index) in tqdm(enumerate(kf.split(list_segments_train,
                                                             np.zeros(len(list_segments_train)))), 
                                               total=5, position=0):
    
    segments_train_fold = np.asarray(list_segments_train)[train_index]
    segments_val_fold = np.asarray(list_segments_train)[val_index]

    X_val_generator = VolcanoSequencesGenerator(segments_val_fold, dict_segments_sequences_paths_train,
                                                batch_size=batch_size, dict_labels=dict_labels, 
                                                augmentations=False, training=True)   

    model = models.load_model(f'./models/model_mha_{num_fold}', compile=False)

    X_val_sequences, y_val_target = X_val_generator.generateOrderedSequences(segments_val_fold)
    y_pred_val = model.predict(X_val_sequences)

    df_tmp = pd.DataFrame({
            'pred' :  y_pred_val[0].squeeze()*(10**6),
            'pred_q_30' : y_pred_val[1][:, 0]*(10**6),
            'pred_q_50' : y_pred_val[1][:, 1]*(10**6),
            'pred_q_70' : y_pred_val[1][:, 2]*(10**6),
            'y_true' : y_val_target*(10**6)
    })

    df_val_all = pd.concat([df_val_all, df_tmp], axis=0)

print('***'*20)
print(np.mean(np.abs(df_tmp['y_true'] - df_tmp['pred'])))
print(np.mean(np.abs(df_tmp['y_true'] - df_tmp['pred_q_50'])))
print('***'*20)

#############################################################

In [None]:
#############################################################
# 9. Inference


# del X_val_sequences, y_val_target, list_cv_pred, y_pred_cv, y_cv_target, df_cv, X_cv_sequences, y_cv_target
gc.collect()

list_models = [models.load_model(f'./models/model_mha_{i}', compile=False) for i in range(5)]

X_test_generator = VolcanoSequencesGenerator(list(unique_segments_id_test), dict_segments_sequences_paths_test,
                                             batch_size=16, dict_labels=dict_labels, 
                                             augmentations=False, training=False)

batch_size_prediction = 16
idx = 0
num_test_steps = int(np.ceil(len(unique_segments_id_test) / batch_size_prediction))
list_test_segments = list(unique_segments_id_test)
array_predictions = np.zeros((len(list_test_segments)))
for i in tqdm(range(num_test_steps), total=num_test_steps, position=0):
    list_tmp_segments = list_test_segments[idx:(idx+batch_size_prediction)]
    X_test_sequences = X_test_generator.generateOrderedSequences(list_tmp_segments)
     
    predictions = [model.predict(X_test_sequences)[0].squeeze() for model in list_models]
    predictions = np.mean(np.asarray(predictions), axis=0)*(10**6)
    array_predictions[idx:(idx+batch_size_prediction)] = predictions
    idx += batch_size_prediction   

df_submission = pd.DataFrame({
    'segment_id' : list_test_segments,
    'time_to_eruption' : array_predictions
})

df_submission.to_csv('./FinalSubmissions/' + 'submission_mha.csv', index=False)
df_submission.describe()

#############################################################

In [6]:
#############################################################
# 9. Time Test Augmentation
## Time Test augmentations on validation set (for training blend model) and  

path_output = './02_Files_TTA_Blend/'
qt_augmentations = 10
batch_size = 8
list_segments_train = list(unique_segments_id_train)
pbar = tqdm(total=5, position=0)
kf = StratifiedKFold(n_splits = 5, shuffle=True, random_state=12)
dict_predictions = {}

for num_fold, (train_index, val_index) in enumerate(kf.split(list_segments_train,
                                                     np.zeros(len(list_segments_train)))):
    
    segments_train_fold = np.asarray(list_segments_train)[train_index]
    segments_val_fold = np.asarray(list_segments_train)[val_index]
    
    X_val_generator = VolcanoSequencesGenerator(segments_val_fold, 
                                                  dict_segments_sequences_paths_train,
                                                  batch_size=batch_size, dict_labels=dict_labels, 
                                                  augmentations=True, training=True)   
    
    y_val_target = np.asarray([dict_labels[segment] for segment in segments_val_fold])
        
    idx = 0
    num_steps = int(np.ceil(segments_val_fold.shape[0]/ batch_size))
    list_segments = list(segments_val_fold)
    
    array_predictions = np.zeros((len(list_segments)), dtype=np.float32)
    array_confidence = np.zeros((len(list_segments)), dtype=np.float32)
    array_predictions_std = np.zeros((len(list_segments)), dtype=np.float32)
    array_confidence_std = np.zeros((len(list_segments)), dtype=np.float32)
    
    model = models.load_model(f'./models/model_mha_{num_fold}', compile=False)
    
    for i in range(num_steps):
        print(f'{i}/{num_steps}')
        list_tmp_segments = list_segments[idx:(idx+batch_size)]
        list_predictions, list_conf = [], []
        for i in range(qt_augmentations):
            
            X_val_sequences = X_val_generator.generateOrderedSequences(list_tmp_segments)
            y_pred_val = model(X_val_sequences)
            
            list_predictions.append(y_pred_val[0].numpy())
            list_conf.append(y_pred_val[1].numpy()[:, 2] - y_pred_val[1].numpy()[:, 0])

        array_predictions[idx:(idx+batch_size)] = np.asarray(list_predictions).mean(axis=0).squeeze().astype(np.float32)
        array_confidence[idx:(idx+batch_size)] = np.asarray(list_conf).mean(axis=0).squeeze().astype(np.float32)
        array_predictions_std[idx:(idx+batch_size)] = np.asarray(list_predictions).std(axis=0).squeeze().astype(np.float32)
        array_confidence_std[idx:(idx+batch_size)] = np.asarray(list_conf).std(axis=0).squeeze().astype(np.float32)
        idx += batch_size
        
        
    dict_predictions[num_fold] = {
        'segment_id' : segments_val_fold, 'y_true' : y_val_target,
        'pred_mean': array_predictions, 'pred_std': array_predictions_std,
        'conf_mean' : array_confidence, 'conf_std' : array_confidence_std
    }
    
    pbar.update(1)
    
    
pbar.close()

# Prepare output

columns_names = ['segment_id', 'y_true', 'mha_pred_mean', 'mha_pred_std', 'mha_conf_mean', 'mha_conf_std']
dict_df = {}
for col in columns_names:
    if col.split('_')[0] in ['mha']:
        key_1, key_2, key_3 = col.split('_')[0], col.split('_')[1], '_'.join([col.split('_')[2], col.split('_')[3]])
        dict_df[col] = dict_predictions[key_1][int(key_2)][key_3]
    else:
        dict_df[col] = np.concatenate(list(dict_predictions[col].values()))


dict_build_df = {
    'segment_id' : dict_df['segment_id'].squeeze(),
    'mha_pred_mean' : np.concatenate([dict_df['mha_0_pred_mean'], dict_df['mha_1_pred_mean'], dict_df['mha_2_pred_mean'],
                                      dict_df['mha_3_pred_mean'], dict_df['mha_4_pred_mean']]).squeeze(),
    'mha_pred_std' : np.concatenate([dict_df['mha_0_pred_std'], dict_df['mha_1_pred_std'], dict_df['mha_2_pred_std'],
                                     dict_df['mha_3_pred_std'], dict_df['mha_4_pred_std']]).squeeze(),
    'mha_conf_mean' : np.concatenate([dict_df['mha_0_conf_mean'], dict_df['mha_1_conf_mean'], dict_df['mha_2_conf_mean'],
                                      dict_df['mha_3_conf_mean'], dict_df['mha_4_conf_mean']]).squeeze(),
    'mha_conf_std' : np.concatenate([dict_df['mha_0_conf_std'], dict_df['mha_1_conf_std'], dict_df['mha_2_conf_std'],
                                     dict_df['mha_3_conf_std'], dict_df['mha_4_conf_std']]).squeeze()
}    


#############################################################

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]

0/111
1/111
2/111
3/111
4/111
5/111
6/111
7/111
8/111
9/111
10/111
11/111
12/111
13/111
14/111
15/111
16/111
17/111
18/111
19/111
20/111
21/111
22/111
23/111
24/111
25/111
26/111
27/111
28/111
29/111
30/111
31/111
32/111
33/111
34/111
35/111
36/111
37/111
38/111
39/111
40/111
41/111
42/111
43/111
44/111
45/111
46/111
47/111
48/111
49/111
50/111
51/111
52/111
53/111
54/111
55/111
56/111
57/111
58/111
59/111
60/111
61/111
62/111
63/111
64/111
65/111
66/111
67/111
68/111
69/111
70/111
71/111
72/111
73/111
74/111
75/111
76/111
77/111
78/111
79/111
80/111
81/111
82/111
83/111
84/111
85/111
86/111
87/111
88/111
89/111
90/111
91/111
92/111
93/111
94/111
95/111
96/111
97/111
98/111
99/111
100/111
101/111
102/111
103/111
104/111
105/111
106/111
107/111
108/111
109/111
110/111


 20%|████████████████▌                                                                  | 1/5 [05:02<20:08, 302.09s/it]

0/111
1/111
2/111
3/111
4/111
5/111
6/111
7/111
8/111
9/111
10/111
11/111
12/111
13/111
14/111
15/111
16/111
17/111
18/111
19/111
20/111
21/111
22/111
23/111
24/111
25/111
26/111
27/111
28/111
29/111
30/111
31/111
32/111
33/111
34/111
35/111
36/111
37/111
38/111
39/111
40/111
41/111
42/111
43/111
44/111
45/111
46/111
47/111
48/111
49/111
50/111
51/111
52/111
53/111
54/111
55/111
56/111
57/111
58/111
59/111
60/111
61/111
62/111
63/111
64/111
65/111
66/111
67/111
68/111
69/111
70/111
71/111
72/111
73/111
74/111
75/111
76/111
77/111
78/111
79/111
80/111
81/111
82/111
83/111
84/111
85/111
86/111
87/111
88/111
89/111
90/111
91/111
92/111
93/111
94/111
95/111
96/111
97/111
98/111
99/111
100/111
101/111
102/111
103/111
104/111
105/111
106/111
107/111
108/111
109/111
110/111


 40%|█████████████████████████████████▏                                                 | 2/5 [10:01<15:03, 301.16s/it]

0/111
1/111
2/111
3/111
4/111
5/111
6/111
7/111
8/111
9/111
10/111
11/111
12/111
13/111
14/111
15/111
16/111
17/111
18/111
19/111
20/111
21/111
22/111
23/111
24/111
25/111
26/111
27/111
28/111
29/111
30/111
31/111
32/111
33/111
34/111
35/111
36/111
37/111
38/111
39/111
40/111
41/111
42/111
43/111
44/111
45/111
46/111
47/111
48/111
49/111
50/111
51/111
52/111
53/111
54/111
55/111
56/111
57/111
58/111
59/111
60/111
61/111
62/111
63/111
64/111
65/111
66/111
67/111
68/111
69/111
70/111
71/111
72/111
73/111
74/111
75/111
76/111
77/111
78/111
79/111
80/111
81/111
82/111
83/111
84/111
85/111
86/111
87/111
88/111
89/111
90/111
91/111
92/111
93/111
94/111


KeyboardInterrupt: 