In [None]:
import re, os, gc, time, pandas as pd, numpy as np
import tqdm

np.random.seed(32)
#os.environ["OMP_NUM_THREADS"] = "5"
from nltk import tokenize, word_tokenize
from keras.layers import Dense, Input, LSTM, GRU, Embedding, Dropout, Activation, Conv1D
from keras.layers import Bidirectional, Add, Flatten, TimeDistributed,CuDNNGRU,CuDNNLSTM
from keras.optimizers import Adam, RMSprop
from keras.models import Model, load_model
from keras import initializers, regularizers, constraints, optimizers, layers
from keras import backend as K
# from keras.engine.topology import Layer
from keras.engine import InputSpec, Layer
from preprocess_utils import preprocess
from global_variables import TRAIN_FILENAME, TEST_FILENAME, COMMENT, LIST_CLASSES, UNKNOWN_WORD
import logging
from collections import Counter
from sklearn.metrics import roc_auc_score
from keras.callbacks import Callback
from nltk.tokenize import TweetTokenizer
from keras.preprocessing import text, sequence
from keras.callbacks import EarlyStopping,ModelCheckpoint
   
embed_size = 300
max_features = 150000
max_text_len = 300

# EMBEDDING_FILE = "../input/glove840b300dtxt/glove.840B.300d.txt
EMBEDDING_FILE = "assets/embedding_models/ft_300d_crawl/crawl-300d-2M.vec"

class RocAucEvaluation(Callback):
    def __init__(self, validation_data=(), interval=1):
        super(Callback, self).__init__()

        self.interval = interval
        self.X_val, self.y_val = validation_data

    def on_epoch_end(self, epoch, logs={}):
        if epoch % self.interval == 0:
            y_pred = self.model.predict(self.X_val, verbose=0)
            score = roc_auc_score(self.y_val, y_pred)
            print("\n ROC-AUC - epoch: {:d} - score: {:.6f}".format(epoch + 1, score))

def rm_hyperlinks(words):
    words = [w if not (w.startswith('http') or
                       w.startswith('www') or
                       w.endswith('.com') or
                        w.startswith('en.wikipedia.org/')) else 'url' for w in words]
    return words

def strip_spaces(words):
    return [w.replace(' ', '') for w in words]

def tokenize_sentences(sentences):
    twitter_tokenizer = TweetTokenizer()
    tokenized_sentences = []
    for sentence in tqdm.tqdm(sentences,mininterval=5):
        if hasattr(sentence, "decode"):
            sentence = sentence.decode("utf-8")
        tokens = twitter_tokenizer.tokenize(sentence)
        tokenized_sentences.append(tokens)
    return tokenized_sentences

def tokenize_list_of_sentences(list_of_sentences):

    list_of_tokenized_sentences = []
    for sentences in list_of_sentences:
        tokenized_sentences = tokenize_sentences(sentences)

        # more preprocess on word level
        tokenized_sentences = [rm_hyperlinks(s) for s in tokenized_sentences]
        tokenized_sentences = [strip_spaces(s) for s in tokenized_sentences]
        list_of_tokenized_sentences.append(tokenized_sentences)

    return list_of_tokenized_sentences

def create_word2id(list_of_tokenized_sentences,max_features):
    word_counter = Counter()
    print('CREATING VOCABULARY')
    for tokenized_sentences in list_of_tokenized_sentences:
        for tokens in tqdm.tqdm(tokenized_sentences):
            word_counter.update(tokens)

    raw_counts = word_counter.most_common(max_features)
    vocab = [char_tuple[0] for char_tuple in raw_counts]
    print('%s words detected, keeping %s words' % (len(word_counter), len(vocab)))
    word2id = {word: (ind + 1) for ind, word in enumerate(vocab)}
    word2id[UNKNOWN_WORD] = len(word2id)
    id2word = dict((id, word) for word, id in word2id.items())
    return word2id, id2word

def tokenized_sentences2seq(tokenized_sentences, words_dict):
    print('converting to sequence')
    sequences = []
    for sentence in tqdm.tqdm(tokenized_sentences, mininterval=5):
        seq = []
        for token in sentence:
            try:
                seq.append(words_dict[token])
            except KeyError:
                seq.append(words_dict[UNKNOWN_WORD])
        sequences.append(seq)
    return sequences

def tokenized_sentences2seq2(tokenized_sentences, words_dict):
    print('converting to sequence')
    sequences = [words_dict[token] if token in words_dict else words_dict[UNKNOWN_WORD] for token in tqdm.tqdm(tokenized_sentences, mininterval=5)]
    return sequences

def get_coefs(word, *arr): return word, np.asarray(arr, dtype='float32')

def convert_tokens_to_ids(tokenized_sentences, embedding_word_dict, id2word):
    words_train = []
    'converting word index to embedding index'
    for sentence in tqdm.tqdm(tokenized_sentences):
        current_words = []
        for word_index in sentence:
            try:
                word = id2word[word_index]
                word_id = embedding_word_dict.get(word, len(embedding_word_dict) - 2)
            except KeyError:
                word_id = embedding_word_dict.get(UNKNOWN_WORD, len(embedding_word_dict) - 2)
            current_words.append(word_id)

        if len(current_words) >= max_text_len:
            current_words = current_words[:max_text_len]
        else:
            current_words += [len(embedding_word_dict) - 1] * (max_text_len - len(current_words))
        words_train.append(current_words)
    return words_train

tic = time.time()

train_data = pd.read_csv(TRAIN_FILENAME)
test_data = pd.read_csv(TEST_FILENAME)
Y = train_data[LIST_CLASSES].values

test_data = preprocess(test_data)
train_data = preprocess(train_data)

train_data = train_data["comment_text"].fillna("fillna").values
test_data = test_data["comment_text"].fillna("fillna").values

tokenizer = text.Tokenizer(num_words=max_features)
print('fitting tokenizer')
tokenizer.fit_on_texts(list(train_data) + list(test_data))
train_data = tokenizer.texts_to_sequences(train_data)
test_data = tokenizer.texts_to_sequences(test_data)
X = sequence.pad_sequences(train_data, maxlen=max_text_len)
X_test = sequence.pad_sequences(test_data, maxlen=max_text_len)

del train_data
del test_data

print('getting embeddings')
def get_coefs(word, *arr): return word, np.asarray(arr, dtype='float32')
embeddings_index = dict(get_coefs(*o.rstrip().rsplit(' ')) for o in open(EMBEDDING_FILE))

word_index = tokenizer.word_index
nb_words = min(max_features, len(word_index))
embedding_matrix = np.zeros((nb_words, embed_size))
for word, i in word_index.items():
    if i >= max_features: continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None: embedding_matrix[i] = embedding_vector

print('done')

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


preprocessing
lowercase
removing breaks
expanding contractions
replacing smileys
replacing ip
removing links
replacing numbers
removing bigrams
isolating punct
preprocessing
lowercase
removing breaks
expanding contractions
replacing smileys
replacing ip
removing links
replacing numbers
removing bigrams
isolating punct
fitting tokenizer


In [2]:
from keras.layers import K, Activation
from keras.engine import Layer
from keras.layers import Dense, Input, Embedding, Dropout, Bidirectional, GRU, Flatten, SpatialDropout1D


def squash(x, axis=-1):
    # s_squared_norm is really small
    # s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
    # scale = K.sqrt(s_squared_norm)/ (0.5 + s_squared_norm)
    # return scale * x
    s_squared_norm = K.sum(K.square(x), axis, keepdims=True)
    scale = K.sqrt(s_squared_norm + K.epsilon())
    return x / scale

class AttentionWeightedAverage(Layer):
    """
    Computes a weighted average of the different channels across timesteps.
    Uses 1 parameter pr. channel to compute the attention value for a single timestep.
    """

    def __init__(self, return_attention=False, **kwargs):
        self.init = initializers.get('uniform')
        self.supports_masking = True
        self.return_attention = return_attention
        super(AttentionWeightedAverage, self).__init__(**kwargs)

    def build(self, input_shape):
        self.input_spec = [InputSpec(ndim=3)]
        assert len(input_shape) == 3

        self.W = self.add_weight(shape=(input_shape[2], 1),
                                 name='{}_W'.format(self.name),
                                 initializer=self.init)
        self.trainable_weights = [self.W]
        super(AttentionWeightedAverage, self).build(input_shape)

    def call(self, x, mask=None):
        # computes a probability distribution over the timesteps
        # uses 'max trick' for numerical stability
        # reshape is done to avoid issue with Tensorflow
        # and 1-dimensional weights
        logits = K.dot(x, self.W)
        x_shape = K.shape(x)
        logits = K.reshape(logits, (x_shape[0], x_shape[1]))
        ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

        # masked timesteps have zero weight
        if mask is not None:
            mask = K.cast(mask, K.floatx())
            ai = ai * mask
        att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
        weighted_input = x * K.expand_dims(att_weights)
        result = K.sum(weighted_input, axis=1)
        if self.return_attention:
            return [result, att_weights]
        return result

    def get_output_shape_for(self, input_shape):
        return self.compute_output_shape(input_shape)

    def compute_output_shape(self, input_shape):
        output_len = input_shape[2]
        if self.return_attention:
            return [(input_shape[0], output_len), (input_shape[0], input_shape[1])]
        return (input_shape[0], output_len)

    def compute_mask(self, input, input_mask=None):
        if isinstance(input_mask, list):
            return [None] * len(input_mask)
        else:
            return None

# A Capsule Implement with Pure Keras
class Capsule(Layer):
    def __init__(self, num_capsule, dim_capsule, routings=3, kernel_size=(9, 1), share_weights=True,
                 activation='default', **kwargs):
        super(Capsule, self).__init__(**kwargs)
        self.num_capsule = num_capsule
        self.dim_capsule = dim_capsule
        self.routings = routings
        self.kernel_size = kernel_size
        self.share_weights = share_weights
        if activation == 'default':
            self.activation = squash
        else:
            self.activation = Activation(activation)

    def build(self, input_shape):
        super(Capsule, self).build(input_shape)
        input_dim_capsule = input_shape[-1]
        if self.share_weights:
            self.W = self.add_weight(name='capsule_kernel',
                                     shape=(1, input_dim_capsule,
                                            self.num_capsule * self.dim_capsule),
                                     # shape=self.kernel_size,
                                     initializer='glorot_uniform',
                                     trainable=True)
        else:
            input_num_capsule = input_shape[-2]
            self.W = self.add_weight(name='capsule_kernel',
                                     shape=(input_num_capsule,
                                            input_dim_capsule,
                                            self.num_capsule * self.dim_capsule),
                                     initializer='glorot_uniform',
                                     trainable=True)

    def call(self, u_vecs):
        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        batch_size = K.shape(u_vecs)[0]
        input_num_capsule = K.shape(u_vecs)[1]
        u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                            self.num_capsule, self.dim_capsule))
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
        # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

        b = K.zeros_like(u_hat_vecs[:, :, :, 0])  # shape = [None, num_capsule, input_num_capsule]
        for i in range(self.routings):
            b = K.permute_dimensions(b, (0, 2, 1))  # shape = [None, input_num_capsule, num_capsule]
            c = K.softmax(b)
            c = K.permute_dimensions(c, (0, 2, 1))
            b = K.permute_dimensions(b, (0, 2, 1))
            outputs = self.activation(K.batch_dot(c, u_hat_vecs, [2, 2]))
            if i < self.routings - 1:
                b = K.batch_dot(outputs, u_hat_vecs, [2, 3])

        return outputs

    def compute_output_shape(self, input_shape):
        return (None, self.num_capsule, self.dim_capsule)


In [14]:
Routings = 5
Num_capsule = 6
Dim_capsule = 32
dropout_p = 0.4
rate_drop_dense = 0.4

def build_model(lr=0.0):
    inp = Input(shape=(max_text_len, ))
    x = Embedding(max_features, embed_size, weights=[embedding_matrix],trainable=False)(inp)
    x = SpatialDropout1D(rate_drop_dense)(x)
    capsule = Capsule(num_capsule=64, dim_capsule=8, routings=Routings,share_weights=True)(x)
    capsule = Capsule(num_capsule=32, dim_capsule=8, routings=Routings,share_weights=True)(capsule)
    capsule = Capsule(num_capsule=16, dim_capsule=8, routings=Routings,share_weights=True)(capsule)
    capsule = Capsule(num_capsule=8, dim_capsule=8, routings=Routings,share_weights=True)(capsule)
    # output_capsule = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(capsule)
    capsule = Flatten()(capsule)
    capsule = Dropout(dropout_p)(capsule)
    output = Dense(6, activation='sigmoid')(capsule)
    model = Model(inputs=inp, outputs=output)
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'])
    return model


model = build_model(lr=1e-3)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_31 (InputLayer)        (None, 300)               0         
_________________________________________________________________
embedding_31 (Embedding)     (None, 300, 300)          45000000  
_________________________________________________________________
spatial_dropout1d_31 (Spatia (None, 300, 300)          0         
_________________________________________________________________
capsule_49 (Capsule)         (None, 64, 8)             153600    
_________________________________________________________________
capsule_50 (Capsule)         (None, 32, 8)             2048      
_________________________________________________________________
capsule_51 (Capsule)         (None, 16, 8)             1024      
_________________________________________________________________
capsule_52 (Capsule)         (None, 8, 8)              512       
__________

In [21]:

def build_model(lr=0.0):
    inp = Input(shape=(max_text_len, ))
    x = Embedding(max_features, embed_size, weights=[embedding_matrix],trainable=False)(inp)
    x = SpatialDropout1D(rate_drop_dense)(x)
    x = Dense(512, activation='relu')(x)
    x = Dense(256, activation='relu')(x)
    x = Dense(256, activation='relu')(x)
    # output_capsule = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(capsule)
    x = Bidirectional(CuDNNGRU(64))(x)
    output = Dense(6, activation='sigmoid')(x)
    model = Model(inputs=inp, outputs=output)
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'])
    return model


model = build_model(lr=1e-3)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_37 (InputLayer)        (None, 300)               0         
_________________________________________________________________
embedding_37 (Embedding)     (None, 300, 300)          45000000  
_________________________________________________________________
spatial_dropout1d_37 (Spatia (None, 300, 300)          0         
_________________________________________________________________
dense_49 (Dense)             (None, 300, 512)          154112    
_________________________________________________________________
dense_50 (Dense)             (None, 300, 256)          131328    
_________________________________________________________________
dense_51 (Dense)             (None, 300, 256)          65792     
_________________________________________________________________
bidirectional_26 (Bidirectio (None, 128)               123648    
__________

In [23]:
fold_count = 1
fold_size = len(X) // 10
for fold_id in range(fold_count):
    fold_start = fold_size * fold_id
    fold_end = fold_start + fold_size

    if fold_id == 9:
        fold_end = len(X)

    X_valid = X[fold_start:fold_end]
    Y_valid = Y[fold_start:fold_end]
    X_train = np.concatenate([X[:fold_start], X[fold_end:]])
    Y_train = np.concatenate([Y[:fold_start], Y[fold_end:]])

    model = build_model(lr = 0.001)
    file_path = "CAPS5_%s_.hdf5" %fold_id
    ra_val = RocAucEvaluation(validation_data = (X_valid, Y_valid), interval = 1)
    check_point = ModelCheckpoint(file_path, monitor = "val_loss", mode = "min", save_best_only = True, verbose = 1)
    history = model.fit(X_train, Y_train, batch_size = 128, epochs = 10, validation_data = (X_valid, Y_valid),
                  verbose = 1, callbacks = [ra_val, check_point])

ResourceExhaustedError: OOM when allocating tensor with shape[150000,300]
	 [[Node: embedding_39/embeddings/Assign = Assign[T=DT_FLOAT, _class=["loc:@embedding_39/embeddings"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](embedding_39/embeddings, embedding_39/random_uniform)]]

Caused by op 'embedding_39/embeddings/Assign', defined at:
  File "/home/christof/miniconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/christof/miniconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/christof/miniconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/christof/miniconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/home/christof/miniconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 112, in start
    self.asyncio_loop.run_forever()
  File "/home/christof/miniconda3/lib/python3.6/asyncio/base_events.py", line 421, in run_forever
    self._run_once()
  File "/home/christof/miniconda3/lib/python3.6/asyncio/base_events.py", line 1426, in _run_once
    handle._run()
  File "/home/christof/miniconda3/lib/python3.6/asyncio/events.py", line 127, in _run
    self._callback(*self._args)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 102, in _handle_events
    handler_func(fileobj, events)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home/christof/miniconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/christof/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-23-a9d40830e021>", line 15, in <module>
    model = build_model(lr = 0.001)
  File "<ipython-input-21-f3c1eb1cbdc2>", line 4, in build_model
    x = Embedding(max_features, embed_size, weights=[embedding_matrix],trainable=False)(inp)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/keras/engine/topology.py", line 590, in __call__
    self.build(input_shapes[0])
  File "/home/christof/miniconda3/lib/python3.6/site-packages/keras/layers/embeddings.py", line 105, in build
    dtype=self.dtype)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/keras/engine/topology.py", line 414, in add_weight
    constraint=constraint)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 392, in variable
    v = tf.Variable(value, dtype=tf.as_dtype(dtype), name=name)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 213, in __init__
    constraint=constraint)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 346, in _init_from_args
    validate_shape=validate_shape).op
  File "/home/christof/miniconda3/lib/python3.6/site-packages/tensorflow/python/ops/state_ops.py", line 276, in assign
    validate_shape=validate_shape)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_state_ops.py", line 57, in assign
    use_locking=use_locking, name=name)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/home/christof/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[150000,300]
	 [[Node: embedding_39/embeddings/Assign = Assign[T=DT_FLOAT, _class=["loc:@embedding_39/embeddings"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](embedding_39/embeddings, embedding_39/random_uniform)]]


In [None]:
fold_count = 10
fold_size = len(X) // 10
for fold_id in range(fold_count):
    fold_start = fold_size * fold_id
    fold_end = fold_start + fold_size

    if fold_id == 9:
        fold_end = len(X)

    X_valid = X[fold_start:fold_end]
    Y_valid = Y[fold_start:fold_end]
    X_train = np.concatenate([X[:fold_start], X[fold_end:]])
    Y_train = np.concatenate([Y[:fold_start], Y[fold_end:]])

    model = build_model(lr = 0.001)
    file_path = "CAPS4_%s_.hdf5" %fold_id
    es = EarlyStopping(monitor='val_loss',patience=5, mode='min')
    rop = ReduceLROnPlateau(monitor='val_loss', patience=1, mode='min',epsilon=1e-6, factor=0.9)
    ra_val = RocAucEvaluation(validation_data = (X_valid, Y_valid), interval = 1)
    check_point = ModelCheckpoint(file_path, monitor = "val_loss", mode = "min", save_best_only = True, verbose = 1)
    history = model.fit(X_train, Y_train, batch_size = 256, epochs = 10, validation_data = (X_valid, Y_valid),
                  verbose = 1, callbacks = [ra_val, check_point, es,rop])

                               

In [7]:
list_of_preds = []
list_of_vals = []
list_of_y = []
fold_count = 10
fold_size = len(X) // 10
for fold_id in range(0, fold_count):
    fold_start = fold_size * fold_id
    fold_end = fold_start + fold_size

    if fold_id == 9:
        fold_end = len(X)

    X_valid = X[fold_start:fold_end]
    Y_valid = Y[fold_start:fold_end]
    X_train = np.concatenate([X[:fold_start], X[fold_end:]])
    Y_train = np.concatenate([Y[:fold_start], Y[fold_end:]])

    file_path = 'CAPS4_' + str(fold_id) + '_.hdf5'
    model = build_model(lr = 0.001)
    model.load_weights(file_path)
    #model = load_model(file_path,custom_objects = {"Capsule": Capsule})
    preds = model.predict(X_test, batch_size = 256, verbose = 1)
    list_of_preds.append(preds)
    vals = model.predict(X_valid, batch_size = 256, verbose = 1)
    list_of_vals.append(vals)
    list_of_y.append(Y_valid)
test_predicts = np.zeros(list_of_preds[0].shape)
for fold_predict in list_of_preds:
    test_predicts += fold_predict

test_predicts /= len(list_of_preds)
submission = pd.read_csv('assets/raw_data/sample_submission.csv')
submission[LIST_CLASSES] = test_predicts
submission.to_csv('CAPS4_l2_test_data.csv', index=False)

l2_data = pd.DataFrame(columns=['logits_' + c for c in LIST_CLASSES]+LIST_CLASSES)
l2_data[['logits_' + c for c in LIST_CLASSES]] = pd.DataFrame(np.concatenate(list_of_vals,axis = 0))
l2_data[LIST_CLASSES] = pd.DataFrame(np.concatenate(list_of_y,axis = 0))
l2_data.to_csv('CAPS4_l2_train_data.csv')

