# Deep learning tweeter

In [None]:
%load_ext tensorboard


## Imports

In [1]:
import sys
import cufflinks
import pandas as pd
import numpy as np
from tqdm import tqdm
import warnings
import copy
import pickle

warnings.filterwarnings('ignore')
seed = 5
np.random.seed(seed)

sys.path.append('..')
cufflinks.go_offline()

In [2]:
from Corpus.Corpus import get_corpus, filter_binary_pn, filter_corpus_small
from auxiliar.VectorizerHelper import vectorizer, vectorizerIdf, tokenize, procesar_corpus
from auxiliar import parameters
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import recall_score
from auxiliar.HtmlParser import HtmlParser

In [3]:
import tensorflow as tf
from tensorflow.python.keras.callbacks import TensorBoard

In [4]:
import math
import pickle
import pandas as pd
import Levenshtein as lv
from nltk.stem import SnowballStemmer
from gensim.models import Doc2Vec
import gensim
from nltk.tokenize import word_tokenize
from Corpus.Corpus import get_corpus, filter_binary_pn, filter_corpus_small
from time import time, strftime

In [5]:
import nltk
# nltk.download()

## Config

In [6]:
polarity_dim = 3
# clasificadores=['lstm', '2lstm', '2dcnn', '2dcnn+lstm', 'cnn+lstm', 'bidirectionalLstm']
clasificadores=['lstm']
idf = True
target_names=['Neg', 'Pos']
kfolds = 10
base_dir = '2-clases' if polarity_dim == 2 else ('3-clases' if polarity_dim == 3 else '5-clases')
name = 'deep_learning'

In [7]:
w2vec_file = 'data/w2vec.bin'
stemmer = SnowballStemmer('spanish')

## Get data

Get train corpus and filter it by polarity

In [8]:
# cine = HtmlParser(200, "http://www.muchocine.net/criticas_ultimas.php", 1)
data_corpus = get_corpus('general-corpus', 'general-corpus', 1, None)

if polarity_dim == 2:
    data_corpus = filter_binary_pn(data_corpus)
#     cine = filter_binary_pn(cine.get_corpus())
elif polarity_dim == 3:
    data_corpus = filter_corpus_small(data_corpus)
#     cine = filter_corpus_small(cine.get_corpus())
# used_data = cine[:5000]

#Intentando obtener datos del archivo csv...
/home/suampa/Documentos/SentimentAnalysis/Corpus/../data/general-corpus.csv
#Datos recuperados!


In [14]:
data_corpus.reset_index().groupby('polarity').agg({'index': 'count'}).iplot(kind='bar')

We have a very low amount of neutral elements, it will lead to bad results

## Preprocess

we use our axiliar **preprocessor** function (VectorizerHelper.procesar_corpus) with params:

text, process_text, stop_words, negation, repeated_letters

In [15]:
def apply_prepro(data):
    return procesar_corpus(data, True, True, False, True)
data_corpus.content = data_corpus.content.apply(apply_prepro)

### Model initialization

In [16]:
model = gensim.models.keyedvectors.KeyedVectors.load_word2vec_format(w2vec_file, binary=True)

### Tokenize texts

We use auxiliar function **tokenize** to split content in tokens (words). This method receives a flag that indicates if it will use stemming or not

In [17]:
def apply_tokenization(data):
    return tokenize(data, False)
tokens = data_corpus.content.apply(apply_tokenization)

In [18]:
token_df = pd.DataFrame([x for x in tokens]).transpose()
token_df.columns = pd.MultiIndex.from_arrays([data_corpus.polarity, token_df.columns])

In [19]:
print('palabras totales', token_df.count().sum())
print('media de palabras por texto', token_df.count().mean())

palabras totales 44512
media de palabras por texto 8.87931378416118


In [20]:
token_df.count().iplot(kind='histogram')

Although most of the documents have a maximum amount of 10 words, we will use 28 words as maximum, to avoid penalization on train data

In [21]:
pd.DataFrame([token_df[x].count().sum() for x in token_df.columns.levels[0]]).iplot(kind='bar')

There is a several inequality in amount of words per class, as it was expected

### w2vec process

In [22]:
# stem_vocab = np.array([stemmer.stem(x) for x in model.vocab])
stem_vocab = np.array([x for x in model.vocab])
stem_vocab_dict = dict.fromkeys(stem_vocab, 1)

#### Not found in vocab

In [27]:
not_in_vocab = pd.concat([token_df[d][token_df[d].apply(lambda x: x not in stem_vocab_dict)] for d in token_df.columns], axis=1)

In [28]:
print('total words not found in vocab', not_in_vocab.count().sum())
print('not found words mean', not_in_vocab.count().mean())

total de palabras no encontradas en el vocabulario 2065
media de palabras no encontradas en el conjunto 0.41192898463993616


In [29]:
pd.DataFrame([not_in_vocab[x].count().sum() for x in not_in_vocab.columns.levels[0]]).iplot(kind='bar')

In [30]:
not_in_vocab.columns = not_in_vocab.columns.droplevel()

In [31]:
not_in_vocab_words = pd.DataFrame(
    [x for sublist in [not_in_vocab[y].dropna().values for y in not_in_vocab.columns] for x in sublist]
).drop_duplicates()

#### Found in vocab

In [32]:
in_vocab = pd.concat([token_df[d][token_df[d].apply(lambda x: x in stem_vocab_dict)] for d in token_df.columns], axis=1)

In [33]:
in_vocab.columns = in_vocab.columns.droplevel()

In [34]:
in_vocab_words = pd.DataFrame(
    [x for sublist in [in_vocab[y].dropna().values for y in in_vocab.columns] for x in sublist]
).drop_duplicates()

In [35]:
in_vocab.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,5003,5004,5005,5006,5007,5008,5009,5010,5011,5012
0,gracias,off,conozco,toca,buen,escaño,buenos,sistema,caca,buen,...,ya,rajoy,rick,,nace,muy,más,crean,sorprendente,está
1,mar,pensando,adicto,grabación,día,listo,días,económico,ajuste,viernes,...,dos,da,santorum,será,jirafa,indignante,pobres,banco,huída,muy
2,,regalito,drama,especial,primero,empezar,em,recorta,,,...,ganas,espalda,retira,presidente,primera,si,discriminar,productos,hoy,bien
3,,sinde,ja,navideño,mandar,congreso,no,dinero,,,...,verte,post,campaña,,su,repara,mujer,mujeres,senado,versión
4,,va,ja,mari,abrazo,,ira,prestaciones,,,...,rt,buzón,primarias,,especie,hoy,,cáncer,rajoy,gallega


#### Replace process

In [36]:
def replace_words(w):
    found_positions = np.where(stem_vocab == w)[0]
    return model[vocab_keys[found_positions[0]]]

In [37]:
def custom_levenshtein(word, dictionary):
    ascii_matrix = np.repeat([word], [dictionary.shape[0]], axis=0)
    difference = ascii_matrix - dictionary
    difference = np.where(difference != 0, 1, difference)
    difference_sum = np.sum(difference, axis=1)
    minval = np.min(difference_sum)
    minidx = np.argmin(difference_sum)
    if minval < 2 and minval > 0:
        return minidx
    else:
        return None

In [38]:
def find_coincidences(word, dictionary):
    min_len = max(len(word) - 1, 1)
    max_len = len(word) + 1
    dist = np.array([lv.distance(x, word) - x.startswith(word) if len(x) > min_len and len(x) < max_len else 999 for x in dictionary])
    minval = np.min(dist)
    if minval < 2 and minval > 0:
        minidx = np.argmin(dist)
#         root_logger.info('current word %s - found_distance %s - idx to replace %s word', word, minval, minidx)
        return dictionary[minidx], np.delete(dictionary, minidx)
    else:
        return None, dictionary
    
    
    

In [39]:
import gc
gc.collect()

83557

In [41]:
dictionary = not_in_vocab_words.values.reshape(1,-1)[0]
ascii_dictionary = np.array([np.array([ord(x) for x in y]) for y in dictionary])
padded_dictionary = tf.keras.preprocessing.sequence.pad_sequences(ascii_dictionary, maxlen=23, padding="post")

found = dict()
with tqdm(total=len(padded_asci_codes)) as pbar:
    for i, val in enumerate(padded_asci_codes):
        pbar.update(1)
        coincidence = custom_levenshtein(val, padded_dictionary)
        if coincidence:
            found[dictionary[coincidence]] = model[stem_vocab[i]]
        if len(found) == dictionary.shape[0]:
            break

100%|██████████| 1000653/1000653 [15:54<00:00, 1048.32it/s]


In [42]:
gc.collect()

67

In [43]:
dictionary = in_vocab_words.values.reshape(1,-1)[0]
w2vec_found = dict()
with tqdm(total=len(dictionary)) as pbar:
    for word in dictionary:
        pbar.update(1)
        w2vec_found[word] = model[word]

100%|██████████| 11308/11308 [00:00<00:00, 71416.81it/s]


In [44]:
in_vocab_replaced = in_vocab.applymap(lambda x: w2vec_found[x] if x in w2vec_found else math.nan)

In [45]:
not_in_vocab_replaced = not_in_vocab.applymap(lambda x: found[x] if x in found else math.nan)

In [46]:
in_vocab_replaced.update(not_in_vocab_replaced)

#### Replace nan positions

All text must have the same length, so we need to fill those that did not match this requirement.

We will use a zero's array.

In [47]:
nan_pos = pd.DataFrame([in_vocab_replaced[c].isna() for c in in_vocab_replaced.columns]).transpose()

In [48]:
features = 300
zeros = np.zeros((features))
in_vocab_replaced.update(nan_pos.applymap(lambda x: zeros if x else math.nan))

In [49]:
in_vocab_replaced.to_pickle('tweeter_wemb_3_clases.pkl')

## Split data

In [236]:
in_vocab_replaced = pd.read_pickle('tweeter_wemb_3_clases.pkl')

In [237]:
in_vocab_replaced.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,5003,5004,5005,5006,5007,5008,5009,5010,5011,5012
0,"[0.123253495, 0.047755074, 0.18744704, -0.0576...","[-0.03825481, 0.4745884, 0.06159374, -0.211678...","[0.08140966, -0.2937571, 0.09323869, -0.070561...","[-0.08855907, -0.04540643, -0.03799705, 0.1179...","[-0.24166, -0.31998757, 0.05182405, -0.0511, 0...","[-0.30192834, -0.091402225, -0.07635854, -0.32...","[0.16187154, -0.15262279, 0.15911105, 0.136880...","[-0.03385875, -0.05679143, 0.15936868, 0.03850...","[0.08511154, -0.5224435, -0.1114207, -0.029714...","[-0.24166, -0.31998757, 0.05182405, -0.0511, 0...",...,"[0.08881656, -0.08638146, 0.19055837, 0.011414...","[0.24844696, -0.045677852, 0.023781389, -0.055...","[0.028837433, -0.1533759, -0.15925558, -0.0515...","[0.084870994, 0.018372163, -0.19153509, -0.140...","[0.19372217, -0.00981669, 0.0936164, 0.0327220...","[0.2661146, 0.10789581, 0.24465632, 0.09246798...","[0.0937914, -0.06750509, 0.11355269, -0.071939...","[0.05255723, -0.173229, -0.043076243, -0.07914...","[0.08545774, -0.18351299, 0.040896367, -0.2875...","[0.19491133, 0.13588089, 0.26361302, 0.0549132..."
1,"[-0.27860123, -0.0073691155, 0.07620924, -0.18...","[-0.13225149, 0.007982017, -0.15443377, -0.041...","[-0.22758521, 0.119482145, 0.07687994, -0.1562...","[0.19492386, 0.34440613, 0.05423296, 0.1994891...","[0.11887759, -0.062084418, 0.24743606, 0.08697...","[-0.03682754, 0.124864206, 0.08532753, 0.11072...","[0.29450724, -0.08953724, 0.22900815, -0.13844...","[-0.01992176, -0.38204813, 0.08824053, 0.02404...","[0.032929733, 0.071419924, -0.063104734, -0.08...","[0.16452287, 0.04510333, 0.17681116, -0.175676...",...,"[0.15753947, -0.11052575, 0.026150983, -0.0067...","[0.18700868, -0.04345352, -0.21399334, -0.0299...","[-0.115100406, 0.04057121, -0.051373735, -0.14...","[0.008335834, -0.13954785, 0.07759602, -0.0191...","[0.057889074, -0.0860811, 0.005500754, 0.19180...","[-0.09766652, -0.055921096, -0.18628502, -0.33...","[-0.14751814, 0.014919235, -0.06650046, -0.193...","[0.20185633, 0.16108106, 0.17906275, 0.0547961...","[-0.015799766, 0.09396541, 0.08841807, -0.0635...","[0.2661146, 0.10789581, 0.24465632, 0.09246798..."
2,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.1472689, 0.035210133, -0.0905985, 0.235152...","[-0.19423386, -0.1605221, -0.122799665, -0.226...","[0.010192282, 0.011454537, -0.024294477, -0.14...","[-0.016709665, 0.04634203, 0.12317722, -0.1102...","[0.015565158, 0.013814226, 0.2070413, -0.10364...","[0.23736276, 0.31505284, -0.36881423, 0.341764...","[-0.2906534, 0.38485017, -0.32198793, -0.46555...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",...,"[-0.15636458, -0.14417209, 0.15087254, -0.0876...","[-0.18826456, -0.26298478, -0.30475214, -0.043...","[-0.32796225, 0.12644982, 0.1373805, -0.328892...","[-0.021904068, -0.28292415, 0.036686286, -0.12...","[0.31308195, 0.28671673, 0.061079856, -0.18517...","[0.060204167, -0.17087598, 0.2108287, -0.02707...","[-0.09252765, -0.06127216, -0.21862276, -0.010...","[0.10744205, 0.0075490335, -0.28650734, 0.2955...","[0.025537454, -0.1880749, 0.1568079, -0.178805...","[-0.06769281, -0.30116892, 0.08937262, 0.08235..."
3,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.038064547, -0.045823276, -0.07485215, -0.07...","[0.25501376, -0.011608973, -0.27930972, 0.3525...","[-0.18029697, 0.08496069, 0.01006801, -9.25252...","[-0.25586495, 0.28853804, 0.059111107, -0.0686...","[0.055961747, 0.03338554, 0.17965522, -0.18108...","[-0.0312227, -0.05622646, 0.2197303, -0.072537...","[-0.17901269, -0.015474343, 0.004863352, 0.029...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",...,"[-0.2848985, -0.20139082, -0.099536225, 0.1538...","[0.08226747, 0.031416256, 0.0009123865, -0.560...","[0.40826887, -0.085287675, 0.13206185, -0.1375...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.16144432, -0.056896593, -0.029683227, -0.1...","[-0.21747229, -0.19461995, 0.11051066, -0.0933...","[-0.001835166, -0.24167489, 0.1767869, 0.11757...","[0.109622784, 0.012106776, 0.24526607, 0.17946...","[-0.099428646, 0.39090794, -0.092911236, -0.35...","[0.28721282, 0.26410416, 0.026493348, -0.04709..."
4,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.11524121, 0.048132487, 0.16708641, 0.080564...","[0.25501376, -0.011608973, -0.27930972, 0.3525...","[0.04357922, -0.36061037, -0.74110204, 0.37670...","[6.7697583e-06, -0.39855585, 0.1004604, -0.018...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.15381046, -0.13758525, 0.09431736, -0.5013...","[-0.324229, 0.33888578, -0.41092703, 0.3568749...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",...,"[0.5313497, -0.14747444, -0.03454075, -0.03119...","[-0.38848424, -0.050834656, 0.008239044, 0.219...","[0.10601474, -0.044864, -0.20512107, -0.215069...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.23924325, 0.06999114, 0.06436243, -0.126434...","[0.025537454, -0.1880749, 0.1568079, -0.178805...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.44580138, 0.09412842, 0.27231395, -0.158739...","[0.24844696, -0.045677852, 0.023781389, -0.055...","[-0.13998565, 0.21335681, 0.119549684, -0.1903..."


In [52]:
split = in_vocab_replaced.shape[1] * 0.7

In [53]:
train_corpus = in_vocab_replaced.loc[:, :split - 1]
test_corpus = in_vocab_replaced.loc[:, split:]

In [54]:
print("corpus shape ", in_vocab_replaced.shape)
print("train_corpus shape ", train_corpus.shape)
print("test_corpus shape ", test_corpus.shape)

corpus shape  (28, 5013)
train_corpus shape  (28, 3509)
test_corpus shape  (28, 1503)


In [55]:
features=train_corpus[0][0].shape[0]
timesteps=train_corpus.shape[0]
elements=train_corpus.shape[1]
print("features ", features)
print("timesteps ", timesteps)
print("elements ", elements)

features  300
timesteps  28
elements  3509


In [56]:
wemb_x = np.array([np.concatenate(train_corpus[x].values) for x in train_corpus.columns])

In [57]:
wemb_test = np.array([np.concatenate(test_corpus[x].values) for x in test_corpus.columns])

In [58]:
tokenizer = tf.keras.preprocessing.text.Tokenizer()

In [59]:
tokenizer.fit_on_texts(data_corpus.content)

In [60]:
sequences = tokenizer.texts_to_sequences(data_corpus.content)

In [61]:
sequences = tf.keras.preprocessing.sequence.pad_sequences(sequences, maxlen=28, padding='post')

In [62]:
sequences.shape

(5013, 28)

In [63]:
pd.DataFrame(tokenizer.word_index.items(), columns=["word", "idx"]).to_pickle('word_index.pkl')

## Get final train data

In [64]:
wemb_x.shape

(3509, 8400)

In [65]:
x_train = wemb_x.reshape(elements, timesteps, features)

In [66]:
x_train.shape

(3509, 28, 300)

In [67]:
x_test = wemb_test.reshape(test_corpus.shape[1], timesteps, features)

In [68]:
x_test.shape

(1503, 28, 300)

In [69]:
from sklearn.preprocessing import LabelEncoder

In [76]:
def get_Y(corpus, polarity_dim=polarity_dim):
    if polarity_dim == 2:
        encoder = LabelEncoder()
        encoder.fit(corpus.polarity.values)
        Y = encoder.transform(corpus.polarity.values)
        return Y
    else:
        return tf.keras.utils.to_categorical([x for x in corpus.polarity.values])

In [128]:
data_corpus.polarity.unique()

array([3, 1, 2])

In [86]:
Y = get_Y(data_corpus, polarity_dim)

In [87]:
Y.shape

(5013, 4)

In [119]:
y_train = Y[:3509]
y_test = Y[3510:]

In [120]:
print('y_train shape ', y_train.shape)
print('y_test shape ', y_test.shape)

y_train shape  (3509, 4)
y_test shape  (1503, 4)


In [91]:
y_train

array([[0., 0., 0., 1.],
       [0., 1., 0., 0.],
       [0., 0., 0., 1.],
       ...,
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.]], dtype=float32)

In [93]:
data_corpus[:3509].polarity.iplot(kind='histogram')

In [95]:
data_corpus[3509:].polarity.iplot(kind='histogram')

### Definición de funciones

In [96]:
colors=['red', 'blue','red', 'blue','red', 'blue','red', 'blue','red', 'blue','red', 'blue','red', 'blue','red', 'blue','red', 'blue','red', 'blue']

In [104]:
kf = KFold(n_splits=kfolds, shuffle=True, random_state=None) # realización de k-folds
folds = kf.split(x_train)
folds = pd.DataFrame([x for x in folds])
folds.to_pickle('folds_3_clases.pkl')
folds = pd.read_pickle('folds_3_clases.pkl')

In [105]:
folds = folds.values

In [107]:
def convert_to_df(hist):
    cols = pd.MultiIndex.from_product([["step1", "step2", "step3", "step4", "step5", "step6", "step7", "step8", "step9", "step10"], hist[0].history.keys()])
    hist_df = pd.concat([pd.DataFrame(x.history) for x in  hist], axis=1)
    hist_df.columns = cols
    hist_df.head()
    return hist_df

In [173]:
def compute_metrics(predictions, real):
    metrics = dict()
    real = [np.argmax(p) - 1 for p in real]
    bin_preds = [np.argmax(p) - 1 for p in predictions]
    metrics['mse'] = mean_squared_error(bin_preds, real)
    metrics['recall'] = recall_score(bin_preds, real, average='macro')
    metrics['f1'] = f1_score(bin_preds, real, average='macro')
    metrics['acc'] = accuracy_score(bin_preds, real)
    return metrics
    

In [130]:
predictions

NameError: name 'predictions' is not defined

In [131]:
def kfold_train(model_func, model_name, **params):
    evaluations = list()
    hists = list()
    i = 0
    for train_index, val_index in folds:
        i += 1
        model = model_func()
        train_x = x_train[train_index]
        train_y = y_train[train_index]
        val_x = x_train[val_index]
        val_y = y_train[val_index]
        
        logdir="logs/DL/"+ model_name+"/kfold" + str(i)
        print(logdir)
#         tensorboard_callback = TensorBoard(log_dir=logdir)

#         hist = model.fit(train_x, train_y, validation_data=(val_x, val_y), callbacks=[tensorboard_callback], **params)
        hist = model.fit(train_x, train_y, validation_data=(val_x, val_y), **params)
        
        hists.append(hist)
        evaluations.append(compute_metrics(model.predict(x_test), y_test))
    hist_df = convert_to_df(hists)
    hist_df.to_pickle('results/'+name+'/tweeter/' + base_dir + '/'+model_name+'_lstm.pkl')
    evas_df = pd.DataFrame.from_dict(evaluations)
    evas_df.to_pickle('results/'+name+'/tweeter/' + base_dir + '/'+model_name+'_lstm_evas.pkl')    
    return hist_df, evas_df

In [132]:
def compute_full_model(model_func, model_name, **params):
    model = model_func()
    model.fit(x_train, y_train, **params)
    model.save('results/'+name+'/tweeter/' + base_dir + '/' + model_name + '.h5' )
    preds = model.predict(x_test)
    pd.DataFrame(preds).to_pickle('results/'+name+'/tweeter/' + base_dir + '/' + model_name + '_preds.pkl')
    return preds

In [133]:
def plot_loss(hist_df):
    hist_df.loc[:, pd.IndexSlice[:, ['loss', 'val_loss']]].iplot(colors=colors)
    print('mean\n', hist_df.stack(level=0).mean())
    print('std\n', hist_df.stack(level=0).std())

In [134]:
def plot_val(evas_df):
    evas_df.iplot()
    print('media\n', evas_df.mean())
    print('std\n', evas_df.std())

In [135]:
def process_results(model_name):
    hist = pd.read_pickle('results/'+name+'/tweeter/' + base_dir + '/'+ model_name + '_lstm.pkl')
    evas = pd.read_pickle('results/'+name+'/tweeter/' + base_dir + '/'+ model_name + '_lstm_evas.pkl')
    plot_loss(hist)
    plot_val(evas)

### Definición de modelos

#### Linea base

In [238]:
def create_lstm_val():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(64, kernel_initializer='normal', activation='relu', input_shape=(28, 300), name='lstm'),
        tf.keras.layers.Dense(4, activation='softmax', kernel_initializer='normal', name='dense')
    ])
    model.compile(loss="categorical_crossentropy", optimizer='adam', metrics=['accuracy'])
    return model

In [239]:
val_hist, val_evas = kfold_train(create_lstm_val, 'lstm_val', batch_size=256, epochs=30, shuffle=False, verbose=2)

logs/DL/lstm_val/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 66s - loss: 1.3745 - acc: 0.5016 - val_loss: 1.3329 - val_acc: 0.4188
Epoch 2/30
3158/3158 - 3s - loss: 1.2837 - acc: 0.3949 - val_loss: 1.2457 - val_acc: 0.4188
Epoch 3/30
3158/3158 - 3s - loss: 1.2424 - acc: 0.3968 - val_loss: 1.1545 - val_acc: 0.4188
Epoch 4/30
3158/3158 - 1s - loss: 1.0202 - acc: 0.4411 - val_loss: 0.9758 - val_acc: 0.5413
Epoch 5/30
3158/3158 - 1s - loss: 0.9010 - acc: 0.6203 - val_loss: 0.8689 - val_acc: 0.6439
Epoch 6/30
3158/3158 - 1s - loss: 0.8049 - acc: 0.6631 - val_loss: 0.7445 - val_acc: 0.6895
Epoch 7/30
3158/3158 - 1s - loss: 0.6909 - acc: 0.7223 - val_loss: 0.6847 - val_acc: 0.7066
Epoch 8/30
3158/3158 - 1s - loss: 0.6089 - acc: 0.7644 - val_loss: 0.6567 - val_acc: 0.7407
Epoch 9/30
3158/3158 - 1s - loss: 0.5680 - acc: 0.7799 - val_loss: 0.6522 - val_acc: 0.7521
Epoch 10/30
3158/3158 - 1s - loss: 0.5644 - acc: 0.7825 - val_loss: 0.5746 - val_acc: 0.7949
Epoch 1

3158/3158 - 0s - loss: 0.2858 - acc: 0.8933 - val_loss: 0.9833 - val_acc: 0.7721
Epoch 28/30
3158/3158 - 0s - loss: 0.3330 - acc: 0.8749 - val_loss: 0.8722 - val_acc: 0.7778
Epoch 29/30
3158/3158 - 0s - loss: 0.3750 - acc: 0.8702 - val_loss: 1.0211 - val_acc: 0.7322
Epoch 30/30
3158/3158 - 0s - loss: 0.3932 - acc: 0.8512 - val_loss: 1.8861 - val_acc: 0.7863
logs/DL/lstm_val/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 64s - loss: 1.3742 - acc: 0.4937 - val_loss: 1.3305 - val_acc: 0.6011
Epoch 2/30
3158/3158 - 3s - loss: 1.2783 - acc: 0.5389 - val_loss: 1.1608 - val_acc: 0.6011
Epoch 3/30
3158/3158 - 3s - loss: 1.1031 - acc: 0.5408 - val_loss: 0.9560 - val_acc: 0.6011
Epoch 4/30
3158/3158 - 3s - loss: 0.9505 - acc: 0.5342 - val_loss: 0.8623 - val_acc: 0.6011
Epoch 5/30
3158/3158 - 3s - loss: 0.8952 - acc: 0.5513 - val_loss: 0.8346 - val_acc: 0.5954
Epoch 6/30
3158/3158 - 3s - loss: 0.8497 - acc: 0.5912 - val_loss: 0.7575 - val_acc: 0.6952
Epoch 7/30
3158/

3158/3158 - 3s - loss: 0.5569 - acc: 0.7809 - val_loss: 0.5888 - val_acc: 0.7607
Epoch 24/30
3158/3158 - 3s - loss: 0.5554 - acc: 0.7825 - val_loss: 0.5983 - val_acc: 0.7607
Epoch 25/30
3158/3158 - 3s - loss: 0.5552 - acc: 0.7780 - val_loss: 0.6147 - val_acc: 0.7464
Epoch 26/30
3158/3158 - 3s - loss: 0.5553 - acc: 0.7761 - val_loss: 0.6313 - val_acc: 0.7436
Epoch 27/30
3158/3158 - 3s - loss: 0.5533 - acc: 0.7777 - val_loss: 0.6378 - val_acc: 0.7436
Epoch 28/30
3158/3158 - 3s - loss: 0.5457 - acc: 0.7818 - val_loss: 0.6227 - val_acc: 0.7521
Epoch 29/30
3158/3158 - 3s - loss: 0.5330 - acc: 0.7907 - val_loss: 0.6122 - val_acc: 0.7493
Epoch 30/30
3158/3158 - 3s - loss: 0.5189 - acc: 0.7986 - val_loss: 0.6073 - val_acc: 0.7464
logs/DL/lstm_val/kfold7
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 23s - loss: 1.3743 - acc: 0.5063 - val_loss: 1.3268 - val_acc: 0.5299
Epoch 2/30
3158/3158 - 3s - loss: 1.2740 - acc: 0.5456 - val_loss: 1.1582 - val_acc: 0.5299
Epoch 3/30
3

Epoch 20/30
3158/3158 - 3s - loss: 0.4934 - acc: 0.8040 - val_loss: 0.5774 - val_acc: 0.7664
Epoch 21/30
3158/3158 - 3s - loss: 0.4021 - acc: 0.8464 - val_loss: 0.6202 - val_acc: 0.7635
Epoch 22/30
3158/3158 - 3s - loss: 0.3832 - acc: 0.8562 - val_loss: 0.6624 - val_acc: 0.7835
Epoch 23/30
3158/3158 - 3s - loss: 0.3724 - acc: 0.8604 - val_loss: 0.6519 - val_acc: 0.7835
Epoch 24/30
3158/3158 - 3s - loss: 0.3576 - acc: 0.8651 - val_loss: 0.6784 - val_acc: 0.7635
Epoch 25/30
3158/3158 - 3s - loss: 0.3291 - acc: 0.8746 - val_loss: 0.7252 - val_acc: 0.7635
Epoch 26/30
3158/3158 - 3s - loss: 0.2974 - acc: 0.8825 - val_loss: 0.8257 - val_acc: 0.7578
Epoch 27/30
3158/3158 - 3s - loss: 0.2770 - acc: 0.8889 - val_loss: 0.8897 - val_acc: 0.7493
Epoch 28/30
3158/3158 - 3s - loss: 0.2459 - acc: 0.8968 - val_loss: 1.0978 - val_acc: 0.7578
Epoch 29/30
3158/3158 - 3s - loss: 0.2384 - acc: 0.8996 - val_loss: 1.1752 - val_acc: 0.7550
Epoch 30/30
3158/3158 - 3s - loss: 0.2463 - acc: 0.9018 - val_loss: 1.

In [240]:
process_results('lstm_val')

mean
 acc         0.771575
loss        1.719370
val_acc     0.714641
val_loss    2.202245
dtype: float64
std
 acc          0.132246
loss         9.240276
val_acc      0.095821
val_loss    12.842593
dtype: float64


media
 acc       0.705921
f1        0.488868
mse       0.933200
recall    0.490363
dtype: float64
std
 acc       0.095301
f1        0.070502
mse       0.386437
recall    0.079809
dtype: float64


In [241]:
val_pred = compute_full_model(create_lstm_val, 'lstm_val', batch_size=256, epochs=30, shuffle=False, verbose=1)
compute_metrics(val_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 0.8063872255489022,
 'recall': 0.49817507021690927,
 'f1': 0.5132004011493058,
 'acc': 0.7425149700598802}

La reducción de la variable loss en el conjunto de entrenamiento produce un efecto de sobreentrenamiento que provoca un mayor error en el conjunto de validación

Algunas de las opciones para reducir este efecto son:
* Reducir la complejidad de la red neuronal
* Aplicar alguna clase de regularización al modelo
* Buscar una topología que se adapte mejor al problema
* Obtener más datos

#### Reducción complejidad modelo

In [242]:
def create_simpler_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(10, kernel_initializer='normal', activation='relu', input_shape=(28, 300), name='lstm'),
        tf.keras.layers.Dense(4, activation='softmax', kernel_initializer='normal', name='dense')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [243]:
simpler_hist, simpler_evas = kfold_train(create_simpler_model, 'lstm_simple', batch_size=256, epochs=30, shuffle=False, verbose=2)

logs/DL/lstm_simple/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 46s - loss: 1.3803 - acc: 0.5336 - val_loss: 1.3717 - val_acc: 0.5185
Epoch 2/30
3158/3158 - 3s - loss: 1.3602 - acc: 0.5469 - val_loss: 1.3148 - val_acc: 0.5670
Epoch 3/30
3158/3158 - 3s - loss: 1.1928 - acc: 0.5114 - val_loss: 1.0130 - val_acc: 0.4188
Epoch 4/30
3158/3158 - 3s - loss: 0.9662 - acc: 0.4303 - val_loss: 0.9705 - val_acc: 0.4843
Epoch 5/30
3158/3158 - 3s - loss: 0.9147 - acc: 0.5142 - val_loss: 0.9434 - val_acc: 0.5157
Epoch 6/30
3158/3158 - 3s - loss: 0.9024 - acc: 0.5374 - val_loss: 0.9291 - val_acc: 0.5413
Epoch 7/30
3158/3158 - 3s - loss: 0.8859 - acc: 0.5475 - val_loss: 0.9189 - val_acc: 0.5442
Epoch 8/30
3158/3158 - 3s - loss: 0.8912 - acc: 0.5396 - val_loss: 0.9195 - val_acc: 0.5356
Epoch 9/30
3158/3158 - 3s - loss: 0.8844 - acc: 0.5564 - val_loss: 0.9027 - val_acc: 0.5783
Epoch 10/30
3158/3158 - 3s - loss: 0.8741 - acc: 0.5532 - val_loss: 0.9020 - val_acc: 0.5157
Epoc

Epoch 27/30
3158/3158 - 3s - loss: 0.7552 - acc: 0.6998 - val_loss: 0.7537 - val_acc: 0.6866
Epoch 28/30
3158/3158 - 3s - loss: 0.7394 - acc: 0.7033 - val_loss: 0.7425 - val_acc: 0.7094
Epoch 29/30
3158/3158 - 3s - loss: 0.7255 - acc: 0.7191 - val_loss: 0.7111 - val_acc: 0.7037
Epoch 30/30
3158/3158 - 3s - loss: 0.7016 - acc: 0.7201 - val_loss: 0.6907 - val_acc: 0.7151
logs/DL/lstm_simple/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 52s - loss: 1.3802 - acc: 0.5361 - val_loss: 1.3707 - val_acc: 0.6011
Epoch 2/30
3158/3158 - 3s - loss: 1.3622 - acc: 0.5374 - val_loss: 1.3437 - val_acc: 0.6011
Epoch 3/30
3158/3158 - 3s - loss: 1.3117 - acc: 0.5370 - val_loss: 1.1930 - val_acc: 0.6011
Epoch 4/30
3158/3158 - 3s - loss: 1.0719 - acc: 0.5323 - val_loss: 0.9544 - val_acc: 0.4046
Epoch 5/30
3158/3158 - 3s - loss: 0.9577 - acc: 0.5092 - val_loss: 0.8780 - val_acc: 0.6239
Epoch 6/30
3158/3158 - 3s - loss: 0.9313 - acc: 0.5440 - val_loss: 0.8565 - val_acc: 0.6296
E

Epoch 23/30
3158/3158 - 3s - loss: 0.5081 - acc: 0.8116 - val_loss: 0.6333 - val_acc: 0.7550
Epoch 24/30
3158/3158 - 3s - loss: 0.4581 - acc: 0.8322 - val_loss: 0.6237 - val_acc: 0.7692
Epoch 25/30
3158/3158 - 3s - loss: 0.4501 - acc: 0.8388 - val_loss: 0.6434 - val_acc: 0.7806
Epoch 26/30
3158/3158 - 3s - loss: 0.4353 - acc: 0.8445 - val_loss: 0.6754 - val_acc: 0.7749
Epoch 27/30
3158/3158 - 3s - loss: 0.4249 - acc: 0.8471 - val_loss: 0.7528 - val_acc: 0.7521
Epoch 28/30
3158/3158 - 3s - loss: 0.4249 - acc: 0.8496 - val_loss: 0.7153 - val_acc: 0.7664
Epoch 29/30
3158/3158 - 3s - loss: 0.4307 - acc: 0.8471 - val_loss: 0.6832 - val_acc: 0.7806
Epoch 30/30
3158/3158 - 3s - loss: 0.4316 - acc: 0.8531 - val_loss: 0.6113 - val_acc: 0.7721
logs/DL/lstm_simple/kfold7
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 67s - loss: 1.3808 - acc: 0.5158 - val_loss: 1.3733 - val_acc: 0.4672
Epoch 2/30
3158/3158 - 3s - loss: 1.3656 - acc: 0.4772 - val_loss: 1.3617 - val_acc: 0.40

Epoch 19/30
3158/3158 - 3s - loss: 0.7668 - acc: 0.6571 - val_loss: 0.7555 - val_acc: 0.6952
Epoch 20/30
3158/3158 - 3s - loss: 0.7421 - acc: 0.6827 - val_loss: 0.7195 - val_acc: 0.7009
Epoch 21/30
3158/3158 - 3s - loss: 0.7074 - acc: 0.7128 - val_loss: 0.6984 - val_acc: 0.7094
Epoch 22/30
3158/3158 - 3s - loss: 0.6748 - acc: 0.7286 - val_loss: 0.6787 - val_acc: 0.7151
Epoch 23/30
3158/3158 - 3s - loss: 0.6539 - acc: 0.7378 - val_loss: 0.6659 - val_acc: 0.7379
Epoch 24/30
3158/3158 - 3s - loss: 0.6351 - acc: 0.7435 - val_loss: 0.6561 - val_acc: 0.7521
Epoch 25/30
3158/3158 - 3s - loss: 0.6178 - acc: 0.7530 - val_loss: 0.6509 - val_acc: 0.7379
Epoch 26/30
3158/3158 - 3s - loss: 0.6040 - acc: 0.7597 - val_loss: 0.6512 - val_acc: 0.7379
Epoch 27/30
3158/3158 - 2s - loss: 0.5952 - acc: 0.7641 - val_loss: 0.6414 - val_acc: 0.7692
Epoch 28/30
3158/3158 - 1s - loss: 0.5846 - acc: 0.7733 - val_loss: 0.6402 - val_acc: 0.7749
Epoch 29/30
3158/3158 - 1s - loss: 0.5758 - acc: 0.7787 - val_loss: 0.

In [244]:
process_results('lstm_simple')

mean
 acc         0.657681
loss        1.387907
val_acc     0.644913
val_loss    1.455271
dtype: float64
std
 acc         0.127786
loss        4.735591
val_acc     0.119262
val_loss    5.048748
dtype: float64


media
 acc       0.668729
f1        0.461952
mse       0.853826
recall    0.468828
dtype: float64
std
 acc       0.203504
f1        0.138010
mse       0.115464
recall    0.069861
dtype: float64


In [245]:
simpler_pred = compute_full_model(create_simpler_model, 'lstm_simple', batch_size=256, epochs=30, shuffle=False, verbose=1)
compute_metrics(simpler_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 0.7664670658682635,
 'recall': 0.5070165505846829,
 'f1': 0.5204446138563762,
 'acc': 0.7524950099800399}

La reducción de dimensionalidad de la red neuronal permite reducir la velocidad de aprendizaje sobre el conjunto de entrenamiento reduciendo así la perdida en el conjunto de validación.

Si lo compramos con el resultado anterior vemos que en general se reducen los picos negativos, en un 0.30 como minimo, sin embargo la convergencia sigue estando sobre 0.4 en los mejores casos

#### Regularización por dropout

In [246]:
def create_drop_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(10, kernel_initializer='normal', activation='relu', input_shape=(28, 300), name='lstm'),
        tf.keras.layers.Dropout(0.7),
        tf.keras.layers.Dense(4, activation='softmax', kernel_initializer='normal', name='dense')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [247]:
drop_hist, drop_evas = kfold_train(create_drop_model, 'dropout_lstm', batch_size=256, epochs=30, shuffle=False, verbose=1)

logs/DL/dropout_lstm/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout_lstm/kfold2
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30


Epoch 30/30
logs/DL/dropout_lstm/kfold3
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout_lstm/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30


Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout_lstm/kfold5
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout_lstm/kfold6
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30


Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout_lstm/kfold7
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout_lstm/kfold8
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30


Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout_lstm/kfold9
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout_lstm/kfold10
Train on 3159 samples, validate on 350 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30


Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [248]:
process_results('dropout_lstm')

mean
 acc         0.564700
loss        0.974106
val_acc     0.614227
val_loss    0.898497
dtype: float64
std
 acc         0.067233
loss        0.218324
val_acc     0.102782
val_loss    0.249355
dtype: float64


media
 acc       0.696407
f1        0.461742
mse       0.990818
recall    0.453802
dtype: float64
std
 acc       0.061429
f1        0.084195
mse       0.245717
recall    0.097699
dtype: float64


In [249]:
drop_pred = compute_full_model(create_drop_model, 'dropout_lstm', batch_size=256, epochs=30, shuffle=False, verbose=1)
compute_metrics(drop_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 0.9208250166333999,
 'recall': 0.4937753239072378,
 'f1': 0.49462390254469457,
 'acc': 0.7139055222887558}

Un valor tan alto de dropout (0.7) provoca peores resultados

In [250]:
def create_drop_model_2():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(10, kernel_initializer='normal', activation='relu', input_shape=(28, 300), name='lstm'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(4, activation='softmax', kernel_initializer='normal', name='dense')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


In [251]:
drop2_hist, drop2_evas = kfold_train(create_drop_model_2, 'dropout2_lstm', batch_size=256, epochs=30, shuffle=False, verbose=1)

logs/DL/dropout2_lstm/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout2_lstm/kfold2
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30


Epoch 30/30
logs/DL/dropout2_lstm/kfold3
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout2_lstm/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30


Epoch 29/30
Epoch 30/30
logs/DL/dropout2_lstm/kfold5
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout2_lstm/kfold6
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30


Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout2_lstm/kfold7
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout2_lstm/kfold8
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30


Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout2_lstm/kfold9
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout2_lstm/kfold10
Train on 3159 samples, validate on 350 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30


Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [252]:
process_results('dropout2_lstm')

mean
 acc         0.625194
loss        0.858744
val_acc     0.639819
val_loss    0.845117
dtype: float64
std
 acc         0.106437
loss        0.249825
val_acc     0.105757
val_loss    0.234881
dtype: float64


media
 acc       0.695476
f1        0.461811
mse       0.994544
recall    0.447763
dtype: float64
std
 acc       0.074270
f1        0.090149
mse       0.297082
recall    0.100226
dtype: float64


In [253]:
drop2_pred = compute_full_model(create_simpler_model, 'dropout2_lstm', batch_size=256, epochs=30, shuffle=False, verbose=1)
compute_metrics(drop2_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 0.7638057218895542,
 'recall': 0.49986985914026577,
 'f1': 0.5193671892488557,
 'acc': 0.7531603459747173}

Con un dropout de 0.2 se mejora la media de accuracy en 2 puntos y el f1 en casi 2 puntos también, sin embargo en el conjunto de test se reduce notablemente el resultado obtenido

#### Batch Normalization

In [254]:
def create_bn_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(10, kernel_initializer='normal', activation='relu', input_shape=(28, 300), name='lstm'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dense(4, activation='softmax', kernel_initializer='normal', name='dense')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [255]:
bn_hist, bn_evas = kfold_train(create_bn_model, 'bn_lstm', batch_size=256, epochs=30, shuffle=False, verbose=1)

logs/DL/bn_lstm/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/bn_lstm/kfold2
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30


Epoch 30/30
logs/DL/bn_lstm/kfold3
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/bn_lstm/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30


Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/bn_lstm/kfold5
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/bn_lstm/kfold6
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30


Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/bn_lstm/kfold7
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/bn_lstm/kfold8
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30


Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/bn_lstm/kfold9
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/bn_lstm/kfold10
Train on 3159 samples, validate on 350 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30


Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [256]:
process_results('bn_lstm')

mean
 acc         0.770882
loss        0.768122
val_acc     0.730880
val_loss    0.930276
dtype: float64
std
 acc         0.096310
loss        0.293716
val_acc     0.078869
val_loss    0.248393
dtype: float64


media
 acc       0.756620
f1        0.520680
mse       0.749967
recall    0.504872
dtype: float64
std
 acc       0.011930
f1        0.009517
mse       0.047720
recall    0.006156
dtype: float64


In [257]:
bn_pred = compute_full_model(create_bn_model, 'bn_lstm', batch_size=256, epochs=30, shuffle=False, verbose=1)
compute_metrics(bn_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 0.6892880904856953,
 'recall': 0.511508003493501,
 'f1': 0.5302065482569778,
 'acc': 0.7717897538256819}

#### Weights initialization

Probamos la inicialización de pesos por el algoritmo Xavier (glorot_normal)

In [258]:
def glorot_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(10, kernel_initializer='glorot_normal', activation='relu', input_shape=(28, 300), name='lstm'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dense(4, activation='softmax', kernel_initializer='glorot_normal', name='dense')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [259]:
gl_hist, gl_evas = kfold_train(glorot_model, 'glorot_lstm', batch_size=256, epochs=30, shuffle=False, verbose=2)

logs/DL/glorot_lstm/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 32s - loss: 1.3545 - acc: 0.4731 - val_loss: 1.3509 - val_acc: 0.5071
Epoch 2/30
3158/3158 - 1s - loss: 1.2855 - acc: 0.5187 - val_loss: 1.3118 - val_acc: 0.5670
Epoch 3/30
3158/3158 - 1s - loss: 1.1926 - acc: 0.6023 - val_loss: 1.2680 - val_acc: 0.7037
Epoch 4/30
3158/3158 - 1s - loss: 1.1001 - acc: 0.6653 - val_loss: 1.2248 - val_acc: 0.7379
Epoch 5/30
3158/3158 - 1s - loss: 0.9900 - acc: 0.7058 - val_loss: 1.1796 - val_acc: 0.7464
Epoch 6/30
3158/3158 - 1s - loss: 0.9073 - acc: 0.7413 - val_loss: 1.1316 - val_acc: 0.7607
Epoch 7/30
3158/3158 - 1s - loss: 0.8291 - acc: 0.7590 - val_loss: 1.0908 - val_acc: 0.7607
Epoch 8/30
3158/3158 - 1s - loss: 0.7627 - acc: 0.7768 - val_loss: 1.0415 - val_acc: 0.7692
Epoch 9/30
3158/3158 - 1s - loss: 0.7175 - acc: 0.7920 - val_loss: 1.0000 - val_acc: 0.7721
Epoch 10/30
3158/3158 - 1s - loss: 0.6862 - acc: 0.7980 - val_loss: 0.9654 - val_acc: 0.7749
Epoc

Epoch 27/30
3158/3158 - 3s - loss: 0.4268 - acc: 0.8689 - val_loss: 0.6359 - val_acc: 0.7721
Epoch 28/30
3158/3158 - 3s - loss: 0.4055 - acc: 0.8771 - val_loss: 0.6112 - val_acc: 0.8006
Epoch 29/30
3158/3158 - 3s - loss: 0.3934 - acc: 0.8828 - val_loss: 0.6278 - val_acc: 0.7835
Epoch 30/30
3158/3158 - 3s - loss: 0.3810 - acc: 0.8876 - val_loss: 0.6092 - val_acc: 0.7977
logs/DL/glorot_lstm/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 90s - loss: 1.3765 - acc: 0.4443 - val_loss: 1.3610 - val_acc: 0.6011
Epoch 2/30
3158/3158 - 3s - loss: 1.3298 - acc: 0.5250 - val_loss: 1.3332 - val_acc: 0.6325
Epoch 3/30
3158/3158 - 3s - loss: 1.2784 - acc: 0.5617 - val_loss: 1.3021 - val_acc: 0.6353
Epoch 4/30
3158/3158 - 3s - loss: 1.2288 - acc: 0.5484 - val_loss: 1.2710 - val_acc: 0.6410
Epoch 5/30
3158/3158 - 3s - loss: 1.1846 - acc: 0.5671 - val_loss: 1.2386 - val_acc: 0.6439
Epoch 6/30
3158/3158 - 3s - loss: 1.1258 - acc: 0.6007 - val_loss: 1.2051 - val_acc: 0.6553
E

Epoch 23/30
3158/3158 - 3s - loss: 0.5491 - acc: 0.8141 - val_loss: 0.7342 - val_acc: 0.7892
Epoch 24/30
3158/3158 - 3s - loss: 0.5351 - acc: 0.8138 - val_loss: 0.7186 - val_acc: 0.7892
Epoch 25/30
3158/3158 - 3s - loss: 0.5239 - acc: 0.8268 - val_loss: 0.7029 - val_acc: 0.7892
Epoch 26/30
3158/3158 - 3s - loss: 0.5121 - acc: 0.8249 - val_loss: 0.6953 - val_acc: 0.7863
Epoch 27/30
3158/3158 - 3s - loss: 0.4973 - acc: 0.8303 - val_loss: 0.7048 - val_acc: 0.7778
Epoch 28/30
3158/3158 - 3s - loss: 0.4918 - acc: 0.8265 - val_loss: 0.7319 - val_acc: 0.7749
Epoch 29/30
3158/3158 - 3s - loss: 0.4792 - acc: 0.8338 - val_loss: 0.7726 - val_acc: 0.7464
Epoch 30/30
3158/3158 - 3s - loss: 0.4947 - acc: 0.8315 - val_loss: 0.6175 - val_acc: 0.7835
logs/DL/glorot_lstm/kfold7
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 51s - loss: 1.3754 - acc: 0.5142 - val_loss: 1.3524 - val_acc: 0.5442
Epoch 2/30
3158/3158 - 1s - loss: 1.3044 - acc: 0.5877 - val_loss: 1.3168 - val_acc: 0.59

Epoch 19/30
3158/3158 - 3s - loss: 0.6160 - acc: 0.7977 - val_loss: 0.7468 - val_acc: 0.7664
Epoch 20/30
3158/3158 - 3s - loss: 0.5831 - acc: 0.8094 - val_loss: 0.7358 - val_acc: 0.7521
Epoch 21/30
3158/3158 - 3s - loss: 0.5676 - acc: 0.8129 - val_loss: 0.7063 - val_acc: 0.7692
Epoch 22/30
3158/3158 - 3s - loss: 0.5542 - acc: 0.8170 - val_loss: 0.7037 - val_acc: 0.7692
Epoch 23/30
3158/3158 - 3s - loss: 0.5393 - acc: 0.8182 - val_loss: 0.7029 - val_acc: 0.7721
Epoch 24/30
3158/3158 - 3s - loss: 0.5338 - acc: 0.8195 - val_loss: 0.7018 - val_acc: 0.7721
Epoch 25/30
3158/3158 - 3s - loss: 0.5121 - acc: 0.8262 - val_loss: 0.7038 - val_acc: 0.7778
Epoch 26/30
3158/3158 - 3s - loss: 0.4954 - acc: 0.8322 - val_loss: 0.7195 - val_acc: 0.7664
Epoch 27/30
3158/3158 - 3s - loss: 0.4850 - acc: 0.8385 - val_loss: 0.7273 - val_acc: 0.7692
Epoch 28/30
3158/3158 - 3s - loss: 0.4715 - acc: 0.8388 - val_loss: 0.7153 - val_acc: 0.7578
Epoch 29/30
3158/3158 - 3s - loss: 0.4653 - acc: 0.8388 - val_loss: 0.

In [260]:
process_results('glorot_lstm')

mean
 acc         0.757019
loss        0.736413
val_acc     0.713525
val_loss    0.919137
dtype: float64
std
 acc         0.113294
loss        0.283448
val_acc     0.099437
val_loss    0.223512
dtype: float64


media
 acc       0.756021
f1        0.521104
mse       0.752362
recall    0.503337
dtype: float64
std
 acc       0.012917
f1        0.008283
mse       0.051667
recall    0.006569
dtype: float64


In [261]:
gl_pred = compute_full_model(create_bn_model, 'glorot_lstm', batch_size=256, epochs=30, shuffle=False, verbose=1)
compute_metrics(gl_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 0.7132401862940785,
 'recall': 0.5068307993758845,
 'f1': 0.5265139072670454,
 'acc': 0.7658017298735862}

In [262]:
def glorot_model_wo_bn():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(10, kernel_initializer='glorot_normal', activation='relu', input_shape=(28, 300), name='lstm'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(4, activation='softmax', kernel_initializer='glorot_normal', name='dense')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [263]:
gl__wobn_hist, gl_wobn_evas = kfold_train(glorot_model_wo_bn, 'glorot__wobn_lstm', batch_size=256, epochs=30, shuffle=False, verbose=2)

logs/DL/glorot__wobn_lstm/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 66s - loss: 1.3765 - acc: 0.4069 - val_loss: 1.3572 - val_acc: 0.4188
Epoch 2/30
3158/3158 - 1s - loss: 1.3108 - acc: 0.4060 - val_loss: 1.2455 - val_acc: 0.4188
Epoch 3/30
3158/3158 - 1s - loss: 1.2266 - acc: 0.4186 - val_loss: 1.1340 - val_acc: 0.4188
Epoch 4/30
3158/3158 - 1s - loss: 1.1475 - acc: 0.4246 - val_loss: 1.0552 - val_acc: 0.4330
Epoch 5/30
3158/3158 - 1s - loss: 1.0443 - acc: 0.4582 - val_loss: 0.9537 - val_acc: 0.6325
Epoch 6/30
3158/3158 - 1s - loss: 0.9663 - acc: 0.6048 - val_loss: 0.8775 - val_acc: 0.7236
Epoch 7/30
3158/3158 - 1s - loss: 0.8980 - acc: 0.6463 - val_loss: 0.8044 - val_acc: 0.7350
Epoch 8/30
3158/3158 - 1s - loss: 0.8411 - acc: 0.6824 - val_loss: 0.7526 - val_acc: 0.7322
Epoch 9/30
3158/3158 - 1s - loss: 0.8044 - acc: 0.6859 - val_loss: 0.7179 - val_acc: 0.7379
Epoch 10/30
3158/3158 - 1s - loss: 0.7570 - acc: 0.6995 - val_loss: 0.6764 - val_acc: 0.763

Epoch 27/30
3158/3158 - 1s - loss: 0.6665 - acc: 0.7394 - val_loss: 0.6984 - val_acc: 0.7493
Epoch 28/30
3158/3158 - 1s - loss: 0.6435 - acc: 0.7616 - val_loss: 0.6923 - val_acc: 0.7664
Epoch 29/30
3158/3158 - 1s - loss: 0.6386 - acc: 0.7571 - val_loss: 0.6806 - val_acc: 0.7664
Epoch 30/30
3158/3158 - 1s - loss: 0.6366 - acc: 0.7663 - val_loss: 0.6699 - val_acc: 0.7721
logs/DL/glorot__wobn_lstm/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 18s - loss: 1.3802 - acc: 0.5149 - val_loss: 1.3710 - val_acc: 0.6011
Epoch 2/30
3158/3158 - 1s - loss: 1.3654 - acc: 0.5342 - val_loss: 1.3544 - val_acc: 0.6011
Epoch 3/30
3158/3158 - 1s - loss: 1.3492 - acc: 0.5298 - val_loss: 1.3354 - val_acc: 0.6011
Epoch 4/30
3158/3158 - 1s - loss: 1.3295 - acc: 0.5307 - val_loss: 1.3080 - val_acc: 0.6011
Epoch 5/30
3158/3158 - 1s - loss: 1.3017 - acc: 0.5393 - val_loss: 1.2623 - val_acc: 0.6011
Epoch 6/30
3158/3158 - 1s - loss: 1.2678 - acc: 0.5374 - val_loss: 1.2319 - val_acc: 0.

Epoch 23/30
3158/3158 - 1s - loss: 0.6299 - acc: 0.7384 - val_loss: 0.5827 - val_acc: 0.7949
Epoch 24/30
3158/3158 - 1s - loss: 0.6068 - acc: 0.7533 - val_loss: 0.5724 - val_acc: 0.7920
Epoch 25/30
3158/3158 - 1s - loss: 0.5761 - acc: 0.7622 - val_loss: 0.5713 - val_acc: 0.7920
Epoch 26/30
3158/3158 - 1s - loss: 0.5745 - acc: 0.7647 - val_loss: 0.5592 - val_acc: 0.7920
Epoch 27/30
3158/3158 - 1s - loss: 0.5598 - acc: 0.7742 - val_loss: 0.5499 - val_acc: 0.8006
Epoch 28/30
3158/3158 - 1s - loss: 0.5540 - acc: 0.7714 - val_loss: 0.5453 - val_acc: 0.8006
Epoch 29/30
3158/3158 - 1s - loss: 0.5444 - acc: 0.7783 - val_loss: 0.5549 - val_acc: 0.7920
Epoch 30/30
3158/3158 - 1s - loss: 0.5424 - acc: 0.7752 - val_loss: 0.5584 - val_acc: 0.7863
logs/DL/glorot__wobn_lstm/kfold7
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 18s - loss: 1.3774 - acc: 0.5386 - val_loss: 1.3642 - val_acc: 0.5299
Epoch 2/30
3158/3158 - 1s - loss: 1.3507 - acc: 0.5434 - val_loss: 1.3190 - val_acc

Epoch 19/30
3158/3158 - 1s - loss: 0.8235 - acc: 0.5427 - val_loss: 0.8312 - val_acc: 0.5442
Epoch 20/30
3158/3158 - 1s - loss: 0.8030 - acc: 0.5494 - val_loss: 0.8327 - val_acc: 0.5442
Epoch 21/30
3158/3158 - 1s - loss: 0.7880 - acc: 0.5560 - val_loss: 0.8284 - val_acc: 0.5442
Epoch 22/30
3158/3158 - 1s - loss: 0.7797 - acc: 0.5674 - val_loss: 0.8301 - val_acc: 0.5442
Epoch 23/30
3158/3158 - 1s - loss: 0.7652 - acc: 0.5814 - val_loss: 0.8358 - val_acc: 0.5641
Epoch 24/30
3158/3158 - 1s - loss: 0.7505 - acc: 0.5928 - val_loss: 0.8450 - val_acc: 0.5926
Epoch 25/30
3158/3158 - 1s - loss: 0.7402 - acc: 0.6184 - val_loss: 0.8440 - val_acc: 0.6382
Epoch 26/30
3158/3158 - 1s - loss: 0.7233 - acc: 0.6438 - val_loss: 0.8577 - val_acc: 0.6724
Epoch 27/30
3158/3158 - 2s - loss: 0.7073 - acc: 0.6704 - val_loss: 0.8521 - val_acc: 0.7236
Epoch 28/30
3158/3158 - 2s - loss: 0.6922 - acc: 0.7001 - val_loss: 0.8426 - val_acc: 0.7322
Epoch 29/30
3158/3158 - 2s - loss: 0.6811 - acc: 0.7141 - val_loss: 0.

In [264]:
process_results('glorot__wobn_lstm')

mean
 acc         0.623706
loss        0.900176
val_acc     0.645710
val_loss    0.896537
dtype: float64
std
 acc         0.111341
loss        0.272639
val_acc     0.109510
val_loss    0.237901
dtype: float64


media
 acc       0.726946
f1        0.496822
mse       0.868663
recall    0.487015
dtype: float64
std
 acc       0.052915
f1        0.051653
mse       0.211659
recall    0.035410
dtype: float64


In [265]:
gl_wobn_pred = compute_full_model(glorot_model_wo_bn, 'glorot__wobn_lstm', batch_size=256, epochs=30, shuffle=False, verbose=1)
compute_metrics(gl_wobn_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 0.8090485695276114,
 'recall': 0.5003367711310521,
 'f1': 0.5131251925628079,
 'acc': 0.7418496340652029}

## Pruebas con topologías

### 2 lstm

In [266]:
def double_lstm_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(10, kernel_initializer='glorot_normal', activation='relu', return_sequences=True, input_shape=(28, 300), name='lstm'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.LSTM(10, activation='relu', name='2lstm'),
        tf.keras.layers.Dense(4, activation='softmax', kernel_initializer='glorot_normal', name='dense')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [267]:
double_hist, double_evas = kfold_train(double_lstm_model, 'double_lstm', batch_size=256, epochs=30, shuffle=False, verbose=2)

logs/DL/double_lstm/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 99s - loss: 1.3797 - acc: 0.3841 - val_loss: 1.3699 - val_acc: 0.4188
Epoch 2/30
3158/3158 - 6s - loss: 1.3610 - acc: 0.3968 - val_loss: 1.3416 - val_acc: 0.4188
Epoch 3/30
3158/3158 - 6s - loss: 1.3003 - acc: 0.3968 - val_loss: 1.1931 - val_acc: 0.4188
Epoch 4/30
3158/3158 - 6s - loss: 1.2246 - acc: 0.4284 - val_loss: 1.1488 - val_acc: 0.4900
Epoch 5/30
3158/3158 - 6s - loss: 1.1227 - acc: 0.5342 - val_loss: 1.0228 - val_acc: 0.5299
Epoch 6/30
3158/3158 - 6s - loss: 0.9939 - acc: 0.6222 - val_loss: 0.9331 - val_acc: 0.6866
Epoch 7/30
3158/3158 - 6s - loss: 0.8907 - acc: 0.6884 - val_loss: 0.8670 - val_acc: 0.7407
Epoch 8/30
3158/3158 - 6s - loss: 0.8173 - acc: 0.7042 - val_loss: 0.7940 - val_acc: 0.7265
Epoch 9/30
3158/3158 - 6s - loss: 0.7931 - acc: 0.6897 - val_loss: 0.7613 - val_acc: 0.7123
Epoch 10/30
3158/3158 - 6s - loss: 0.7372 - acc: 0.7090 - val_loss: 0.7317 - val_acc: 0.7607
Epoc

Epoch 27/30
3158/3158 - 1s - loss: 0.5362 - acc: 0.7945 - val_loss: 0.6225 - val_acc: 0.7692
Epoch 28/30
3158/3158 - 1s - loss: 0.5417 - acc: 0.7904 - val_loss: 0.6245 - val_acc: 0.7806
Epoch 29/30
3158/3158 - 1s - loss: 0.5364 - acc: 0.7932 - val_loss: 0.6130 - val_acc: 0.7692
Epoch 30/30
3158/3158 - 1s - loss: 0.5410 - acc: 0.7875 - val_loss: 0.6206 - val_acc: 0.7749
logs/DL/double_lstm/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 27s - loss: 1.3783 - acc: 0.5168 - val_loss: 1.3644 - val_acc: 0.6040
Epoch 2/30
3158/3158 - 4s - loss: 1.3556 - acc: 0.5408 - val_loss: 1.3331 - val_acc: 0.6211
Epoch 3/30
3158/3158 - 6s - loss: 1.3281 - acc: 0.5652 - val_loss: 1.2950 - val_acc: 0.6154
Epoch 4/30
3158/3158 - 6s - loss: 1.2927 - acc: 0.5405 - val_loss: 1.2560 - val_acc: 0.6011
Epoch 5/30
3158/3158 - 6s - loss: 1.2520 - acc: 0.5386 - val_loss: 1.1915 - val_acc: 0.6011
Epoch 6/30
3158/3158 - 6s - loss: 1.1813 - acc: 0.5408 - val_loss: 1.0526 - val_acc: 0.6239
E

Epoch 23/30
3158/3158 - 1s - loss: 0.5416 - acc: 0.7973 - val_loss: 0.5347 - val_acc: 0.7920
Epoch 24/30
3158/3158 - 1s - loss: 0.5446 - acc: 0.7961 - val_loss: 0.5486 - val_acc: 0.7892
Epoch 25/30
3158/3158 - 1s - loss: 0.5500 - acc: 0.7977 - val_loss: 0.5740 - val_acc: 0.7806
Epoch 26/30
3158/3158 - 1s - loss: 0.5441 - acc: 0.8011 - val_loss: 0.5810 - val_acc: 0.7692
Epoch 27/30
3158/3158 - 1s - loss: 0.5440 - acc: 0.7964 - val_loss: 0.5771 - val_acc: 0.7835
Epoch 28/30
3158/3158 - 2s - loss: 0.5379 - acc: 0.8018 - val_loss: 0.5554 - val_acc: 0.7920
Epoch 29/30
3158/3158 - 3s - loss: 0.5229 - acc: 0.8075 - val_loss: 0.5386 - val_acc: 0.7920
Epoch 30/30
3158/3158 - 3s - loss: 0.5072 - acc: 0.8170 - val_loss: 0.5390 - val_acc: 0.7920
logs/DL/double_lstm/kfold7
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 50s - loss: 1.3771 - acc: 0.5016 - val_loss: 1.3642 - val_acc: 0.5299
Epoch 2/30
3158/3158 - 6s - loss: 1.3498 - acc: 0.5453 - val_loss: 1.3244 - val_acc: 0.52

Epoch 19/30
3158/3158 - 6s - loss: 0.5565 - acc: 0.7958 - val_loss: 0.6349 - val_acc: 0.7407
Epoch 20/30
3158/3158 - 6s - loss: 0.5510 - acc: 0.7904 - val_loss: 0.6315 - val_acc: 0.7407
Epoch 21/30
3158/3158 - 6s - loss: 0.5302 - acc: 0.8094 - val_loss: 0.6463 - val_acc: 0.7464
Epoch 22/30
3158/3158 - 6s - loss: 0.5246 - acc: 0.8100 - val_loss: 0.6346 - val_acc: 0.7464
Epoch 23/30
3158/3158 - 6s - loss: 0.5127 - acc: 0.8106 - val_loss: 0.6449 - val_acc: 0.7436
Epoch 24/30
3158/3158 - 6s - loss: 0.5066 - acc: 0.8170 - val_loss: 0.6559 - val_acc: 0.7379
Epoch 25/30
3158/3158 - 6s - loss: 0.4923 - acc: 0.8211 - val_loss: 0.6644 - val_acc: 0.7493
Epoch 26/30
3158/3158 - 6s - loss: 0.4916 - acc: 0.8293 - val_loss: 0.6545 - val_acc: 0.7493
Epoch 27/30
3158/3158 - 6s - loss: 0.4755 - acc: 0.8309 - val_loss: 0.6802 - val_acc: 0.7521
Epoch 28/30
3158/3158 - 6s - loss: 0.4776 - acc: 0.8281 - val_loss: 0.6841 - val_acc: 0.7493
Epoch 29/30
3158/3158 - 6s - loss: 0.4737 - acc: 0.8357 - val_loss: 0.

In [268]:
process_results('double_lstm')

mean
 acc         0.691916
loss        0.759822
val_acc     0.680806
val_loss    0.804116
dtype: float64
std
 acc         0.132092
loss        0.284650
val_acc     0.112107
val_loss    0.231751
dtype: float64


media
 acc       0.754225
f1        0.518277
mse       0.759548
recall    0.503700
dtype: float64
std
 acc       0.008504
f1        0.008352
mse       0.034017
recall    0.002963
dtype: float64


In [269]:
double_pred = compute_full_model(double_lstm_model, 'double_lstm', batch_size=256, epochs=30, shuffle=False, verbose=1)
compute_metrics(double_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 0.8117099135063207,
 'recall': 0.4901140086676599,
 'f1': 0.5096701657296863,
 'acc': 0.7411842980705257}

### Double lstm fine tune

### Convolucional

In [270]:
def convolutional_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Reshape((28, 300, 1), input_shape=(28, 300), name='lstm'),
        tf.keras.layers.Conv2D(128, (4, 300), padding='same', name='conv_layer'),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(16,16), strides=None),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(4, activation='softmax', kernel_initializer='normal', name='dense')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [271]:
convolutional_model().summary()

Model: "sequential_256"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (Reshape)               (None, 28, 300, 1)        0         
_________________________________________________________________
conv_layer (Conv2D)          (None, 28, 300, 128)      153728    
_________________________________________________________________
activation_61 (Activation)   (None, 28, 300, 128)      0         
_________________________________________________________________
max_pooling2d_24 (MaxPooling (None, 1, 18, 128)        0         
_________________________________________________________________
flatten_49 (Flatten)         (None, 2304)              0         
_________________________________________________________________
dropout_192 (Dropout)        (None, 2304)              0         
_________________________________________________________________
dense (Dense)                (None, 4)              

In [272]:
conv_hist, conv_evas = kfold_train(convolutional_model, 'convolutional', batch_size=8, epochs=30, shuffle=False, verbose=2)

logs/DL/convolutional/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 100s - loss: 0.7086 - acc: 0.7087 - val_loss: 0.6766 - val_acc: 0.7635
Epoch 2/30
3158/3158 - 27s - loss: 0.5373 - acc: 0.7980 - val_loss: 0.6503 - val_acc: 0.7664
Epoch 3/30
3158/3158 - 20s - loss: 0.3752 - acc: 0.8664 - val_loss: 0.6526 - val_acc: 0.7635
Epoch 4/30
3158/3158 - 14s - loss: 0.2601 - acc: 0.9009 - val_loss: 1.0542 - val_acc: 0.6895
Epoch 5/30
3158/3158 - 14s - loss: 0.2289 - acc: 0.9142 - val_loss: 1.4278 - val_acc: 0.6467
Epoch 6/30
3158/3158 - 14s - loss: 0.2107 - acc: 0.9234 - val_loss: 1.0709 - val_acc: 0.7208
Epoch 7/30
3158/3158 - 14s - loss: 0.2254 - acc: 0.9227 - val_loss: 0.9322 - val_acc: 0.7379
Epoch 8/30
3158/3158 - 14s - loss: 0.2327 - acc: 0.9208 - val_loss: 1.0013 - val_acc: 0.7749
Epoch 9/30
3158/3158 - 14s - loss: 0.1217 - acc: 0.9557 - val_loss: 1.1238 - val_acc: 0.7607
Epoch 10/30
3158/3158 - 14s - loss: 0.0695 - acc: 0.9778 - val_loss: 0.9298 - val_acc:

3158/3158 - 16s - loss: 0.0217 - acc: 0.9943 - val_loss: 1.2894 - val_acc: 0.7749
Epoch 27/30
3158/3158 - 26s - loss: 0.0154 - acc: 0.9934 - val_loss: 1.1104 - val_acc: 0.7635
Epoch 28/30
3158/3158 - 27s - loss: 0.0288 - acc: 0.9896 - val_loss: 1.6813 - val_acc: 0.7379
Epoch 29/30
3158/3158 - 27s - loss: 0.0123 - acc: 0.9962 - val_loss: 1.7529 - val_acc: 0.6838
Epoch 30/30
3158/3158 - 27s - loss: 0.0253 - acc: 0.9921 - val_loss: 2.1714 - val_acc: 0.7179
logs/DL/convolutional/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 61s - loss: 0.7126 - acc: 0.7220 - val_loss: 0.5532 - val_acc: 0.7863
Epoch 2/30
3158/3158 - 14s - loss: 0.5281 - acc: 0.8002 - val_loss: 0.5660 - val_acc: 0.7949
Epoch 3/30
3158/3158 - 22s - loss: 0.3593 - acc: 0.8680 - val_loss: 0.7453 - val_acc: 0.7236
Epoch 4/30
3158/3158 - 27s - loss: 0.2550 - acc: 0.9123 - val_loss: 0.8781 - val_acc: 0.7464
Epoch 5/30
3158/3158 - 27s - loss: 0.2416 - acc: 0.9117 - val_loss: 0.8510 - val_acc: 0.7692
E

Epoch 22/30
3158/3158 - 17s - loss: 0.0459 - acc: 0.9886 - val_loss: 1.1206 - val_acc: 0.7749
Epoch 23/30
3158/3158 - 16s - loss: 0.0504 - acc: 0.9851 - val_loss: 0.9412 - val_acc: 0.7835
Epoch 24/30
3158/3158 - 15s - loss: 0.0426 - acc: 0.9861 - val_loss: 1.0377 - val_acc: 0.7920
Epoch 25/30
3158/3158 - 14s - loss: 0.0672 - acc: 0.9788 - val_loss: 1.2738 - val_acc: 0.7863
Epoch 26/30
3158/3158 - 18s - loss: 0.0386 - acc: 0.9880 - val_loss: 1.1648 - val_acc: 0.7863
Epoch 27/30
3158/3158 - 28s - loss: 0.0313 - acc: 0.9915 - val_loss: 1.0182 - val_acc: 0.7607
Epoch 28/30
3158/3158 - 28s - loss: 0.0376 - acc: 0.9867 - val_loss: 1.1351 - val_acc: 0.7949
Epoch 29/30
3158/3158 - 28s - loss: 0.0427 - acc: 0.9851 - val_loss: 1.0603 - val_acc: 0.7949
Epoch 30/30
3158/3158 - 28s - loss: 0.0314 - acc: 0.9908 - val_loss: 1.0987 - val_acc: 0.8034
logs/DL/convolutional/kfold7
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 60s - loss: 0.7069 - acc: 0.7185 - val_loss: 0.7683 - v

3158/3158 - 14s - loss: 0.0328 - acc: 0.9886 - val_loss: 1.5214 - val_acc: 0.7236
Epoch 18/30
3158/3158 - 14s - loss: 0.0357 - acc: 0.9892 - val_loss: 1.0583 - val_acc: 0.7749
Epoch 19/30
3158/3158 - 14s - loss: 0.0495 - acc: 0.9858 - val_loss: 1.4357 - val_acc: 0.7407
Epoch 20/30
3158/3158 - 14s - loss: 0.0730 - acc: 0.9763 - val_loss: 1.6612 - val_acc: 0.7037
Epoch 21/30
3158/3158 - 14s - loss: 0.0400 - acc: 0.9883 - val_loss: 1.3409 - val_acc: 0.7778
Epoch 22/30
3158/3158 - 18s - loss: 0.0748 - acc: 0.9753 - val_loss: 1.4001 - val_acc: 0.7607
Epoch 23/30
3158/3158 - 27s - loss: 0.0778 - acc: 0.9763 - val_loss: 1.2547 - val_acc: 0.7635
Epoch 24/30
3158/3158 - 27s - loss: 0.0381 - acc: 0.9861 - val_loss: 1.3444 - val_acc: 0.7835
Epoch 25/30
3158/3158 - 28s - loss: 0.0244 - acc: 0.9927 - val_loss: 1.7939 - val_acc: 0.7521
Epoch 26/30
3158/3158 - 28s - loss: 0.0245 - acc: 0.9930 - val_loss: 1.4402 - val_acc: 0.7892
Epoch 27/30
3158/3158 - 27s - loss: 0.0141 - acc: 0.9962 - val_loss: 1.7

In [273]:
process_results('convolutional')

mean
 acc         0.952942
loss        0.132164
val_acc     0.754518
val_loss    1.124048
dtype: float64
std
 acc         0.063006
loss        0.160757
val_acc     0.042134
val_loss    0.327676
dtype: float64


media
 acc       0.747638
f1        0.522833
mse       0.758949
recall    0.533047
dtype: float64
std
 acc       0.015292
f1        0.017070
mse       0.062578
recall    0.034583
dtype: float64


In [274]:
conv_pred = compute_full_model(convolutional_model, 'convolutional', batch_size=8, epochs=30, shuffle=False, verbose=1)
compute_metrics(conv_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 0.7764471057884231,
 'recall': 0.5376819950851736,
 'f1': 0.5192534727272653,
 'acc': 0.7325349301397206}

### Convolutional 1D

In [275]:
def convolutional1d_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Conv1D(4, 300, input_shape=(28, 300), padding='same', name='conv_layer'),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling1D(pool_size=(4), strides=None),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(4, activation='softmax', kernel_initializer='normal', name='dense')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [276]:
convolutional1d_model().summary()

Model: "sequential_268"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv_layer (Conv1D)          (None, 28, 4)             360004    
_________________________________________________________________
activation_73 (Activation)   (None, 28, 4)             0         
_________________________________________________________________
max_pooling1d_25 (MaxPooling (None, 7, 4)              0         
_________________________________________________________________
flatten_61 (Flatten)         (None, 28)                0         
_________________________________________________________________
dropout_204 (Dropout)        (None, 28)                0         
_________________________________________________________________
dense (Dense)                (None, 4)                 116       
Total params: 360,120
Trainable params: 360,120
Non-trainable params: 0
______________________________________________

In [277]:
conv1d_hist, conv1d_evas = kfold_train(convolutional1d_model, 'convolutional1d', batch_size=8, epochs=30, shuffle=False, verbose=2)

logs/DL/convolutional1d/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 78s - loss: 0.7561 - acc: 0.6799 - val_loss: 0.6552 - val_acc: 0.7236
Epoch 2/30
3158/3158 - 19s - loss: 0.5497 - acc: 0.7932 - val_loss: 0.6252 - val_acc: 0.7578
Epoch 3/30
3158/3158 - 19s - loss: 0.4457 - acc: 0.8338 - val_loss: 0.6380 - val_acc: 0.7692
Epoch 4/30
3158/3158 - 19s - loss: 0.3533 - acc: 0.8797 - val_loss: 0.6928 - val_acc: 0.7550
Epoch 5/30
3158/3158 - 8s - loss: 0.2663 - acc: 0.9047 - val_loss: 0.8201 - val_acc: 0.7179
Epoch 6/30
3158/3158 - 8s - loss: 0.1989 - acc: 0.9376 - val_loss: 0.8267 - val_acc: 0.7236
Epoch 7/30
3158/3158 - 8s - loss: 0.1527 - acc: 0.9531 - val_loss: 0.8355 - val_acc: 0.7493
Epoch 8/30
3158/3158 - 8s - loss: 0.1259 - acc: 0.9645 - val_loss: 0.9024 - val_acc: 0.7350
Epoch 9/30
3158/3158 - 8s - loss: 0.0991 - acc: 0.9728 - val_loss: 0.8623 - val_acc: 0.7578
Epoch 10/30
3158/3158 - 8s - loss: 0.0814 - acc: 0.9772 - val_loss: 0.9341 - val_acc: 0.75

Epoch 27/30
3158/3158 - 8s - loss: 0.0284 - acc: 0.9911 - val_loss: 1.2995 - val_acc: 0.7550
Epoch 28/30
3158/3158 - 8s - loss: 0.0213 - acc: 0.9946 - val_loss: 1.3855 - val_acc: 0.7721
Epoch 29/30
3158/3158 - 8s - loss: 0.0285 - acc: 0.9934 - val_loss: 1.4298 - val_acc: 0.7550
Epoch 30/30
3158/3158 - 8s - loss: 0.0226 - acc: 0.9934 - val_loss: 1.4999 - val_acc: 0.7464
logs/DL/convolutional1d/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 55s - loss: 0.7557 - acc: 0.6856 - val_loss: 0.5791 - val_acc: 0.7692
Epoch 2/30
3158/3158 - 12s - loss: 0.5510 - acc: 0.7920 - val_loss: 0.5727 - val_acc: 0.7863
Epoch 3/30
3158/3158 - 12s - loss: 0.4614 - acc: 0.8293 - val_loss: 0.5691 - val_acc: 0.7778
Epoch 4/30
3158/3158 - 12s - loss: 0.3953 - acc: 0.8524 - val_loss: 0.5898 - val_acc: 0.7692
Epoch 5/30
3158/3158 - 12s - loss: 0.3106 - acc: 0.8860 - val_loss: 0.6398 - val_acc: 0.7749
Epoch 6/30
3158/3158 - 12s - loss: 0.2485 - acc: 0.9110 - val_loss: 0.6841 - val_acc:

Epoch 23/30
3158/3158 - 9s - loss: 0.0228 - acc: 0.9937 - val_loss: 1.0808 - val_acc: 0.7863
Epoch 24/30
3158/3158 - 12s - loss: 0.0206 - acc: 0.9934 - val_loss: 1.2043 - val_acc: 0.7721
Epoch 25/30
3158/3158 - 14s - loss: 0.0199 - acc: 0.9940 - val_loss: 1.3292 - val_acc: 0.7578
Epoch 26/30
3158/3158 - 14s - loss: 0.0187 - acc: 0.9956 - val_loss: 1.2079 - val_acc: 0.7721
Epoch 27/30
3158/3158 - 14s - loss: 0.0157 - acc: 0.9949 - val_loss: 1.1634 - val_acc: 0.7749
Epoch 28/30
3158/3158 - 14s - loss: 0.0173 - acc: 0.9949 - val_loss: 1.2923 - val_acc: 0.7692
Epoch 29/30
3158/3158 - 14s - loss: 0.0174 - acc: 0.9943 - val_loss: 1.3501 - val_acc: 0.7778
Epoch 30/30
3158/3158 - 14s - loss: 0.0165 - acc: 0.9959 - val_loss: 1.2899 - val_acc: 0.7550
logs/DL/convolutional1d/kfold7
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 107s - loss: 0.7591 - acc: 0.6963 - val_loss: 0.6394 - val_acc: 0.7265
Epoch 2/30
3158/3158 - 19s - loss: 0.5526 - acc: 0.7894 - val_loss: 0.5991 - 

Epoch 19/30
3158/3158 - 12s - loss: 0.0357 - acc: 0.9911 - val_loss: 1.2488 - val_acc: 0.7464
Epoch 20/30
3158/3158 - 12s - loss: 0.0342 - acc: 0.9889 - val_loss: 1.3690 - val_acc: 0.7521
Epoch 21/30
3158/3158 - 12s - loss: 0.0298 - acc: 0.9902 - val_loss: 1.3176 - val_acc: 0.7407
Epoch 22/30
3158/3158 - 12s - loss: 0.0279 - acc: 0.9937 - val_loss: 1.4154 - val_acc: 0.7407
Epoch 23/30
3158/3158 - 12s - loss: 0.0304 - acc: 0.9908 - val_loss: 1.4231 - val_acc: 0.7464
Epoch 24/30
3158/3158 - 12s - loss: 0.0250 - acc: 0.9940 - val_loss: 1.3362 - val_acc: 0.7322
Epoch 25/30
3158/3158 - 12s - loss: 0.0266 - acc: 0.9921 - val_loss: 1.4225 - val_acc: 0.7407
Epoch 26/30
3158/3158 - 12s - loss: 0.0290 - acc: 0.9911 - val_loss: 1.3909 - val_acc: 0.7179
Epoch 27/30
3158/3158 - 12s - loss: 0.0255 - acc: 0.9924 - val_loss: 1.5161 - val_acc: 0.7407
Epoch 28/30
3158/3158 - 12s - loss: 0.0260 - acc: 0.9924 - val_loss: 1.5086 - val_acc: 0.7436
Epoch 29/30
3158/3158 - 12s - loss: 0.0249 - acc: 0.9927 - v

In [278]:
process_results('convolutional1d')

mean
 acc         0.954887
loss        0.128248
val_acc     0.754804
val_loss    1.045698
dtype: float64
std
 acc         0.072175
loss        0.180131
val_acc     0.019763
val_loss    0.300689
dtype: float64


media
 acc       0.758683
f1        0.532122
mse       0.710778
recall    0.529146
dtype: float64
std
 acc       0.005286
f1        0.009899
mse       0.023814
recall    0.023859
dtype: float64


In [279]:
conv1d_pred = compute_full_model(convolutional1d_model, 'convolutional1d', batch_size=8, epochs=30, shuffle=False, verbose=1)
compute_metrics(conv1d_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 0.7677977378576181,
 'recall': 0.5231151400307517,
 'f1': 0.5245111178634484,
 'acc': 0.7351962741184298}

### Bidirectional

In [280]:
def bidirectional_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(10), input_shape=(28, 300)),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(4, activation='softmax')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [281]:
bidi_hist, bidi_evas = kfold_train(bidirectional_model, 'bidirectional', batch_size=8, epochs=30, shuffle=False, verbose=2)

logs/DL/bidirectional/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 209s - loss: 0.8743 - acc: 0.6371 - val_loss: 0.6999 - val_acc: 0.7066
Epoch 2/30
3158/3158 - 41s - loss: 0.6368 - acc: 0.7521 - val_loss: 0.6993 - val_acc: 0.7350
Epoch 3/30
3158/3158 - 106s - loss: 0.5881 - acc: 0.7828 - val_loss: 0.6330 - val_acc: 0.7635
Epoch 4/30
3158/3158 - 149s - loss: 0.5621 - acc: 0.7926 - val_loss: 0.6208 - val_acc: 0.7806
Epoch 5/30
3158/3158 - 65s - loss: 0.5209 - acc: 0.8129 - val_loss: 0.6425 - val_acc: 0.7607
Epoch 6/30
3158/3158 - 86s - loss: 0.5032 - acc: 0.8217 - val_loss: 0.5832 - val_acc: 0.7863
Epoch 7/30
3158/3158 - 149s - loss: 0.4749 - acc: 0.8309 - val_loss: 0.6067 - val_acc: 0.7778
Epoch 8/30
3158/3158 - 83s - loss: 0.4669 - acc: 0.8404 - val_loss: 0.5973 - val_acc: 0.7892
Epoch 9/30
3158/3158 - 68s - loss: 0.4351 - acc: 0.8471 - val_loss: 0.6328 - val_acc: 0.7806
Epoch 10/30
3158/3158 - 149s - loss: 0.4225 - acc: 0.8566 - val_loss: 0.6443 - val_

Epoch 26/30
3158/3158 - 149s - loss: 0.2005 - acc: 0.9275 - val_loss: 0.8516 - val_acc: 0.7835
Epoch 27/30
3158/3158 - 149s - loss: 0.1928 - acc: 0.9360 - val_loss: 1.1260 - val_acc: 0.7493
Epoch 28/30
3158/3158 - 35s - loss: 0.1972 - acc: 0.9303 - val_loss: 1.0103 - val_acc: 0.7664
Epoch 29/30
3158/3158 - 116s - loss: 0.1869 - acc: 0.9351 - val_loss: 0.8082 - val_acc: 0.7920
Epoch 30/30
3158/3158 - 149s - loss: 0.1944 - acc: 0.9281 - val_loss: 0.8531 - val_acc: 0.7721
logs/DL/bidirectional/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 173s - loss: 0.9006 - acc: 0.6624 - val_loss: 0.6515 - val_acc: 0.7407
Epoch 2/30
3158/3158 - 110s - loss: 0.6715 - acc: 0.7597 - val_loss: 0.5580 - val_acc: 0.7721
Epoch 3/30
3158/3158 - 149s - loss: 0.5947 - acc: 0.7885 - val_loss: 0.5535 - val_acc: 0.7778
Epoch 4/30
3158/3158 - 56s - loss: 0.5554 - acc: 0.8040 - val_loss: 0.5534 - val_acc: 0.7835
Epoch 5/30
3158/3158 - 98s - loss: 0.5277 - acc: 0.8122 - val_loss: 0.5718 

Epoch 21/30
3158/3158 - 60s - loss: 0.2677 - acc: 0.9066 - val_loss: 0.6638 - val_acc: 0.7949
Epoch 22/30
3158/3158 - 93s - loss: 0.2614 - acc: 0.9164 - val_loss: 0.6760 - val_acc: 0.7835
Epoch 23/30
3158/3158 - 151s - loss: 0.2518 - acc: 0.9101 - val_loss: 0.6720 - val_acc: 0.8006
Epoch 24/30
3158/3158 - 120s - loss: 0.2443 - acc: 0.9215 - val_loss: 0.6763 - val_acc: 0.7977
Epoch 25/30
3158/3158 - 34s - loss: 0.2354 - acc: 0.9221 - val_loss: 0.7722 - val_acc: 0.7607
Epoch 26/30
3158/3158 - 151s - loss: 0.2233 - acc: 0.9265 - val_loss: 0.7741 - val_acc: 0.7806
Epoch 27/30
3158/3158 - 150s - loss: 0.2354 - acc: 0.9237 - val_loss: 0.7038 - val_acc: 0.7892
Epoch 28/30
3158/3158 - 68s - loss: 0.2055 - acc: 0.9329 - val_loss: 0.7317 - val_acc: 0.7892
Epoch 29/30
3158/3158 - 86s - loss: 0.2271 - acc: 0.9218 - val_loss: 0.7400 - val_acc: 0.7835
Epoch 30/30
3158/3158 - 151s - loss: 0.2038 - acc: 0.9303 - val_loss: 0.7546 - val_acc: 0.7949
logs/DL/bidirectional/kfold7
Train on 3158 samples, val

Epoch 16/30
3158/3158 - 149s - loss: 0.3101 - acc: 0.8946 - val_loss: 0.6353 - val_acc: 0.7692
Epoch 17/30
3158/3158 - 129s - loss: 0.3066 - acc: 0.8946 - val_loss: 0.7371 - val_acc: 0.7692
Epoch 18/30
3158/3158 - 28s - loss: 0.2779 - acc: 0.8999 - val_loss: 0.7646 - val_acc: 0.7721
Epoch 19/30
3158/3158 - 145s - loss: 0.2704 - acc: 0.9034 - val_loss: 0.7645 - val_acc: 0.7521
Epoch 20/30
3158/3158 - 149s - loss: 0.2626 - acc: 0.9072 - val_loss: 0.7748 - val_acc: 0.7635
Epoch 21/30
3158/3158 - 49s - loss: 0.2334 - acc: 0.9145 - val_loss: 0.8393 - val_acc: 0.7550
Epoch 22/30
3158/3158 - 104s - loss: 0.2391 - acc: 0.9136 - val_loss: 0.7607 - val_acc: 0.7664
Epoch 23/30
3158/3158 - 150s - loss: 0.2180 - acc: 0.9243 - val_loss: 0.8897 - val_acc: 0.7778
Epoch 24/30
3158/3158 - 103s - loss: 0.2146 - acc: 0.9231 - val_loss: 0.9002 - val_acc: 0.7578
Epoch 25/30
3158/3158 - 50s - loss: 0.2123 - acc: 0.9288 - val_loss: 0.9182 - val_acc: 0.7578
Epoch 26/30
3158/3158 - 149s - loss: 0.1934 - acc: 0.

In [282]:
process_results('bidirectional')

mean
 acc         0.868176
loss        0.369929
val_acc     0.770763
val_loss    0.732690
dtype: float64
std
 acc         0.064070
loss        0.162153
val_acc     0.021851
val_loss    0.153524
dtype: float64


media
 acc       0.755356
f1        0.539676
mse       0.702728
recall    0.545240
dtype: float64
std
 acc       0.010928
f1        0.006372
mse       0.049166
recall    0.007273
dtype: float64


In [283]:
bidi_pred = compute_full_model(convolutional1d_model, 'bidirectional', batch_size=8, epochs=30, shuffle=False, verbose=1)
compute_metrics(bidi_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 0.7152361942781105,
 'recall': 0.5365594273839642,
 'f1': 0.5339028660942161,
 'acc': 0.7558216899534265}