# Deep learning tweeter

In [1]:
%load_ext tensorboard


## Imports

In [2]:
import sys
import cufflinks
import pandas as pd
import numpy as np
from tqdm import tqdm
import warnings
import copy
import pickle

warnings.filterwarnings('ignore')
seed = 5
np.random.seed(seed)

sys.path.append('..')
cufflinks.go_offline()

In [3]:
from Corpus.Corpus import get_corpus, filter_binary_pn, filter_corpus_small
from auxiliar.VectorizerHelper import vectorizer, vectorizerIdf, tokenize, procesar_corpus
from auxiliar import parameters
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import recall_score
from auxiliar.HtmlParser import HtmlParser

In [4]:
import tensorflow as tf
from tensorflow.python.keras.callbacks import TensorBoard

In [5]:
import math
import pickle
import pandas as pd
import Levenshtein as lv
from nltk.stem import SnowballStemmer
from gensim.models import Doc2Vec
import gensim
from nltk.tokenize import word_tokenize
from Corpus.Corpus import get_corpus, filter_binary_pn, filter_corpus_small
from time import time, strftime

In [6]:
import nltk
# nltk.download()

## Config

In [7]:
polarity_dim = 5
# clasificadores=['lstm', '2lstm', '2dcnn', '2dcnn+lstm', 'cnn+lstm', 'bidirectionalLstm']
clasificadores=['lstm']
idf = True
target_names=['Neg', 'Pos']
kfolds = 10
base_dir = '2-clases' if polarity_dim == 2 else ('3-clases' if polarity_dim == 3 else '5-clases')
name = 'deep_learning'

In [8]:
w2vec_file = 'data/w2vec.bin'
stemmer = SnowballStemmer('spanish')

## Get data

Get train corpus and filter it by polarity

In [9]:
# cine = HtmlParser(200, "http://www.muchocine.net/criticas_ultimas.php", 1)
data_corpus = get_corpus('general-corpus', 'general-corpus', 1, None)

if polarity_dim == 2:
    data_corpus = filter_binary_pn(data_corpus)
#     cine = filter_binary_pn(cine.get_corpus())
elif polarity_dim == 3:
    data_corpus = filter_corpus_small(data_corpus)
#     cine = filter_corpus_small(cine.get_corpus())
# used_data = cine[:5000]

#Intentando obtener datos del archivo csv...
/home/suampa/Documentos/SentimentAnalysis/Corpus/../data/general-corpus.csv
#Datos recuperados!


In [10]:
data_corpus.reset_index().groupby('polarity').agg({'index': 'count'}).iplot(kind='bar')

We have a very low amount of neutral elements, it will lead to bad results

## Preprocess

we use our axiliar **preprocessor** function (VectorizerHelper.procesar_corpus) with params:

text, process_text, stop_words, negation, repeated_letters

In [11]:
def apply_prepro(data):
    return procesar_corpus(data, True, True, False, True)
data_corpus.content = data_corpus.content.apply(apply_prepro)

### Model initialization

In [12]:
model = gensim.models.keyedvectors.KeyedVectors.load_word2vec_format(w2vec_file, binary=True)

### Tokenize texts

We use auxiliar function **tokenize** to split content in tokens (words). This method receives a flag that indicates if it will use stemming or not

In [13]:
def apply_tokenization(data):
    return tokenize(data, False)
tokens = data_corpus.content.apply(apply_tokenization)

In [14]:
token_df = pd.DataFrame([x for x in tokens]).transpose()
token_df.columns = pd.MultiIndex.from_arrays([data_corpus.polarity, token_df.columns])

In [15]:
print('palabras totales', token_df.count().sum())
print('media de palabras por texto', token_df.count().mean())

palabras totales 44512
media de palabras por texto 8.87931378416118


In [16]:
token_df.count().iplot(kind='histogram')

Although most of the documents have a maximum amount of 10 words, we will use 28 words as maximum, to avoid penalization on train data

In [17]:
pd.DataFrame([token_df[x].count().sum() for x in token_df.columns.levels[0]]).iplot(kind='bar')

There is a several inequality in amount of words per class, as it was expected

### w2vec process

In [18]:
# stem_vocab = np.array([stemmer.stem(x) for x in model.vocab])
stem_vocab = np.array([x for x in model.vocab])
stem_vocab_dict = dict.fromkeys(stem_vocab, 1)

In [19]:
asci_codes = [np.array([ord(x) for x in y]) for y in stem_vocab]

In [20]:
len("electroencefalografista")

23

In [21]:
padded_asci_codes = tf.keras.preprocessing.sequence.pad_sequences(asci_codes, maxlen=23, padding='post')

#### Not found in vocab

In [22]:
not_in_vocab = pd.concat([token_df[d][token_df[d].apply(lambda x: x not in stem_vocab_dict)] for d in token_df.columns], axis=1)

In [23]:
print('total words not found in vocab', not_in_vocab.count().sum())
print('not found words mean', not_in_vocab.count().mean())

total words not found in vocab 2065
not found words mean 0.41192898463993616


In [24]:
pd.DataFrame([not_in_vocab[x].count().sum() for x in not_in_vocab.columns.levels[0]]).iplot(kind='bar')

In [25]:
not_in_vocab.columns = not_in_vocab.columns.droplevel()

In [26]:
not_in_vocab_words = pd.DataFrame(
    [x for sublist in [not_in_vocab[y].dropna().values for y in not_in_vocab.columns] for x in sublist]
).drop_duplicates()

#### Found in vocab

In [27]:
in_vocab = pd.concat([token_df[d][token_df[d].apply(lambda x: x in stem_vocab_dict)] for d in token_df.columns], axis=1)

In [28]:
in_vocab.columns = in_vocab.columns.droplevel()

In [29]:
in_vocab_words = pd.DataFrame(
    [x for sublist in [in_vocab[y].dropna().values for y in in_vocab.columns] for x in sublist]
).drop_duplicates()

In [30]:
in_vocab.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,5003,5004,5005,5006,5007,5008,5009,5010,5011,5012
0,gracias,off,conozco,toca,buen,escaño,buenos,sistema,caca,buen,...,ya,rajoy,rick,,nace,muy,más,crean,sorprendente,está
1,mar,pensando,adicto,grabación,día,listo,días,económico,ajuste,viernes,...,dos,da,santorum,será,jirafa,indignante,pobres,banco,huída,muy
2,,regalito,drama,especial,primero,empezar,em,recorta,,,...,ganas,espalda,retira,presidente,primera,si,discriminar,productos,hoy,bien
3,,sinde,ja,navideño,mandar,congreso,no,dinero,,,...,verte,post,campaña,,su,repara,mujer,mujeres,senado,versión
4,,va,ja,mari,abrazo,,ira,prestaciones,,,...,rt,buzón,primarias,,especie,hoy,,cáncer,rajoy,gallega


#### Replace process

In [31]:
def replace_words(w):
    found_positions = np.where(stem_vocab == w)[0]
    return model[vocab_keys[found_positions[0]]]

In [32]:
def custom_levenshtein(word, dictionary):
    ascii_matrix = np.repeat([word], [dictionary.shape[0]], axis=0)
    difference = ascii_matrix - dictionary
    difference = np.where(difference != 0, 1, difference)
    difference_sum = np.sum(difference, axis=1)
    minval = np.min(difference_sum)
    minidx = np.argmin(difference_sum)
    if minval < 2 and minval > 0:
        return minidx
    else:
        return None

In [33]:
def find_coincidences(word, dictionary):
    min_len = max(len(word) - 1, 1)
    max_len = len(word) + 1
    dist = np.array([lv.distance(x, word) - x.startswith(word) if len(x) > min_len and len(x) < max_len else 999 for x in dictionary])
    minval = np.min(dist)
    if minval < 2 and minval > 0:
        minidx = np.argmin(dist)
#         root_logger.info('current word %s - found_distance %s - idx to replace %s word', word, minval, minidx)
        return dictionary[minidx], np.delete(dictionary, minidx)
    else:
        return None, dictionary
    
    
    

In [34]:
import gc
gc.collect()

83536

In [35]:
dictionary = not_in_vocab_words.values.reshape(1,-1)[0]
ascii_dictionary = np.array([np.array([ord(x) for x in y]) for y in dictionary])
padded_dictionary = tf.keras.preprocessing.sequence.pad_sequences(ascii_dictionary, maxlen=23, padding="post")

found = dict()
with tqdm(total=len(padded_asci_codes)) as pbar:
    for i, val in enumerate(padded_asci_codes):
        pbar.update(1)
        coincidence = custom_levenshtein(val, padded_dictionary)
        if coincidence:
            found[dictionary[coincidence]] = model[stem_vocab[i]]
        if len(found) == dictionary.shape[0]:
            break

100%|██████████| 1000653/1000653 [13:47<00:00, 1209.14it/s]


In [36]:
gc.collect()

20

In [37]:
dictionary = in_vocab_words.values.reshape(1,-1)[0]
w2vec_found = dict()
with tqdm(total=len(dictionary)) as pbar:
    for word in dictionary:
        pbar.update(1)
        w2vec_found[word] = model[word]

100%|██████████| 11308/11308 [00:00<00:00, 206642.43it/s]


In [38]:
in_vocab_replaced = in_vocab.applymap(lambda x: w2vec_found[x] if x in w2vec_found else math.nan)

In [39]:
not_in_vocab_replaced = not_in_vocab.applymap(lambda x: found[x] if x in found else math.nan)

In [40]:
in_vocab_replaced.update(not_in_vocab_replaced)

#### Replace nan positions

All text must have the same length, so we need to fill those that did not match this requirement.

We will use a zero's array.

In [41]:
nan_pos = pd.DataFrame([in_vocab_replaced[c].isna() for c in in_vocab_replaced.columns]).transpose()

In [42]:
features = 300
zeros = np.zeros((features))
in_vocab_replaced.update(nan_pos.applymap(lambda x: zeros if x else math.nan))

In [43]:
in_vocab_replaced.to_pickle('tweeter_wemb_5_clases.pkl')

## Split data

In [44]:
in_vocab_replaced = pd.read_pickle('tweeter_wemb_5_clases.pkl')

In [45]:
in_vocab_replaced.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,5003,5004,5005,5006,5007,5008,5009,5010,5011,5012
0,"[0.123253495, 0.047755074, 0.18744704, -0.0576...","[-0.03825481, 0.4745884, 0.06159374, -0.211678...","[0.08140966, -0.2937571, 0.09323869, -0.070561...","[-0.08855907, -0.04540643, -0.03799705, 0.1179...","[-0.24166, -0.31998757, 0.05182405, -0.0511, 0...","[-0.30192834, -0.091402225, -0.07635854, -0.32...","[0.16187154, -0.15262279, 0.15911105, 0.136880...","[-0.03385875, -0.05679143, 0.15936868, 0.03850...","[0.08511154, -0.5224435, -0.1114207, -0.029714...","[-0.24166, -0.31998757, 0.05182405, -0.0511, 0...",...,"[0.08881656, -0.08638146, 0.19055837, 0.011414...","[0.24844696, -0.045677852, 0.023781389, -0.055...","[0.028837433, -0.1533759, -0.15925558, -0.0515...","[0.084870994, 0.018372163, -0.19153509, -0.140...","[0.19372217, -0.00981669, 0.0936164, 0.0327220...","[0.2661146, 0.10789581, 0.24465632, 0.09246798...","[0.0937914, -0.06750509, 0.11355269, -0.071939...","[0.05255723, -0.173229, -0.043076243, -0.07914...","[0.08545774, -0.18351299, 0.040896367, -0.2875...","[0.19491133, 0.13588089, 0.26361302, 0.0549132..."
1,"[-0.27860123, -0.0073691155, 0.07620924, -0.18...","[-0.13225149, 0.007982017, -0.15443377, -0.041...","[-0.22758521, 0.119482145, 0.07687994, -0.1562...","[0.19492386, 0.34440613, 0.05423296, 0.1994891...","[0.11887759, -0.062084418, 0.24743606, 0.08697...","[-0.03682754, 0.124864206, 0.08532753, 0.11072...","[0.29450724, -0.08953724, 0.22900815, -0.13844...","[-0.01992176, -0.38204813, 0.08824053, 0.02404...","[0.032929733, 0.071419924, -0.063104734, -0.08...","[0.16452287, 0.04510333, 0.17681116, -0.175676...",...,"[0.15753947, -0.11052575, 0.026150983, -0.0067...","[0.18700868, -0.04345352, -0.21399334, -0.0299...","[-0.115100406, 0.04057121, -0.051373735, -0.14...","[0.008335834, -0.13954785, 0.07759602, -0.0191...","[0.057889074, -0.0860811, 0.005500754, 0.19180...","[-0.09766652, -0.055921096, -0.18628502, -0.33...","[-0.14751814, 0.014919235, -0.06650046, -0.193...","[0.20185633, 0.16108106, 0.17906275, 0.0547961...","[-0.015799766, 0.09396541, 0.08841807, -0.0635...","[0.2661146, 0.10789581, 0.24465632, 0.09246798..."
2,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.1472689, 0.035210133, -0.0905985, 0.235152...","[-0.19423386, -0.1605221, -0.122799665, -0.226...","[0.010192282, 0.011454537, -0.024294477, -0.14...","[-0.016709665, 0.04634203, 0.12317722, -0.1102...","[0.015565158, 0.013814226, 0.2070413, -0.10364...","[0.23736276, 0.31505284, -0.36881423, 0.341764...","[-0.2906534, 0.38485017, -0.32198793, -0.46555...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",...,"[-0.15636458, -0.14417209, 0.15087254, -0.0876...","[-0.18826456, -0.26298478, -0.30475214, -0.043...","[-0.32796225, 0.12644982, 0.1373805, -0.328892...","[-0.021904068, -0.28292415, 0.036686286, -0.12...","[0.31308195, 0.28671673, 0.061079856, -0.18517...","[0.060204167, -0.17087598, 0.2108287, -0.02707...","[-0.09252765, -0.06127216, -0.21862276, -0.010...","[0.10744205, 0.0075490335, -0.28650734, 0.2955...","[0.025537454, -0.1880749, 0.1568079, -0.178805...","[-0.06769281, -0.30116892, 0.08937262, 0.08235..."
3,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.038064547, -0.045823276, -0.07485215, -0.07...","[0.25501376, -0.011608973, -0.27930972, 0.3525...","[-0.18029697, 0.08496069, 0.01006801, -9.25252...","[-0.25586495, 0.28853804, 0.059111107, -0.0686...","[0.055961747, 0.03338554, 0.17965522, -0.18108...","[-0.0312227, -0.05622646, 0.2197303, -0.072537...","[-0.17901269, -0.015474343, 0.004863352, 0.029...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",...,"[-0.2848985, -0.20139082, -0.099536225, 0.1538...","[0.08226747, 0.031416256, 0.0009123865, -0.560...","[0.40826887, -0.085287675, 0.13206185, -0.1375...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.16144432, -0.056896593, -0.029683227, -0.1...","[-0.21747229, -0.19461995, 0.11051066, -0.0933...","[-0.001835166, -0.24167489, 0.1767869, 0.11757...","[0.109622784, 0.012106776, 0.24526607, 0.17946...","[-0.099428646, 0.39090794, -0.092911236, -0.35...","[0.28721282, 0.26410416, 0.026493348, -0.04709..."
4,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.11524121, 0.048132487, 0.16708641, 0.080564...","[0.25501376, -0.011608973, -0.27930972, 0.3525...","[0.04357922, -0.36061037, -0.74110204, 0.37670...","[6.7697583e-06, -0.39855585, 0.1004604, -0.018...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.15381046, -0.13758525, 0.09431736, -0.5013...","[-0.324229, 0.33888578, -0.41092703, 0.3568749...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",...,"[0.5313497, -0.14747444, -0.03454075, -0.03119...","[-0.38848424, -0.050834656, 0.008239044, 0.219...","[0.10601474, -0.044864, -0.20512107, -0.215069...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.23924325, 0.06999114, 0.06436243, -0.126434...","[0.025537454, -0.1880749, 0.1568079, -0.178805...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.44580138, 0.09412842, 0.27231395, -0.158739...","[0.24844696, -0.045677852, 0.023781389, -0.055...","[-0.13998565, 0.21335681, 0.119549684, -0.1903..."


In [46]:
split = in_vocab_replaced.shape[1] * 0.7

In [47]:
train_corpus = in_vocab_replaced.loc[:, :split - 1]
test_corpus = in_vocab_replaced.loc[:, split:]

In [48]:
print("corpus shape ", in_vocab_replaced.shape)
print("train_corpus shape ", train_corpus.shape)
print("test_corpus shape ", test_corpus.shape)

corpus shape  (28, 5013)
train_corpus shape  (28, 3509)
test_corpus shape  (28, 1503)


In [49]:
features=train_corpus[0][0].shape[0]
timesteps=train_corpus.shape[0]
elements=train_corpus.shape[1]
print("features ", features)
print("timesteps ", timesteps)
print("elements ", elements)

features  300
timesteps  28
elements  3509


In [50]:
wemb_x = np.array([np.concatenate(train_corpus[x].values) for x in train_corpus.columns])

In [51]:
wemb_test = np.array([np.concatenate(test_corpus[x].values) for x in test_corpus.columns])

In [52]:
tokenizer = tf.keras.preprocessing.text.Tokenizer()

In [53]:
tokenizer.fit_on_texts(data_corpus.content)

In [54]:
sequences = tokenizer.texts_to_sequences(data_corpus.content)

In [55]:
sequences = tf.keras.preprocessing.sequence.pad_sequences(sequences, maxlen=28, padding='post')

In [56]:
sequences.shape

(5013, 28)

In [57]:
pd.DataFrame(tokenizer.word_index.items(), columns=["word", "idx"]).to_pickle('word_index.pkl')

## Get final train data

In [58]:
wemb_x.shape

(3509, 8400)

In [59]:
x_train = wemb_x.reshape(elements, timesteps, features)

In [60]:
x_train.shape

(3509, 28, 300)

In [61]:
x_test = wemb_test.reshape(test_corpus.shape[1], timesteps, features)

In [62]:
x_test.shape

(1503, 28, 300)

In [63]:
from sklearn.preprocessing import LabelEncoder

In [64]:
def get_Y(corpus, polarity_dim=polarity_dim):
    if polarity_dim == 2:
        encoder = LabelEncoder()
        encoder.fit(corpus.polarity.values)
        Y = encoder.transform(corpus.polarity.values)
        return Y
    else:
        return tf.keras.utils.to_categorical([x for x in corpus.polarity.values])

In [65]:
data_corpus.polarity.unique()

array([4, 1, 5, 2, 3])

In [66]:
Y = get_Y(data_corpus, polarity_dim)

In [67]:
Y.shape

(5013, 6)

In [68]:
y_train = Y[:3509]
y_test = Y[3510:]

In [69]:
print('y_train shape ', y_train.shape)
print('y_test shape ', y_test.shape)

y_train shape  (3509, 6)
y_test shape  (1503, 6)


In [70]:
y_train

array([[0., 0., 0., 0., 1., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1.],
       ...,
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 1., 0., 0.]], dtype=float32)

In [71]:
data_corpus[:3509].polarity.iplot(kind='histogram')

In [72]:
data_corpus[3509:].polarity.iplot(kind='histogram')

### Definición de funciones

In [73]:
colors=['red', 'blue','red', 'blue','red', 'blue','red', 'blue','red', 'blue','red', 'blue','red', 'blue','red', 'blue','red', 'blue','red', 'blue']

In [74]:
kf = KFold(n_splits=kfolds, shuffle=True, random_state=None) # realización de k-folds
folds = kf.split(x_train)
folds = pd.DataFrame([x for x in folds])
folds.to_pickle('folds_5_clases.pkl')
folds = pd.read_pickle('folds_5_clases.pkl')

In [75]:
folds = folds.values

In [76]:
def convert_to_df(hist):
    cols = pd.MultiIndex.from_product([["step1", "step2", "step3", "step4", "step5", "step6", "step7", "step8", "step9", "step10"], hist[0].history.keys()])
    hist_df = pd.concat([pd.DataFrame(x.history) for x in  hist], axis=1)
    hist_df.columns = cols
    hist_df.head()
    return hist_df

In [77]:
def compute_metrics(predictions, real):
    metrics = dict()
    real = [np.argmax(p) - 1 for p in real]
    bin_preds = [np.argmax(p) - 1 for p in predictions]
    metrics['mse'] = mean_squared_error(bin_preds, real)
    metrics['recall'] = recall_score(bin_preds, real, average='macro')
    metrics['f1'] = f1_score(bin_preds, real, average='macro')
    metrics['acc'] = accuracy_score(bin_preds, real)
    return metrics
    

In [79]:
def kfold_train(model_func, model_name, **params):
    evaluations = list()
    hists = list()
    i = 0
    for train_index, val_index in folds:
        i += 1
        model = model_func()
        train_x = x_train[train_index]
        train_y = y_train[train_index]
        val_x = x_train[val_index]
        val_y = y_train[val_index]
        
        logdir="logs/DL/"+ model_name+"/kfold" + str(i)
        print(logdir)
#         tensorboard_callback = TensorBoard(log_dir=logdir)

#         hist = model.fit(train_x, train_y, validation_data=(val_x, val_y), callbacks=[tensorboard_callback], **params)
        hist = model.fit(train_x, train_y, validation_data=(val_x, val_y), **params)
        
        hists.append(hist)
        evaluations.append(compute_metrics(model.predict(x_test), y_test))
    hist_df = convert_to_df(hists)
    hist_df.to_pickle('results/'+name+'/tweeter/' + base_dir + '/'+model_name+'_lstm.pkl')
    evas_df = pd.DataFrame.from_dict(evaluations)
    evas_df.to_pickle('results/'+name+'/tweeter/' + base_dir + '/'+model_name+'_lstm_evas.pkl')    
    return hist_df, evas_df

In [80]:
def compute_full_model(model_func, model_name, **params):
    model = model_func()
    model.fit(x_train, y_train, **params)
    model.save('results/'+name+'/tweeter/' + base_dir + '/' + model_name + '.h5' )
    preds = model.predict(x_test)
    pd.DataFrame(preds).to_pickle('results/'+name+'/tweeter/' + base_dir + '/' + model_name + '_preds.pkl')
    return preds

In [81]:
def plot_loss(hist_df):
    hist_df.loc[:, pd.IndexSlice[:, ['loss', 'val_loss']]].iplot(colors=colors)
    print('mean\n', hist_df.stack(level=0).mean())
    print('std\n', hist_df.stack(level=0).std())

In [82]:
def plot_val(evas_df):
    evas_df.iplot()
    print('media\n', evas_df.mean())
    print('std\n', evas_df.std())

In [83]:
def process_results(model_name):
    hist = pd.read_pickle('results/'+name+'/tweeter/' + base_dir + '/'+ model_name + '_lstm.pkl')
    evas = pd.read_pickle('results/'+name+'/tweeter/' + base_dir + '/'+ model_name + '_lstm_evas.pkl')
    plot_loss(hist)
    plot_val(evas)

### Definición de modelos

#### Linea base

In [87]:
def create_lstm_val():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(64, kernel_initializer='normal', activation='relu', input_shape=(28, 300), name='lstm'),
        tf.keras.layers.Dense(6, activation='softmax', kernel_initializer='normal', name='dense')
    ])
    model.compile(loss="categorical_crossentropy", optimizer='adam', metrics=['accuracy'])
    return model

In [88]:
val_hist, val_evas = kfold_train(create_lstm_val, 'lstm_val', batch_size=256, epochs=30, shuffle=False, verbose=2)

W0702 08:06:23.467154 140009374406464 deprecation.py:323] From /home/suampa/.local/lib/python3.7/site-packages/tensorflow_core/python/ops/math_grad.py:1251: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


logs/DL/lstm_val/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 2s - loss: 1.7865 - acc: 0.3056 - val_loss: 1.7743 - val_acc: 0.3191
Epoch 2/30
3158/3158 - 0s - loss: 1.7442 - acc: 0.3281 - val_loss: 1.6996 - val_acc: 0.3191
Epoch 3/30
3158/3158 - 0s - loss: 1.6717 - acc: 0.3274 - val_loss: 1.6484 - val_acc: 0.3191
Epoch 4/30
3158/3158 - 0s - loss: 1.6049 - acc: 0.3306 - val_loss: 1.5644 - val_acc: 0.3191
Epoch 5/30
3158/3158 - 1s - loss: 1.5571 - acc: 0.3274 - val_loss: 1.4887 - val_acc: 0.3191
Epoch 6/30
3158/3158 - 0s - loss: 1.5120 - acc: 0.3274 - val_loss: 1.4712 - val_acc: 0.3191
Epoch 7/30
3158/3158 - 0s - loss: 1.4867 - acc: 0.3274 - val_loss: 1.4506 - val_acc: 0.3191
Epoch 8/30
3158/3158 - 0s - loss: 1.4587 - acc: 0.3274 - val_loss: 1.4936 - val_acc: 0.3191
Epoch 9/30
3158/3158 - 1s - loss: 1.4572 - acc: 0.3189 - val_loss: 1.3683 - val_acc: 0.3191
Epoch 10/30
3158/3158 - 0s - loss: 1.3672 - acc: 0.3271 - val_loss: 1.3368 - val_acc: 0.3390
Epoch 11

3158/3158 - 0s - loss: 0.9119 - acc: 0.6270 - val_loss: 1.4934 - val_acc: 0.4929
Epoch 28/30
3158/3158 - 0s - loss: 0.8784 - acc: 0.6377 - val_loss: 1.6652 - val_acc: 0.4701
Epoch 29/30
3158/3158 - 0s - loss: 0.8528 - acc: 0.6422 - val_loss: 1.6326 - val_acc: 0.4672
Epoch 30/30
3158/3158 - 0s - loss: 0.8226 - acc: 0.6561 - val_loss: 1.6124 - val_acc: 0.4786
logs/DL/lstm_val/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 1s - loss: 1.7853 - acc: 0.2996 - val_loss: 1.7584 - val_acc: 0.2821
Epoch 2/30
3158/3158 - 0s - loss: 1.7685 - acc: 0.2774 - val_loss: 1.7303 - val_acc: 0.3789
Epoch 3/30
3158/3158 - 0s - loss: 1.7192 - acc: 0.3379 - val_loss: 1.6633 - val_acc: 0.3761
Epoch 4/30
3158/3158 - 0s - loss: 1.6455 - acc: 0.3490 - val_loss: 1.5790 - val_acc: 0.3476
Epoch 5/30
3158/3158 - 1s - loss: 1.5791 - acc: 0.3125 - val_loss: 1.4903 - val_acc: 0.2877
Epoch 6/30
3158/3158 - 1s - loss: 1.5238 - acc: 0.2973 - val_loss: 1.4357 - val_acc: 0.4103
Epoch 7/30
3158/3

Epoch 24/30
3158/3158 - 3s - loss: 0.8387 - acc: 0.6434 - val_loss: 1.5048 - val_acc: 0.4587
Epoch 25/30
3158/3158 - 3s - loss: 0.7875 - acc: 0.6678 - val_loss: 1.6613 - val_acc: 0.4473
Epoch 26/30
3158/3158 - 3s - loss: 0.7635 - acc: 0.6811 - val_loss: 1.6977 - val_acc: 0.4473
Epoch 27/30
3158/3158 - 3s - loss: 0.7306 - acc: 0.6970 - val_loss: 1.8513 - val_acc: 0.4558
Epoch 28/30
3158/3158 - 3s - loss: 0.7062 - acc: 0.7039 - val_loss: 1.7820 - val_acc: 0.4501
Epoch 29/30
3158/3158 - 3s - loss: 0.7206 - acc: 0.7084 - val_loss: 1.4874 - val_acc: 0.4587
Epoch 30/30
3158/3158 - 3s - loss: 0.7626 - acc: 0.6738 - val_loss: 1.9006 - val_acc: 0.3903
logs/DL/lstm_val/kfold7
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 1s - loss: 1.7843 - acc: 0.2365 - val_loss: 1.7647 - val_acc: 0.2023
Epoch 2/30
3158/3158 - 1s - loss: 1.7267 - acc: 0.2514 - val_loss: 1.6980 - val_acc: 0.2137
Epoch 3/30
3158/3158 - 0s - loss: 1.6863 - acc: 0.2954 - val_loss: 1.6975 - val_acc: 0.3333
Ep

3158/3158 - 3s - loss: 1.1088 - acc: 0.5158 - val_loss: 1.2792 - val_acc: 0.4672
Epoch 21/30
3158/3158 - 3s - loss: 1.2600 - acc: 0.4658 - val_loss: 1.3164 - val_acc: 0.4843
Epoch 22/30
3158/3158 - 3s - loss: 1.1386 - acc: 0.5095 - val_loss: 1.2128 - val_acc: 0.5014
Epoch 23/30
3158/3158 - 3s - loss: 1.1211 - acc: 0.5298 - val_loss: 1.2036 - val_acc: 0.4843
Epoch 24/30
3158/3158 - 3s - loss: 1.0934 - acc: 0.5364 - val_loss: 1.2436 - val_acc: 0.4986
Epoch 25/30
3158/3158 - 3s - loss: 1.0518 - acc: 0.5481 - val_loss: 1.2821 - val_acc: 0.4929
Epoch 26/30
3158/3158 - 3s - loss: 1.0131 - acc: 0.5646 - val_loss: 1.3215 - val_acc: 0.4929
Epoch 27/30
3158/3158 - 3s - loss: 0.9816 - acc: 0.5763 - val_loss: 1.3772 - val_acc: 0.4872
Epoch 28/30
3158/3158 - 3s - loss: 0.9588 - acc: 0.5861 - val_loss: 1.3967 - val_acc: 0.4957
Epoch 29/30
3158/3158 - 3s - loss: 0.9334 - acc: 0.6001 - val_loss: 1.4566 - val_acc: 0.4986
Epoch 30/30
3158/3158 - 3s - loss: 0.9079 - acc: 0.6092 - val_loss: 1.4736 - val_a

In [89]:
process_results('lstm_val')

mean
 acc         0.475233
loss        1.220424
val_acc     0.426318
val_loss    1.433803
dtype: float64
std
 acc         0.122469
loss        0.293250
val_acc     0.076492
val_loss    0.208128
dtype: float64


media
 acc       0.427611
f1        0.317968
mse       1.749102
recall    0.372404
dtype: float64
std
 acc       0.020627
f1        0.037001
mse       0.218692
recall    0.065826
dtype: float64


In [90]:
val_pred = compute_full_model(create_lstm_val, 'lstm_val', batch_size=256, epochs=30, shuffle=False, verbose=1)
compute_metrics(val_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 1.9926813040585496,
 'recall': 0.216216619134042,
 'f1': 0.25135620148918336,
 'acc': 0.4085163007318696}

La reducción de la variable loss en el conjunto de entrenamiento produce un efecto de sobreentrenamiento que provoca un mayor error en el conjunto de validación

Algunas de las opciones para reducir este efecto son:
* Reducir la complejidad de la red neuronal
* Aplicar alguna clase de regularización al modelo
* Buscar una topología que se adapte mejor al problema
* Obtener más datos

#### Reducción complejidad modelo

In [91]:
def create_simpler_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(10, kernel_initializer='normal', activation='relu', input_shape=(28, 300), name='lstm'),
        tf.keras.layers.Dense(6, activation='softmax', kernel_initializer='normal', name='dense')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [92]:
simpler_hist, simpler_evas = kfold_train(create_simpler_model, 'lstm_simple', batch_size=256, epochs=30, shuffle=False, verbose=2)

logs/DL/lstm_simple/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 5s - loss: 1.7887 - acc: 0.2834 - val_loss: 1.7840 - val_acc: 0.3191
Epoch 2/30
3158/3158 - 2s - loss: 1.7799 - acc: 0.3271 - val_loss: 1.7687 - val_acc: 0.3219
Epoch 3/30
3158/3158 - 2s - loss: 1.7282 - acc: 0.3474 - val_loss: 1.6684 - val_acc: 0.3162
Epoch 4/30
3158/3158 - 2s - loss: 1.6420 - acc: 0.3227 - val_loss: 1.5999 - val_acc: 0.3305
Epoch 5/30
3158/3158 - 2s - loss: 1.5773 - acc: 0.3290 - val_loss: 1.5450 - val_acc: 0.3162
Epoch 6/30
3158/3158 - 3s - loss: 1.5449 - acc: 0.3274 - val_loss: 1.5168 - val_acc: 0.3191
Epoch 7/30
3158/3158 - 3s - loss: 1.5263 - acc: 0.3271 - val_loss: 1.5029 - val_acc: 0.3134
Epoch 8/30
3158/3158 - 3s - loss: 1.5119 - acc: 0.3268 - val_loss: 1.4938 - val_acc: 0.3219
Epoch 9/30
3158/3158 - 3s - loss: 1.5035 - acc: 0.3274 - val_loss: 1.4883 - val_acc: 0.3219
Epoch 10/30
3158/3158 - 3s - loss: 1.5030 - acc: 0.3319 - val_loss: 1.4924 - val_acc: 0.3276
Epoch

Epoch 27/30
3158/3158 - 0s - loss: 1.0879 - acc: 0.5317 - val_loss: 1.2470 - val_acc: 0.4672
Epoch 28/30
3158/3158 - 1s - loss: 1.0804 - acc: 0.5288 - val_loss: 1.2529 - val_acc: 0.4530
Epoch 29/30
3158/3158 - 0s - loss: 1.0750 - acc: 0.5364 - val_loss: 1.2922 - val_acc: 0.4530
Epoch 30/30
3158/3158 - 0s - loss: 1.0696 - acc: 0.5383 - val_loss: 1.2703 - val_acc: 0.4701
logs/DL/lstm_simple/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 2s - loss: 1.7884 - acc: 0.2606 - val_loss: 1.7829 - val_acc: 0.3476
Epoch 2/30
3158/3158 - 0s - loss: 1.7792 - acc: 0.3382 - val_loss: 1.7587 - val_acc: 0.3732
Epoch 3/30
3158/3158 - 0s - loss: 1.7082 - acc: 0.3300 - val_loss: 1.6298 - val_acc: 0.3105
Epoch 4/30
3158/3158 - 0s - loss: 1.6136 - acc: 0.2961 - val_loss: 1.5363 - val_acc: 0.3020
Epoch 5/30
3158/3158 - 0s - loss: 1.5598 - acc: 0.2967 - val_loss: 1.4875 - val_acc: 0.3419
Epoch 6/30
3158/3158 - 0s - loss: 1.5280 - acc: 0.3249 - val_loss: 1.4757 - val_acc: 0.3419
Ep

Epoch 23/30
3158/3158 - 3s - loss: 1.5893 - acc: 0.3265 - val_loss: 1.5915 - val_acc: 0.3248
Epoch 24/30
3158/3158 - 3s - loss: 1.5848 - acc: 0.3265 - val_loss: 1.5863 - val_acc: 0.3248
Epoch 25/30
3158/3158 - 3s - loss: 1.5793 - acc: 0.3265 - val_loss: 1.5812 - val_acc: 0.3248
Epoch 26/30
3158/3158 - 3s - loss: 1.5762 - acc: 0.3265 - val_loss: 1.5770 - val_acc: 0.3248
Epoch 27/30
3158/3158 - 3s - loss: 1.5712 - acc: 0.3265 - val_loss: 1.5732 - val_acc: 0.3248
Epoch 28/30
3158/3158 - 3s - loss: 1.5678 - acc: 0.3265 - val_loss: 1.5696 - val_acc: 0.3248
Epoch 29/30
3158/3158 - 3s - loss: 1.5639 - acc: 0.3265 - val_loss: 1.5661 - val_acc: 0.3248
Epoch 30/30
3158/3158 - 3s - loss: 1.5611 - acc: 0.3262 - val_loss: 1.5631 - val_acc: 0.3248
logs/DL/lstm_simple/kfold7
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 12s - loss: 1.7887 - acc: 0.2419 - val_loss: 1.7851 - val_acc: 0.3333
Epoch 2/30
3158/3158 - 3s - loss: 1.7812 - acc: 0.3274 - val_loss: 1.7780 - val_acc: 0.34

Epoch 19/30
3158/3158 - 3s - loss: 1.2004 - acc: 0.4794 - val_loss: 1.2229 - val_acc: 0.5071
Epoch 20/30
3158/3158 - 3s - loss: 1.1919 - acc: 0.4823 - val_loss: 1.2162 - val_acc: 0.5128
Epoch 21/30
3158/3158 - 3s - loss: 1.1845 - acc: 0.4867 - val_loss: 1.2105 - val_acc: 0.5128
Epoch 22/30
3158/3158 - 3s - loss: 1.1785 - acc: 0.4883 - val_loss: 1.2068 - val_acc: 0.5185
Epoch 23/30
3158/3158 - 3s - loss: 1.1744 - acc: 0.4889 - val_loss: 1.2040 - val_acc: 0.5214
Epoch 24/30
3158/3158 - 3s - loss: 1.1737 - acc: 0.4911 - val_loss: 1.2074 - val_acc: 0.5043
Epoch 25/30
3158/3158 - 3s - loss: 1.1714 - acc: 0.4940 - val_loss: 1.1984 - val_acc: 0.5043
Epoch 26/30
3158/3158 - 3s - loss: 1.1694 - acc: 0.4886 - val_loss: 1.2178 - val_acc: 0.5043
Epoch 27/30
3158/3158 - 3s - loss: 1.2130 - acc: 0.4750 - val_loss: 1.3954 - val_acc: 0.4330
Epoch 28/30
3158/3158 - 3s - loss: 1.4090 - acc: 0.4253 - val_loss: 1.2588 - val_acc: 0.4786
Epoch 29/30
3158/3158 - 3s - loss: 1.2465 - acc: 0.4839 - val_loss: 1.

In [93]:
process_results('lstm_simple')

mean
 acc         0.394891
loss        1.436582
val_acc     0.390047
val_loss    1.475402
dtype: float64
std
 acc         0.075653
loss        0.285101
val_acc     0.066712
val_loss    0.745058
dtype: float64


media
 acc       0.395476
f1        0.243851
mse       2.779375
recall    0.265534
dtype: float64
std
 acc       0.050636
f1        0.084254
mse       1.489295
recall    0.122294
dtype: float64


In [94]:
simpler_pred = compute_full_model(create_simpler_model, 'lstm_simple', batch_size=256, epochs=30, shuffle=False, verbose=1)
compute_metrics(simpler_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 1.692614770459082,
 'recall': 0.36040393790563774,
 'f1': 0.3276697530495673,
 'acc': 0.43912175648702595}

La reducción de dimensionalidad de la red neuronal permite reducir la velocidad de aprendizaje sobre el conjunto de entrenamiento reduciendo así la perdida en el conjunto de validación.

Si lo compramos con el resultado anterior vemos que en general se reducen los picos negativos, en un 0.30 como minimo, sin embargo la convergencia sigue estando sobre 0.4 en los mejores casos

#### Regularización por dropout

In [95]:
def create_drop_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(10, kernel_initializer='normal', activation='relu', input_shape=(28, 300), name='lstm'),
        tf.keras.layers.Dropout(0.7),
        tf.keras.layers.Dense(6, activation='softmax', kernel_initializer='normal', name='dense')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [96]:
drop_hist, drop_evas = kfold_train(create_drop_model, 'dropout_lstm', batch_size=256, epochs=30, shuffle=False, verbose=1)

W0702 08:36:30.377500 140009374406464 nn_ops.py:4230] Large dropout rate: 0.7 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.


logs/DL/dropout_lstm/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


W0702 08:36:58.572413 140009374406464 nn_ops.py:4230] Large dropout rate: 0.7 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.


logs/DL/dropout_lstm/kfold2
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


W0702 08:39:12.995260 140009374406464 nn_ops.py:4230] Large dropout rate: 0.7 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.


logs/DL/dropout_lstm/kfold3
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


W0702 08:41:34.892739 140009374406464 nn_ops.py:4230] Large dropout rate: 0.7 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.


logs/DL/dropout_lstm/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


W0702 08:43:57.741701 140009374406464 nn_ops.py:4230] Large dropout rate: 0.7 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.


logs/DL/dropout_lstm/kfold5
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout_lstm/kfold6
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30


Epoch 30/30
logs/DL/dropout_lstm/kfold7
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout_lstm/kfold8
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30


Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout_lstm/kfold9
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout_lstm/kfold10
Train on 3159 samples, validate on 350 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30


Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [97]:
process_results('dropout_lstm')

mean
 acc         0.338046
loss        1.536374
val_acc     0.368772
val_loss    1.473193
dtype: float64
std
 acc         0.039505
loss        0.141150
val_acc     0.067186
val_loss    0.156797
dtype: float64


media
 acc       0.391351
f1        0.194366
mse       2.967532
recall    0.189685
dtype: float64
std
 acc       0.044301
f1        0.060508
mse       1.555356
recall    0.087759
dtype: float64


In [98]:
drop_pred = compute_full_model(create_drop_model, 'dropout_lstm', batch_size=256, epochs=30, shuffle=False, verbose=1)
compute_metrics(drop_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 5.367265469061876,
 'recall': 0.06520292747837658,
 'f1': 0.09834420471650779,
 'acc': 0.3260146373918829}

Un valor tan alto de dropout (0.7) provoca peores resultados

In [99]:
def create_drop_model_2():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(10, kernel_initializer='normal', activation='relu', input_shape=(28, 300), name='lstm'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(6, activation='softmax', kernel_initializer='normal', name='dense')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


In [100]:
drop2_hist, drop2_evas = kfold_train(create_drop_model_2, 'dropout2_lstm', batch_size=256, epochs=30, shuffle=False, verbose=1)

logs/DL/dropout2_lstm/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout2_lstm/kfold2
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30


Epoch 30/30
logs/DL/dropout2_lstm/kfold3
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout2_lstm/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30


Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout2_lstm/kfold5
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout2_lstm/kfold6
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30


Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout2_lstm/kfold7
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout2_lstm/kfold8
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30


Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout2_lstm/kfold9
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/dropout2_lstm/kfold10
Train on 3159 samples, validate on 350 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30


Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [101]:
process_results('dropout2_lstm')

mean
 acc         0.380527
loss        1.426880
val_acc     0.382825
val_loss    1.430514
dtype: float64
std
 acc         0.077975
loss        0.198132
val_acc     0.069909
val_loss    0.175671
dtype: float64


media
 acc       0.406321
f1        0.231727
mse       2.549102
recall    0.274378
dtype: float64
std
 acc       0.048056
f1        0.078221
mse       1.496920
recall    0.119855
dtype: float64


In [102]:
drop2_pred = compute_full_model(create_simpler_model, 'dropout2_lstm', batch_size=256, epochs=30, shuffle=False, verbose=1)
compute_metrics(drop2_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 1.7950765136393878,
 'recall': 0.36543370563600863,
 'f1': 0.3079059481306069,
 'acc': 0.4517631403858949}

Con un dropout de 0.2 se mejora la media de accuracy en 2 puntos y el f1 en casi 2 puntos también, sin embargo en el conjunto de test se reduce notablemente el resultado obtenido

#### Batch Normalization

In [103]:
def create_bn_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(10, kernel_initializer='normal', activation='relu', input_shape=(28, 300), name='lstm'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dense(6, activation='softmax', kernel_initializer='normal', name='dense')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [104]:
bn_hist, bn_evas = kfold_train(create_bn_model, 'bn_lstm', batch_size=256, epochs=30, shuffle=False, verbose=1)

logs/DL/bn_lstm/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/bn_lstm/kfold2
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30


Epoch 30/30
logs/DL/bn_lstm/kfold3
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/bn_lstm/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30


Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/bn_lstm/kfold5
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/bn_lstm/kfold6
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30


Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/bn_lstm/kfold7
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/bn_lstm/kfold8
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30


Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/bn_lstm/kfold9
Train on 3158 samples, validate on 351 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
logs/DL/bn_lstm/kfold10
Train on 3159 samples, validate on 350 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30


Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [105]:
process_results('bn_lstm')

mean
 acc         0.473434
loss        1.311151
val_acc     0.453160
val_loss    1.439688
dtype: float64
std
 acc         0.079300
loss        0.235200
val_acc     0.056810
val_loss    0.188396
dtype: float64


media
 acc       0.451830
f1        0.298643
mse       1.818363
recall    0.348051
dtype: float64
std
 acc       0.014296
f1        0.045934
mse       0.182455
recall    0.031738
dtype: float64


In [106]:
bn_pred = compute_full_model(create_bn_model, 'bn_lstm', batch_size=256, epochs=30, shuffle=False, verbose=1)
compute_metrics(bn_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 1.7671324018629408,
 'recall': 0.3498351004363663,
 'f1': 0.25397113436961577,
 'acc': 0.4357950765136394}

#### Weights initialization

Probamos la inicialización de pesos por el algoritmo Xavier (glorot_normal)

In [107]:
def glorot_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(10, kernel_initializer='glorot_normal', activation='relu', input_shape=(28, 300), name='lstm'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dense(6, activation='softmax', kernel_initializer='glorot_normal', name='dense')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [108]:
gl_hist, gl_evas = kfold_train(glorot_model, 'glorot_lstm', batch_size=256, epochs=30, shuffle=False, verbose=2)

logs/DL/glorot_lstm/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 5s - loss: 1.7894 - acc: 0.1704 - val_loss: 1.7782 - val_acc: 0.2194
Epoch 2/30
3158/3158 - 0s - loss: 1.7725 - acc: 0.2733 - val_loss: 1.7640 - val_acc: 0.2194
Epoch 3/30
3158/3158 - 0s - loss: 1.7586 - acc: 0.2609 - val_loss: 1.7488 - val_acc: 0.2194
Epoch 4/30
3158/3158 - 0s - loss: 1.7429 - acc: 0.2749 - val_loss: 1.7325 - val_acc: 0.2194
Epoch 5/30
3158/3158 - 0s - loss: 1.7263 - acc: 0.2609 - val_loss: 1.7150 - val_acc: 0.2194
Epoch 6/30
3158/3158 - 0s - loss: 1.7084 - acc: 0.2847 - val_loss: 1.6968 - val_acc: 0.2194
Epoch 7/30
3158/3158 - 0s - loss: 1.6874 - acc: 0.3322 - val_loss: 1.6782 - val_acc: 0.2194
Epoch 8/30
3158/3158 - 0s - loss: 1.6615 - acc: 0.3531 - val_loss: 1.6583 - val_acc: 0.2194
Epoch 9/30
3158/3158 - 0s - loss: 1.6254 - acc: 0.3718 - val_loss: 1.6398 - val_acc: 0.2194
Epoch 10/30
3158/3158 - 0s - loss: 1.5915 - acc: 0.3914 - val_loss: 1.6225 - val_acc: 0.2194
Epoch

Epoch 27/30
3158/3158 - 3s - loss: 1.1142 - acc: 0.5377 - val_loss: 1.3124 - val_acc: 0.4587
Epoch 28/30
3158/3158 - 3s - loss: 1.1110 - acc: 0.5389 - val_loss: 1.3103 - val_acc: 0.4558
Epoch 29/30
3158/3158 - 3s - loss: 1.1080 - acc: 0.5475 - val_loss: 1.2688 - val_acc: 0.4387
Epoch 30/30
3158/3158 - 3s - loss: 1.0830 - acc: 0.5488 - val_loss: 1.2927 - val_acc: 0.4330
logs/DL/glorot_lstm/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 4s - loss: 1.7898 - acc: 0.2258 - val_loss: 1.7789 - val_acc: 0.3419
Epoch 2/30
3158/3158 - 0s - loss: 1.7683 - acc: 0.3379 - val_loss: 1.7657 - val_acc: 0.3447
Epoch 3/30
3158/3158 - 0s - loss: 1.7439 - acc: 0.3448 - val_loss: 1.7508 - val_acc: 0.3476
Epoch 4/30
3158/3158 - 0s - loss: 1.7087 - acc: 0.3619 - val_loss: 1.7345 - val_acc: 0.3561
Epoch 5/30
3158/3158 - 0s - loss: 1.6737 - acc: 0.3790 - val_loss: 1.7194 - val_acc: 0.3476
Epoch 6/30
3158/3158 - 0s - loss: 1.6436 - acc: 0.3841 - val_loss: 1.6998 - val_acc: 0.3618
Ep

Epoch 23/30
3158/3158 - 1s - loss: 1.1445 - acc: 0.5256 - val_loss: 1.3084 - val_acc: 0.4786
Epoch 24/30
3158/3158 - 1s - loss: 1.1319 - acc: 0.5370 - val_loss: 1.2761 - val_acc: 0.4872
Epoch 25/30
3158/3158 - 1s - loss: 1.1039 - acc: 0.5503 - val_loss: 1.2695 - val_acc: 0.5071
Epoch 26/30
3158/3158 - 1s - loss: 1.0936 - acc: 0.5554 - val_loss: 1.2633 - val_acc: 0.4986
Epoch 27/30
3158/3158 - 1s - loss: 1.0767 - acc: 0.5655 - val_loss: 1.2601 - val_acc: 0.4872
Epoch 28/30
3158/3158 - 1s - loss: 1.0637 - acc: 0.5659 - val_loss: 1.2534 - val_acc: 0.4786
Epoch 29/30
3158/3158 - 1s - loss: 1.0760 - acc: 0.5573 - val_loss: 1.2344 - val_acc: 0.5043
Epoch 30/30
3158/3158 - 1s - loss: 1.0713 - acc: 0.5545 - val_loss: 1.3807 - val_acc: 0.4302
logs/DL/glorot_lstm/kfold7
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 6s - loss: 1.7817 - acc: 0.2603 - val_loss: 1.7734 - val_acc: 0.3333
Epoch 2/30
3158/3158 - 1s - loss: 1.7613 - acc: 0.2593 - val_loss: 1.7551 - val_acc: 0.182

Epoch 19/30
3158/3158 - 2s - loss: 1.1935 - acc: 0.4962 - val_loss: 1.4436 - val_acc: 0.4444
Epoch 20/30
3158/3158 - 2s - loss: 1.1936 - acc: 0.4956 - val_loss: 1.4183 - val_acc: 0.4672
Epoch 21/30
3158/3158 - 2s - loss: 1.1597 - acc: 0.5044 - val_loss: 1.4056 - val_acc: 0.4843
Epoch 22/30
3158/3158 - 2s - loss: 1.1540 - acc: 0.5120 - val_loss: 1.3925 - val_acc: 0.4644
Epoch 23/30
3158/3158 - 3s - loss: 1.1442 - acc: 0.5152 - val_loss: 1.4049 - val_acc: 0.4530
Epoch 24/30
3158/3158 - 3s - loss: 1.1424 - acc: 0.5066 - val_loss: 1.4722 - val_acc: 0.4245
Epoch 25/30
3158/3158 - 3s - loss: 1.1420 - acc: 0.5073 - val_loss: 1.3529 - val_acc: 0.4843
Epoch 26/30
3158/3158 - 3s - loss: 1.1107 - acc: 0.5127 - val_loss: 1.3348 - val_acc: 0.4644
Epoch 27/30
3158/3158 - 3s - loss: 1.0890 - acc: 0.5310 - val_loss: 1.3321 - val_acc: 0.4701
Epoch 28/30
3158/3158 - 3s - loss: 1.0701 - acc: 0.5396 - val_loss: 1.3284 - val_acc: 0.4729
Epoch 29/30
3158/3158 - 3s - loss: 1.0518 - acc: 0.5453 - val_loss: 1.

In [109]:
process_results('glorot_lstm')

mean
 acc         0.453102
loss        1.346713
val_acc     0.420580
val_loss    1.492219
dtype: float64
std
 acc         0.089908
loss        0.233835
val_acc     0.081679
val_loss    0.168763
dtype: float64


media
 acc       0.443912
f1        0.304236
mse       1.793879
recall    0.388121
dtype: float64
std
 acc       0.023723
f1        0.042277
mse       0.215723
recall    0.078441
dtype: float64


In [110]:
gl_pred = compute_full_model(create_bn_model, 'glorot_lstm', batch_size=256, epochs=30, shuffle=False, verbose=1)
compute_metrics(gl_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 2.0159680638722555,
 'recall': 0.31452384925322996,
 'f1': 0.28099705356905647,
 'acc': 0.4550898203592814}

In [111]:
def glorot_model_wo_bn():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(10, kernel_initializer='glorot_normal', activation='relu', input_shape=(28, 300), name='lstm'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(6, activation='softmax', kernel_initializer='glorot_normal', name='dense')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [112]:
gl__wobn_hist, gl_wobn_evas = kfold_train(glorot_model_wo_bn, 'glorot__wobn_lstm', batch_size=256, epochs=30, shuffle=False, verbose=2)

logs/DL/glorot__wobn_lstm/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 30s - loss: 1.8005 - acc: 0.2432 - val_loss: 1.7689 - val_acc: 0.3219
Epoch 2/30
3158/3158 - 3s - loss: 1.7709 - acc: 0.2894 - val_loss: 1.7632 - val_acc: 0.3362
Epoch 3/30
3158/3158 - 3s - loss: 1.7597 - acc: 0.3243 - val_loss: 1.7463 - val_acc: 0.3219
Epoch 4/30
3158/3158 - 3s - loss: 1.7393 - acc: 0.3201 - val_loss: 1.6992 - val_acc: 0.3191
Epoch 5/30
3158/3158 - 3s - loss: 1.6845 - acc: 0.3144 - val_loss: 1.6393 - val_acc: 0.3134
Epoch 6/30
3158/3158 - 3s - loss: 1.6507 - acc: 0.3274 - val_loss: 1.6108 - val_acc: 0.3333
Epoch 7/30
3158/3158 - 3s - loss: 1.5884 - acc: 0.3312 - val_loss: 1.5019 - val_acc: 0.3618
Epoch 8/30
3158/3158 - 3s - loss: 1.5483 - acc: 0.3534 - val_loss: 1.4737 - val_acc: 0.3618
Epoch 9/30
3158/3158 - 3s - loss: 1.4973 - acc: 0.3724 - val_loss: 1.4064 - val_acc: 0.3732
Epoch 10/30
3158/3158 - 3s - loss: 1.4798 - acc: 0.3676 - val_loss: 1.3824 - val_acc: 0.381

Epoch 27/30
3158/3158 - 3s - loss: 1.1816 - acc: 0.4544 - val_loss: 1.3178 - val_acc: 0.4245
Epoch 28/30
3158/3158 - 3s - loss: 1.1577 - acc: 0.4791 - val_loss: 1.3650 - val_acc: 0.4387
Epoch 29/30
3158/3158 - 3s - loss: 1.1568 - acc: 0.4715 - val_loss: 1.3537 - val_acc: 0.4387
Epoch 30/30
3158/3158 - 3s - loss: 1.1381 - acc: 0.4725 - val_loss: 1.6442 - val_acc: 0.4274
logs/DL/glorot__wobn_lstm/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 6s - loss: 1.7834 - acc: 0.2213 - val_loss: 1.8451 - val_acc: 0.2365
Epoch 2/30
3158/3158 - 1s - loss: 1.7969 - acc: 0.2454 - val_loss: 1.7669 - val_acc: 0.2735
Epoch 3/30
3158/3158 - 1s - loss: 1.7655 - acc: 0.2707 - val_loss: 1.7591 - val_acc: 0.3077
Epoch 4/30
3158/3158 - 1s - loss: 1.7573 - acc: 0.2764 - val_loss: 1.7493 - val_acc: 0.2934
Epoch 5/30
3158/3158 - 1s - loss: 1.7478 - acc: 0.2590 - val_loss: 1.7385 - val_acc: 0.2963
Epoch 6/30
3158/3158 - 1s - loss: 1.7377 - acc: 0.2638 - val_loss: 1.7267 - val_acc: 0.2

Epoch 23/30
3158/3158 - 1s - loss: 1.2726 - acc: 0.4588 - val_loss: 1.2503 - val_acc: 0.4558
Epoch 24/30
3158/3158 - 1s - loss: 1.2415 - acc: 0.4715 - val_loss: 1.2452 - val_acc: 0.4530
Epoch 25/30
3158/3158 - 1s - loss: 1.2227 - acc: 0.4829 - val_loss: 1.2401 - val_acc: 0.4558
Epoch 26/30
3158/3158 - 1s - loss: 1.2069 - acc: 0.4848 - val_loss: 1.2380 - val_acc: 0.4644
Epoch 27/30
3158/3158 - 1s - loss: 1.1953 - acc: 0.4867 - val_loss: 1.2282 - val_acc: 0.4587
Epoch 28/30
3158/3158 - 2s - loss: 1.1751 - acc: 0.5000 - val_loss: 1.2420 - val_acc: 0.4672
Epoch 29/30
3158/3158 - 2s - loss: 1.1673 - acc: 0.4984 - val_loss: 1.2295 - val_acc: 0.4701
Epoch 30/30
3158/3158 - 2s - loss: 1.1555 - acc: 0.5085 - val_loss: 1.2324 - val_acc: 0.4615
logs/DL/glorot__wobn_lstm/kfold7
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 33s - loss: 1.7867 - acc: 0.2964 - val_loss: 1.7803 - val_acc: 0.3333
Epoch 2/30
3158/3158 - 3s - loss: 1.7710 - acc: 0.3163 - val_loss: 1.7554 - val_acc

Epoch 19/30
3158/3158 - 3s - loss: 1.4756 - acc: 0.3635 - val_loss: 1.4461 - val_acc: 0.4530
Epoch 20/30
3158/3158 - 3s - loss: 1.4713 - acc: 0.3746 - val_loss: 1.4499 - val_acc: 0.4046
Epoch 21/30
3158/3158 - 3s - loss: 1.4396 - acc: 0.3813 - val_loss: 1.4070 - val_acc: 0.4644
Epoch 22/30
3158/3158 - 3s - loss: 1.4191 - acc: 0.3844 - val_loss: 1.3843 - val_acc: 0.4587
Epoch 23/30
3158/3158 - 3s - loss: 1.3903 - acc: 0.3917 - val_loss: 1.3576 - val_acc: 0.4444
Epoch 24/30
3158/3158 - 3s - loss: 1.3669 - acc: 0.3996 - val_loss: 1.3410 - val_acc: 0.4758
Epoch 25/30
3158/3158 - 3s - loss: 1.3549 - acc: 0.4066 - val_loss: 1.3180 - val_acc: 0.4815
Epoch 26/30
3158/3158 - 3s - loss: 1.3239 - acc: 0.4300 - val_loss: 1.2873 - val_acc: 0.4758
Epoch 27/30
3158/3158 - 3s - loss: 1.2946 - acc: 0.4354 - val_loss: 1.2778 - val_acc: 0.4957
Epoch 28/30
3158/3158 - 3s - loss: 1.2926 - acc: 0.4227 - val_loss: 1.2628 - val_acc: 0.4786
Epoch 29/30
3158/3158 - 3s - loss: 1.2736 - acc: 0.4215 - val_loss: 1.

In [113]:
process_results('glorot__wobn_lstm')

mean
 acc         0.366442
loss        1.474096
val_acc     0.384222
val_loss    1.473826
dtype: float64
std
 acc         0.073128
loss        0.202504
val_acc     0.066103
val_loss    0.201380
dtype: float64


media
 acc       0.415436
f1        0.257047
mse       2.295675
recall    0.311826
dtype: float64
std
 acc       0.041790
f1        0.073869
mse       1.107481
recall    0.086633
dtype: float64


In [114]:
gl_wobn_pred = compute_full_model(glorot_model_wo_bn, 'glorot__wobn_lstm', batch_size=256, epochs=30, shuffle=False, verbose=1)
compute_metrics(gl_wobn_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 1.7631403858948769,
 'recall': 0.3038876531246645,
 'f1': 0.23338986478869503,
 'acc': 0.4304723885562209}

## Pruebas con topologías

### 2 lstm

In [115]:
def double_lstm_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(10, kernel_initializer='glorot_normal', activation='relu', return_sequences=True, input_shape=(28, 300), name='lstm'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.LSTM(10, activation='relu', name='2lstm'),
        tf.keras.layers.Dense(6, activation='softmax', kernel_initializer='glorot_normal', name='dense')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [116]:
double_hist, double_evas = kfold_train(double_lstm_model, 'double_lstm', batch_size=256, epochs=30, shuffle=False, verbose=2)

logs/DL/double_lstm/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 7s - loss: 1.7876 - acc: 0.3040 - val_loss: 1.7809 - val_acc: 0.3191
Epoch 2/30
3158/3158 - 1s - loss: 1.7745 - acc: 0.3271 - val_loss: 1.7623 - val_acc: 0.3191
Epoch 3/30
3158/3158 - 1s - loss: 1.7498 - acc: 0.3300 - val_loss: 1.7445 - val_acc: 0.3191
Epoch 4/30
3158/3158 - 1s - loss: 1.7329 - acc: 0.3344 - val_loss: 1.7252 - val_acc: 0.3191
Epoch 5/30
3158/3158 - 1s - loss: 1.7093 - acc: 0.3293 - val_loss: 1.7050 - val_acc: 0.3162
Epoch 6/30
3158/3158 - 1s - loss: 1.6798 - acc: 0.3271 - val_loss: 1.6656 - val_acc: 0.3191
Epoch 7/30
3158/3158 - 1s - loss: 1.6375 - acc: 0.3265 - val_loss: 1.6116 - val_acc: 0.3191
Epoch 8/30
3158/3158 - 1s - loss: 1.5734 - acc: 0.3265 - val_loss: 1.5553 - val_acc: 0.3191
Epoch 9/30
3158/3158 - 1s - loss: 1.5161 - acc: 0.3281 - val_loss: 1.4795 - val_acc: 0.3248
Epoch 10/30
3158/3158 - 1s - loss: 1.4607 - acc: 0.3414 - val_loss: 1.4381 - val_acc: 0.3704
Epoch

Epoch 27/30
3158/3158 - 6s - loss: 1.1249 - acc: 0.5196 - val_loss: 1.2555 - val_acc: 0.4416
Epoch 28/30
3158/3158 - 6s - loss: 1.1203 - acc: 0.5206 - val_loss: 1.2408 - val_acc: 0.4672
Epoch 29/30
3158/3158 - 6s - loss: 1.1102 - acc: 0.5250 - val_loss: 1.2681 - val_acc: 0.4444
Epoch 30/30
3158/3158 - 6s - loss: 1.1018 - acc: 0.5307 - val_loss: 1.2633 - val_acc: 0.4615
logs/DL/double_lstm/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 8s - loss: 1.7845 - acc: 0.2400 - val_loss: 1.7737 - val_acc: 0.2707
Epoch 2/30
3158/3158 - 1s - loss: 1.7636 - acc: 0.2432 - val_loss: 1.7442 - val_acc: 0.2707
Epoch 3/30
3158/3158 - 1s - loss: 1.7217 - acc: 0.2381 - val_loss: 1.6587 - val_acc: 0.2735
Epoch 4/30
3158/3158 - 1s - loss: 1.7027 - acc: 0.2438 - val_loss: 1.6803 - val_acc: 0.2707
Epoch 5/30
3158/3158 - 2s - loss: 1.6670 - acc: 0.2432 - val_loss: 1.6382 - val_acc: 0.2707
Epoch 6/30
3158/3158 - 3s - loss: 1.6261 - acc: 0.2432 - val_loss: 1.5974 - val_acc: 0.2707
Ep

Epoch 23/30
3158/3158 - 1s - loss: 1.2160 - acc: 0.4826 - val_loss: 1.2732 - val_acc: 0.4444
Epoch 24/30
3158/3158 - 1s - loss: 1.1908 - acc: 0.4981 - val_loss: 1.2675 - val_acc: 0.4587
Epoch 25/30
3158/3158 - 1s - loss: 1.1822 - acc: 0.4911 - val_loss: 1.2747 - val_acc: 0.4672
Epoch 26/30
3158/3158 - 1s - loss: 1.1707 - acc: 0.4943 - val_loss: 1.2621 - val_acc: 0.4729
Epoch 27/30
3158/3158 - 1s - loss: 1.1588 - acc: 0.5044 - val_loss: 1.2574 - val_acc: 0.4615
Epoch 28/30
3158/3158 - 2s - loss: 1.1385 - acc: 0.5101 - val_loss: 1.2577 - val_acc: 0.4501
Epoch 29/30
3158/3158 - 3s - loss: 1.1235 - acc: 0.5142 - val_loss: 1.2408 - val_acc: 0.4587
Epoch 30/30
3158/3158 - 3s - loss: 1.1151 - acc: 0.5196 - val_loss: 1.2422 - val_acc: 0.4587
logs/DL/double_lstm/kfold7
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 44s - loss: 1.7821 - acc: 0.2844 - val_loss: 1.7692 - val_acc: 0.3162
Epoch 2/30
3158/3158 - 6s - loss: 1.7497 - acc: 0.3148 - val_loss: 1.7480 - val_acc: 0.33

Epoch 19/30
3158/3158 - 6s - loss: 1.3796 - acc: 0.4554 - val_loss: 1.4103 - val_acc: 0.4900
Epoch 20/30
3158/3158 - 6s - loss: 1.3482 - acc: 0.4576 - val_loss: 1.3812 - val_acc: 0.4815
Epoch 21/30
3158/3158 - 6s - loss: 1.3191 - acc: 0.4677 - val_loss: 1.3534 - val_acc: 0.5043
Epoch 22/30
3158/3158 - 6s - loss: 1.3720 - acc: 0.4639 - val_loss: 1.4199 - val_acc: 0.4530
Epoch 23/30
3158/3158 - 6s - loss: 1.3508 - acc: 0.4307 - val_loss: 1.3204 - val_acc: 0.4758
Epoch 24/30
3158/3158 - 6s - loss: 1.2855 - acc: 0.4712 - val_loss: 1.3289 - val_acc: 0.4501
Epoch 25/30
3158/3158 - 6s - loss: 1.2690 - acc: 0.4766 - val_loss: 1.2911 - val_acc: 0.5157
Epoch 26/30
3158/3158 - 6s - loss: 1.2583 - acc: 0.4737 - val_loss: 1.2868 - val_acc: 0.5100
Epoch 27/30
3158/3158 - 6s - loss: 1.2371 - acc: 0.4810 - val_loss: 1.2746 - val_acc: 0.5071
Epoch 28/30
3158/3158 - 6s - loss: 1.2250 - acc: 0.4750 - val_loss: 1.2646 - val_acc: 0.5100
Epoch 29/30
3158/3158 - 6s - loss: 1.2104 - acc: 0.4772 - val_loss: 1.

In [117]:
process_results('double_lstm')

mean
 acc         0.405491
loss        1.403486
val_acc     0.395798
val_loss    1.429697
dtype: float64
std
 acc         0.077361
loss        0.213470
val_acc     0.062908
val_loss    0.175999
dtype: float64


media
 acc       0.435595
f1        0.318126
mse       1.769993
recall    0.369147
dtype: float64
std
 acc       0.023688
f1        0.026138
mse       0.200008
recall    0.035174
dtype: float64


In [118]:
double_pred = compute_full_model(double_lstm_model, 'double_lstm', batch_size=256, epochs=30, shuffle=False, verbose=1)
compute_metrics(double_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 1.4238190286094479,
 'recall': 0.2778290304582367,
 'f1': 0.2904467194826683,
 'acc': 0.4184963406520293}

### Double lstm fine tune

### Convolucional

In [119]:
def convolutional_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Reshape((28, 300, 1), input_shape=(28, 300), name='lstm'),
        tf.keras.layers.Conv2D(128, (4, 300), padding='same', name='conv_layer'),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(16,16), strides=None),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(6, activation='softmax', kernel_initializer='normal', name='dense')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [120]:
convolutional_model().summary()

Model: "sequential_89"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (Reshape)               (None, 28, 300, 1)        0         
_________________________________________________________________
conv_layer (Conv2D)          (None, 28, 300, 128)      153728    
_________________________________________________________________
activation (Activation)      (None, 28, 300, 128)      0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 1, 18, 128)        0         
_________________________________________________________________
flatten (Flatten)            (None, 2304)              0         
_________________________________________________________________
dropout_65 (Dropout)         (None, 2304)              0         
_________________________________________________________________
dense (Dense)                (None, 6)               

In [121]:
conv_hist, conv_evas = kfold_train(convolutional_model, 'convolutional', batch_size=8, epochs=30, shuffle=False, verbose=2)

logs/DL/convolutional/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 41s - loss: 1.3720 - acc: 0.4414 - val_loss: 1.5713 - val_acc: 0.4330
Epoch 2/30
3158/3158 - 12s - loss: 1.1621 - acc: 0.5358 - val_loss: 1.6688 - val_acc: 0.4188
Epoch 3/30
3158/3158 - 12s - loss: 0.9205 - acc: 0.6485 - val_loss: 1.7934 - val_acc: 0.4046
Epoch 4/30
3158/3158 - 12s - loss: 0.6829 - acc: 0.7498 - val_loss: 1.8814 - val_acc: 0.4188
Epoch 5/30
3158/3158 - 12s - loss: 0.5159 - acc: 0.8287 - val_loss: 1.6179 - val_acc: 0.4672
Epoch 6/30
3158/3158 - 12s - loss: 0.4424 - acc: 0.8518 - val_loss: 1.4406 - val_acc: 0.5071
Epoch 7/30
3158/3158 - 12s - loss: 0.4425 - acc: 0.8353 - val_loss: 1.4753 - val_acc: 0.5214
Epoch 8/30
3158/3158 - 12s - loss: 0.3425 - acc: 0.8866 - val_loss: 1.3930 - val_acc: 0.5128
Epoch 9/30
3158/3158 - 12s - loss: 0.2863 - acc: 0.9079 - val_loss: 1.3975 - val_acc: 0.5157
Epoch 10/30
3158/3158 - 12s - loss: 0.2218 - acc: 0.9411 - val_loss: 1.4450 - val_acc: 

3158/3158 - 20s - loss: 0.1061 - acc: 0.9747 - val_loss: 2.3578 - val_acc: 0.4729
Epoch 27/30
3158/3158 - 20s - loss: 0.1036 - acc: 0.9769 - val_loss: 3.1444 - val_acc: 0.4131
Epoch 28/30
3158/3158 - 20s - loss: 0.1027 - acc: 0.9756 - val_loss: 2.3905 - val_acc: 0.4188
Epoch 29/30
3158/3158 - 20s - loss: 0.0827 - acc: 0.9810 - val_loss: 2.4018 - val_acc: 0.4615
Epoch 30/30
3158/3158 - 20s - loss: 0.0789 - acc: 0.9804 - val_loss: 3.2328 - val_acc: 0.3960
logs/DL/convolutional/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 58s - loss: 1.3769 - acc: 0.4402 - val_loss: 1.3187 - val_acc: 0.4217
Epoch 2/30
3158/3158 - 20s - loss: 1.1555 - acc: 0.5408 - val_loss: 1.3278 - val_acc: 0.4473
Epoch 3/30
3158/3158 - 20s - loss: 0.9205 - acc: 0.6517 - val_loss: 1.3878 - val_acc: 0.4330
Epoch 4/30
3158/3158 - 20s - loss: 0.6732 - acc: 0.7654 - val_loss: 1.4619 - val_acc: 0.4672
Epoch 5/30
3158/3158 - 20s - loss: 0.5274 - acc: 0.8186 - val_loss: 1.4520 - val_acc: 0.4872
E

Epoch 22/30
3158/3158 - 20s - loss: 0.0946 - acc: 0.9747 - val_loss: 2.4162 - val_acc: 0.4701
Epoch 23/30
3158/3158 - 21s - loss: 0.0979 - acc: 0.9766 - val_loss: 2.2406 - val_acc: 0.4872
Epoch 24/30
3158/3158 - 20s - loss: 0.0887 - acc: 0.9797 - val_loss: 2.4507 - val_acc: 0.4758
Epoch 25/30
3158/3158 - 20s - loss: 0.0786 - acc: 0.9810 - val_loss: 2.6758 - val_acc: 0.3875
Epoch 26/30
3158/3158 - 20s - loss: 0.0930 - acc: 0.9794 - val_loss: 2.7152 - val_acc: 0.4672
Epoch 27/30
3158/3158 - 20s - loss: 0.0955 - acc: 0.9750 - val_loss: 2.8324 - val_acc: 0.4416
Epoch 28/30
3158/3158 - 20s - loss: 0.0803 - acc: 0.9810 - val_loss: 2.8862 - val_acc: 0.4074
Epoch 29/30
3158/3158 - 21s - loss: 0.0697 - acc: 0.9835 - val_loss: 2.4711 - val_acc: 0.4587
Epoch 30/30
3158/3158 - 20s - loss: 0.0547 - acc: 0.9902 - val_loss: 2.3922 - val_acc: 0.4843
logs/DL/convolutional/kfold7
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 58s - loss: 1.3655 - acc: 0.4436 - val_loss: 1.5893 - v

3158/3158 - 21s - loss: 0.1058 - acc: 0.9766 - val_loss: 2.3427 - val_acc: 0.4615
Epoch 18/30
3158/3158 - 20s - loss: 0.1135 - acc: 0.9740 - val_loss: 2.1923 - val_acc: 0.4644
Epoch 19/30
3158/3158 - 21s - loss: 0.1122 - acc: 0.9721 - val_loss: 2.0068 - val_acc: 0.4815
Epoch 20/30
3158/3158 - 20s - loss: 0.1138 - acc: 0.9699 - val_loss: 2.1338 - val_acc: 0.4530
Epoch 21/30
3158/3158 - 20s - loss: 0.1137 - acc: 0.9674 - val_loss: 2.1996 - val_acc: 0.4986
Epoch 22/30
3158/3158 - 20s - loss: 0.1244 - acc: 0.9636 - val_loss: 1.9326 - val_acc: 0.4815
Epoch 23/30
3158/3158 - 20s - loss: 0.1295 - acc: 0.9636 - val_loss: 2.2247 - val_acc: 0.5071
Epoch 24/30
3158/3158 - 20s - loss: 0.0812 - acc: 0.9820 - val_loss: 3.0076 - val_acc: 0.4644
Epoch 25/30
3158/3158 - 21s - loss: 0.0687 - acc: 0.9877 - val_loss: 2.4703 - val_acc: 0.4872
Epoch 26/30
3158/3158 - 20s - loss: 0.0897 - acc: 0.9759 - val_loss: 3.0974 - val_acc: 0.4587
Epoch 27/30
3158/3158 - 20s - loss: 0.0855 - acc: 0.9797 - val_loss: 2.2

In [122]:
process_results('convolutional')

mean
 acc         0.908749
loss        0.268690
val_acc     0.457701
val_loss    2.157744
dtype: float64
std
 acc         0.135416
loss        0.331012
val_acc     0.039876
val_loss    0.620059
dtype: float64


media
 acc       0.470326
f1        0.349074
mse       2.174717
recall    0.389098
dtype: float64
std
 acc       0.010885
f1        0.025079
mse       0.214826
recall    0.020551
dtype: float64


In [123]:
conv_pred = compute_full_model(convolutional_model, 'convolutional', batch_size=8, epochs=30, shuffle=False, verbose=1)
compute_metrics(conv_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 2.147704590818363,
 'recall': 0.3959661465630031,
 'f1': 0.36522993981400315,
 'acc': 0.4930139720558882}

### Convolutional 1D

In [124]:
def convolutional1d_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Conv1D(4, 300, input_shape=(28, 300), padding='same', name='conv_layer'),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling1D(pool_size=(4), strides=None),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(6, activation='softmax', kernel_initializer='normal', name='dense')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [125]:
convolutional1d_model().summary()

Model: "sequential_101"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv_layer (Conv1D)          (None, 28, 4)             360004    
_________________________________________________________________
activation_12 (Activation)   (None, 28, 4)             0         
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 7, 4)              0         
_________________________________________________________________
flatten_12 (Flatten)         (None, 28)                0         
_________________________________________________________________
dropout_77 (Dropout)         (None, 28)                0         
_________________________________________________________________
dense (Dense)                (None, 6)                 174       
Total params: 360,178
Trainable params: 360,178
Non-trainable params: 0
______________________________________________

In [126]:
conv1d_hist, conv1d_evas = kfold_train(convolutional1d_model, 'convolutional1d', batch_size=8, epochs=30, shuffle=False, verbose=2)

logs/DL/convolutional1d/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 52s - loss: 1.3959 - acc: 0.4250 - val_loss: 1.2141 - val_acc: 0.4302
Epoch 2/30
3158/3158 - 14s - loss: 1.1571 - acc: 0.5139 - val_loss: 1.1916 - val_acc: 0.4558
Epoch 3/30
3158/3158 - 14s - loss: 1.0254 - acc: 0.5811 - val_loss: 1.1841 - val_acc: 0.4929
Epoch 4/30
3158/3158 - 14s - loss: 0.8963 - acc: 0.6628 - val_loss: 1.2197 - val_acc: 0.4957
Epoch 5/30
3158/3158 - 14s - loss: 0.7711 - acc: 0.7144 - val_loss: 1.2464 - val_acc: 0.5071
Epoch 6/30
3158/3158 - 14s - loss: 0.6510 - acc: 0.7616 - val_loss: 1.3752 - val_acc: 0.5014
Epoch 7/30
3158/3158 - 13s - loss: 0.5540 - acc: 0.8043 - val_loss: 1.3502 - val_acc: 0.4815
Epoch 8/30
3158/3158 - 14s - loss: 0.4516 - acc: 0.8528 - val_loss: 1.3922 - val_acc: 0.4701
Epoch 9/30
3158/3158 - 14s - loss: 0.3746 - acc: 0.8835 - val_loss: 1.4179 - val_acc: 0.4758
Epoch 10/30
3158/3158 - 14s - loss: 0.3169 - acc: 0.8984 - val_loss: 1.4844 - val_acc

Epoch 27/30
3158/3158 - 7s - loss: 0.0781 - acc: 0.9747 - val_loss: 2.4477 - val_acc: 0.4473
Epoch 28/30
3158/3158 - 7s - loss: 0.0849 - acc: 0.9715 - val_loss: 2.5095 - val_acc: 0.4558
Epoch 29/30
3158/3158 - 7s - loss: 0.0761 - acc: 0.9772 - val_loss: 2.6007 - val_acc: 0.4530
Epoch 30/30
3158/3158 - 7s - loss: 0.0871 - acc: 0.9731 - val_loss: 2.6005 - val_acc: 0.4245
logs/DL/convolutional1d/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 30s - loss: 1.3793 - acc: 0.4278 - val_loss: 1.2460 - val_acc: 0.4558
Epoch 2/30
3158/3158 - 13s - loss: 1.1374 - acc: 0.5329 - val_loss: 1.2000 - val_acc: 0.4672
Epoch 3/30
3158/3158 - 14s - loss: 0.9875 - acc: 0.6054 - val_loss: 1.2402 - val_acc: 0.4672
Epoch 4/30
3158/3158 - 14s - loss: 0.8504 - acc: 0.6681 - val_loss: 1.2161 - val_acc: 0.4900
Epoch 5/30
3158/3158 - 14s - loss: 0.7009 - acc: 0.7397 - val_loss: 1.3080 - val_acc: 0.4701
Epoch 6/30
3158/3158 - 14s - loss: 0.5648 - acc: 0.8053 - val_loss: 1.3577 - val_acc:

Epoch 23/30
3158/3158 - 7s - loss: 0.0886 - acc: 0.9702 - val_loss: 2.4017 - val_acc: 0.4672
Epoch 24/30
3158/3158 - 7s - loss: 0.0738 - acc: 0.9759 - val_loss: 2.3576 - val_acc: 0.4615
Epoch 25/30
3158/3158 - 7s - loss: 0.0792 - acc: 0.9756 - val_loss: 2.4429 - val_acc: 0.4530
Epoch 26/30
3158/3158 - 7s - loss: 0.0887 - acc: 0.9709 - val_loss: 2.5948 - val_acc: 0.4387
Epoch 27/30
3158/3158 - 7s - loss: 0.0768 - acc: 0.9747 - val_loss: 2.6410 - val_acc: 0.4473
Epoch 28/30
3158/3158 - 7s - loss: 0.0785 - acc: 0.9737 - val_loss: 2.7431 - val_acc: 0.4359
Epoch 29/30
3158/3158 - 7s - loss: 0.0721 - acc: 0.9772 - val_loss: 2.7598 - val_acc: 0.4274
Epoch 30/30
3158/3158 - 7s - loss: 0.0687 - acc: 0.9766 - val_loss: 2.7113 - val_acc: 0.4473
logs/DL/convolutional1d/kfold7
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 41s - loss: 1.3575 - acc: 0.4307 - val_loss: 1.2592 - val_acc: 0.4587
Epoch 2/30
3158/3158 - 14s - loss: 1.1351 - acc: 0.5222 - val_loss: 1.2320 - val_acc:

Epoch 19/30
3158/3158 - 7s - loss: 0.1088 - acc: 0.9712 - val_loss: 2.0067 - val_acc: 0.4644
Epoch 20/30
3158/3158 - 7s - loss: 0.1017 - acc: 0.9668 - val_loss: 1.9953 - val_acc: 0.4786
Epoch 21/30
3158/3158 - 7s - loss: 0.1089 - acc: 0.9680 - val_loss: 2.1443 - val_acc: 0.4701
Epoch 22/30
3158/3158 - 7s - loss: 0.1032 - acc: 0.9712 - val_loss: 2.1107 - val_acc: 0.4672
Epoch 23/30
3158/3158 - 7s - loss: 0.0958 - acc: 0.9718 - val_loss: 2.2104 - val_acc: 0.4900
Epoch 24/30
3158/3158 - 7s - loss: 0.0874 - acc: 0.9737 - val_loss: 2.3745 - val_acc: 0.4501
Epoch 25/30
3158/3158 - 7s - loss: 0.0879 - acc: 0.9734 - val_loss: 2.3326 - val_acc: 0.4758
Epoch 26/30
3158/3158 - 7s - loss: 0.0820 - acc: 0.9763 - val_loss: 2.3589 - val_acc: 0.4729
Epoch 27/30
3158/3158 - 7s - loss: 0.0848 - acc: 0.9718 - val_loss: 2.5252 - val_acc: 0.4615
Epoch 28/30
3158/3158 - 7s - loss: 0.0897 - acc: 0.9706 - val_loss: 2.4858 - val_acc: 0.4473
Epoch 29/30
3158/3158 - 7s - loss: 0.0867 - acc: 0.9696 - val_loss: 2.

In [127]:
process_results('convolutional1d')

mean
 acc         0.883700
loss        0.323287
val_acc     0.472284
val_loss    1.849411
dtype: float64
std
 acc         0.145751
loss        0.351638
val_acc     0.027298
val_loss    0.460599
dtype: float64


media
 acc       0.441517
f1        0.372846
mse       1.863407
recall    0.380542
dtype: float64
std
 acc       0.017145
f1        0.016585
mse       0.091755
recall    0.022502
dtype: float64


In [128]:
conv1d_pred = compute_full_model(convolutional1d_model, 'convolutional1d', batch_size=8, epochs=30, shuffle=False, verbose=1)
compute_metrics(conv1d_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 1.829673985362608,
 'recall': 0.36915124729659454,
 'f1': 0.3653904897894045,
 'acc': 0.4311377245508982}

### Bidirectional

In [129]:
def bidirectional_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(10), input_shape=(28, 300)),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(6, activation='softmax')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [130]:
bidi_hist, bidi_evas = kfold_train(bidirectional_model, 'bidirectional', batch_size=8, epochs=30, shuffle=False, verbose=2)

W0702 14:46:31.149762 140009374406464 deprecation.py:506] From /home/suampa/.local/lib/python3.7/site-packages/tensorflow_core/python/ops/init_ops.py:97: calling GlorotUniform.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0702 14:46:31.157485 140009374406464 deprecation.py:506] From /home/suampa/.local/lib/python3.7/site-packages/tensorflow_core/python/ops/init_ops.py:97: calling Orthogonal.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0702 14:46:31.193346 140009374406464 deprecation.py:506] From /home/suampa/.local/lib/python3.7/site-packages/tensorflow_core/python/ops/init_ops.py:97: calling Zeros.__init__ (from tensor

logs/DL/bidirectional/kfold1
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 108s - loss: 1.5369 - acc: 0.3683 - val_loss: 1.3833 - val_acc: 0.4046
Epoch 2/30
3158/3158 - 145s - loss: 1.3137 - acc: 0.4607 - val_loss: 1.2161 - val_acc: 0.4416
Epoch 3/30
3158/3158 - 106s - loss: 1.2352 - acc: 0.4864 - val_loss: 1.1809 - val_acc: 0.4587
Epoch 4/30
3158/3158 - 33s - loss: 1.1921 - acc: 0.4994 - val_loss: 1.1738 - val_acc: 0.4815
Epoch 5/30
3158/3158 - 145s - loss: 1.1548 - acc: 0.5196 - val_loss: 1.1720 - val_acc: 0.4872
Epoch 6/30
3158/3158 - 145s - loss: 1.1194 - acc: 0.5377 - val_loss: 1.1550 - val_acc: 0.5527
Epoch 7/30
3158/3158 - 79s - loss: 1.0882 - acc: 0.5535 - val_loss: 1.1844 - val_acc: 0.5385
Epoch 8/30
3158/3158 - 62s - loss: 1.0501 - acc: 0.5741 - val_loss: 1.1546 - val_acc: 0.5499
Epoch 9/30
3158/3158 - 145s - loss: 1.0299 - acc: 0.5817 - val_loss: 1.1781 - val_acc: 0.5385
Epoch 10/30
3158/3158 - 145s - loss: 1.0029 - acc: 0.5937 - val_loss: 1.1626 - va

Epoch 26/30
3158/3158 - 137s - loss: 0.5828 - acc: 0.7951 - val_loss: 1.3867 - val_acc: 0.5014
Epoch 27/30
3158/3158 - 145s - loss: 0.5720 - acc: 0.7891 - val_loss: 1.4074 - val_acc: 0.4758
Epoch 28/30
3158/3158 - 79s - loss: 0.5396 - acc: 0.8125 - val_loss: 1.4807 - val_acc: 0.4815
Epoch 29/30
3158/3158 - 61s - loss: 0.5199 - acc: 0.8135 - val_loss: 1.4720 - val_acc: 0.4815
Epoch 30/30
3158/3158 - 145s - loss: 0.5171 - acc: 0.8129 - val_loss: 1.4860 - val_acc: 0.4872
logs/DL/bidirectional/kfold4
Train on 3158 samples, validate on 351 samples
Epoch 1/30
3158/3158 - 82s - loss: 1.5059 - acc: 0.3692 - val_loss: 1.2642 - val_acc: 0.5043
Epoch 2/30
3158/3158 - 146s - loss: 1.2746 - acc: 0.4709 - val_loss: 1.2022 - val_acc: 0.5185
Epoch 3/30
3158/3158 - 103s - loss: 1.2283 - acc: 0.4782 - val_loss: 1.2251 - val_acc: 0.4872
Epoch 4/30
3158/3158 - 38s - loss: 1.1785 - acc: 0.4984 - val_loss: 1.2154 - val_acc: 0.5071
Epoch 5/30
3158/3158 - 146s - loss: 1.1530 - acc: 0.5161 - val_loss: 1.1858 -

Epoch 21/30
3158/3158 - 132s - loss: 0.7164 - acc: 0.7362 - val_loss: 1.2670 - val_acc: 0.5128
Epoch 22/30
3158/3158 - 146s - loss: 0.6931 - acc: 0.7435 - val_loss: 1.2638 - val_acc: 0.5128
Epoch 23/30
3158/3158 - 77s - loss: 0.6598 - acc: 0.7559 - val_loss: 1.2642 - val_acc: 0.5128
Epoch 24/30
3158/3158 - 65s - loss: 0.6409 - acc: 0.7745 - val_loss: 1.3247 - val_acc: 0.4986
Epoch 25/30
3158/3158 - 146s - loss: 0.6361 - acc: 0.7679 - val_loss: 1.3235 - val_acc: 0.5328
Epoch 26/30
3158/3158 - 146s - loss: 0.6208 - acc: 0.7711 - val_loss: 1.3786 - val_acc: 0.4929
Epoch 27/30
3158/3158 - 22s - loss: 0.5857 - acc: 0.7907 - val_loss: 1.3845 - val_acc: 0.5271
Epoch 28/30
3158/3158 - 124s - loss: 0.5677 - acc: 0.7989 - val_loss: 1.3742 - val_acc: 0.5214
Epoch 29/30
3158/3158 - 146s - loss: 0.5659 - acc: 0.7989 - val_loss: 1.4228 - val_acc: 0.5242
Epoch 30/30
3158/3158 - 104s - loss: 0.5333 - acc: 0.8100 - val_loss: 1.4136 - val_acc: 0.5128
logs/DL/bidirectional/kfold7
Train on 3158 samples, v

Epoch 16/30
3158/3158 - 144s - loss: 0.8428 - acc: 0.6821 - val_loss: 1.2687 - val_acc: 0.5100
Epoch 17/30
3158/3158 - 146s - loss: 0.8129 - acc: 0.6922 - val_loss: 1.2468 - val_acc: 0.5242
Epoch 18/30
3158/3158 - 60s - loss: 0.7864 - acc: 0.7065 - val_loss: 1.2993 - val_acc: 0.5128
Epoch 19/30
3158/3158 - 82s - loss: 0.7747 - acc: 0.7065 - val_loss: 1.2777 - val_acc: 0.5185
Epoch 20/30
3158/3158 - 145s - loss: 0.7592 - acc: 0.7131 - val_loss: 1.2821 - val_acc: 0.4986
Epoch 21/30
3158/3158 - 133s - loss: 0.7265 - acc: 0.7274 - val_loss: 1.3333 - val_acc: 0.4986
Epoch 22/30
3158/3158 - 23s - loss: 0.7274 - acc: 0.7239 - val_loss: 1.3048 - val_acc: 0.5157
Epoch 23/30
3158/3158 - 131s - loss: 0.6990 - acc: 0.7359 - val_loss: 1.3469 - val_acc: 0.5128
Epoch 24/30
3158/3158 - 146s - loss: 0.6640 - acc: 0.7492 - val_loss: 1.3873 - val_acc: 0.5128
Epoch 25/30
3158/3158 - 101s - loss: 0.6635 - acc: 0.7527 - val_loss: 1.3185 - val_acc: 0.4957
Epoch 26/30
3158/3158 - 41s - loss: 0.6488 - acc: 0.7

In [131]:
process_results('bidirectional')

mean
 acc         0.659691
loss        0.870366
val_acc     0.500352
val_loss    1.294376
dtype: float64
std
 acc         0.108298
loss        0.236284
val_acc     0.031157
val_loss    0.119713
dtype: float64


media
 acc       0.477246
f1        0.392883
mse       1.751963
recall    0.394239
dtype: float64
std
 acc       0.011107
f1        0.007486
mse       0.082895
recall    0.011775
dtype: float64


In [132]:
bidi_pred = compute_full_model(convolutional1d_model, 'bidirectional', batch_size=8, epochs=30, shuffle=False, verbose=1)
compute_metrics(bidi_pred, y_test)

Train on 3509 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


{'mse': 1.8236859614105123,
 'recall': 0.38216293694789244,
 'f1': 0.3789552533428749,
 'acc': 0.4550898203592814}