# Evaluación de una red neuronal seguida de SVC y modelos de HMM

In [2]:
#Lectura de la base de datos
import pandas as pd

#Preprocesamiento de los datos
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler, SMOTE

#Evaluación de la actuación del modelo
from sklearn import metrics

#Modelos HMM
import seqlearn.hmm
from seqlearn.hmm import MultinomialHMM
import seqlearn.perceptron
from seqlearn.perceptron import StructuredPerceptron
from pomegranate import *

#Carga y almacenamiento de modelos
import json
import pickle

#Preprocesamiento de los datos
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler, SMOTE
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from numpy import float64

#Redes neuronales
from keras import backend as K
from sklearn import svm
import keras.losses
import keras.metrics
import tensorflow as tf

In [3]:
#Métricas
def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))
keras.metrics.f1_m = f1_m
keras.metrics.precision_m = precision_m
keras.metrics.recall_m = recall_m

In [5]:
#Lectura del fichero de datos CSV. Separación por ; e ignorando el espacio inicial
data = pd.read_csv('database_speech.csv', sep = ';', skipinitialspace = True)

#Parámetros de interés
embedding_dim = 300 #Tamaño del vocabulario de incrustación
vocab_size = 1860 #Tamaño del vocabulario de la red neuronal
maxlen = 200 #Máxima longitud del padding

#Preprocesado de datos
X_data, y_data = data['Transcription'],data['Phase']
#Tokenización
tokenizer = Tokenizer(num_words=500)
tokenizer.fit_on_texts(X_data)
X = tokenizer.texts_to_sequences(X_data)
#Padding
X_pad = pad_sequences(X, padding='post', maxlen=maxlen)
#Balanceo
#ROS
ros = RandomOverSampler()
#SMOTE
smote = SMOTE()
X_SM, y_SM = smote.fit_resample(X_pad,y_data)
X_RO, y_RO = ros.fit_resample(X_pad,y_data)
#División en conjunto de entrenamiento y conjunto de evaluación
X_trainSM, X_testSM, y_trainSM, y_testSM = train_test_split(X_SM, y_SM)
X_trainRO, X_testRO, y_trainRO, y_testRO = train_test_split(X_RO, y_RO)

#Carga del modelo neuronal extractor de características
import pickle
filename = 'modelFE.model'
sModel = pickle.load(open(filename, 'rb'))

#Extracción de las características extraidas por las capas (Descomentar el método de balanceo que se desee utilizar)
inp = sModel.input      
outputs = [layer.output for layer in sModel.layers]          
functors = [K.function([inp, K.learning_phase()], [out]) for out in outputs]    
#Características del entrenamiento
layer_outs = [func([X_trainSM, 1.]) for func in functors]
#layer_outs = [func([X_trainRO, 1.]) for func in functors]
#Características de la evaluación
layer_outs_test = [func([X_testSM, 1.]) for func in functors]
#layer_outs_test = [func([X_testRO, 1.]) for func in functors]

#Generación del mejor estimador y ajuste a sus características
clf = svm.SVC(C = 1, decision_function_shape = 'ovo', degree = 1, gamma = 'scale', kernel = 'rbf', probability = True, shrinking = True, tol = 0.01)
clf.fit(layer_outs[2][0], y_trainSM)
#clf.fit(layer_outs[2][0], y_trainRO)
predictions=clf.predict_proba(layer_outs[2][0])
pred=clf.predict_proba(layer_outs_test[2][0])
#Ajuste de los valores obtenidos
pred = float64(pred)

#Modelos HMM
#Inicialización
modelHMM1 = MultinomialHMM(alpha = 15,decode = 'bestfirst')
modelHMM2 = StructuredPerceptron(lr_exponent = 0.05,decode = 'viterbi', max_iter = 1000) #Modificar labels si cambia el balanceo
modelHMM3 = HiddenMarkovModel.from_samples(DirichletDistribution, n_components = 8, X = [predictions], labels = [y_trainSM], algorithm = 'labeled',name = 'HMM', n_jobs = -1)
#Ajuste a los datos de entrenamiento
modelHMM1.fit(predictions,y_trainSM,[len(y_trainSM)])
modelHMM2.fit(predictions,y_trainSM,[len(y_trainSM)])
#modelHMM1.fit(predictions,y_trainRO,[len(y_trainRO)])
#modelHMM2.fit(predictions,y_trainRO,[len(y_trainRO)])
#Predicción
y_pred1 = modelHMM1.predict(pred,[len(y_testSM)])
y_pred2 = modelHMM2.predict(pred,[len(y_testSM)])
y_pred3 = modelHMM3.predict(pred, algorithm = 'labeled')
#y_pred1 = modelHMM1.predict(pred,[len(y_testRO)])
#y_pred2 = modelHMM2.predict(pred,[len(y_testRO)])
#Elimina el estado de inicio generado por el modeloHMM3 por defecto
y_pred3.pop(0)
#Evaluación
print(metrics.f1_score(y_testSM,y_pred1,average = 'weighted'))
print(metrics.f1_score(y_testSM,y_pred2,average = 'weighted'))
print(metrics.f1_score(y_testSM,y_pred3,average = 'weighted'))
#print(metrics.f1_score(y_testRO,y_pred1,average = 'weighted'))
#print(metrics.f1_score(y_testRO,y_pred2,average = 'weighted'))
#print(metrics.f1_score(y_testRO,y_pred3,average = 'weighted'))

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



KeyError: 5

Exception ignored in: 'pomegranate.hmm.HiddenMarkovModel._labeled_summarize'
KeyError: 5


KeyError: 5

Exception ignored in: 'pomegranate.hmm.HiddenMarkovModel._labeled_summarize'
KeyError: 5


0.36714957313452934
0.505706555397713
0.05769344208046942
